xref: /llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp (revision aeafcbcd75f2338cacfb2d2c2a538a6e3b6a38e7)
1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // CodeEmitterGen uses the descriptions of instructions and their fields to
10 // construct an automated code emitter: a function that, given an MCInst, yields
11 // the instruction's encoding (a uint64_t, or an APInt if wider than 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenHwModes.h"
16 #include "CodeGenInstruction.h"
17 #include "CodeGenTarget.h"
18 #include "InfoByHwMode.h"
19 #include "TableGenBackends.h"
20 #include "VarLenCodeEmitterGen.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/Support/Casting.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/TableGen/Error.h"
27 #include "llvm/TableGen/Record.h"
28 #include "llvm/TableGen/TableGenBackend.h"
29 #include <cstdint>
30 #include <map>
31 #include <set>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 using namespace llvm;
37 
38 namespace {
39 
40 class CodeEmitterGen {
41   RecordKeeper &Records;
42 
43 public:
44   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
45 
46   void run(raw_ostream &o);
47 
48 private:
49   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
50   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
51   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
52                                             CodeGenTarget &Target);
53   bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
54                                const std::string &VarName, unsigned &NumberedOp,
55                                std::set<unsigned> &NamedOpIndices,
56                                std::string &Case, CodeGenTarget &Target);
57 
58   void emitInstructionBaseValues(
59       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
60       CodeGenTarget &Target, int HwMode = -1);
61   unsigned BitWidth;
62   bool UseAPInt;
63 };
64 
65 // If the VarBitInit at position 'bit' matches the specified variable then
66 // return the variable bit position.  Otherwise return -1.
67 int CodeEmitterGen::getVariableBit(const std::string &VarName,
68                                    BitsInit *BI, int bit) {
69   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
70     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
71       if (VI->getName() == VarName)
72         return VBI->getBitNum();
73   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
74     if (VI->getName() == VarName)
75       return 0;
76   }
77 
78   return -1;
79 }
80 
81 // Returns true if it succeeds, false if an error.
82 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
83                                              const std::string &VarName,
84                                              unsigned &NumberedOp,
85                                              std::set<unsigned> &NamedOpIndices,
86                                              std::string &Case,
87                                              CodeGenTarget &Target) {
88   CodeGenInstruction &CGI = Target.getInstruction(R);
89 
90   // Determine if VarName actually contributes to the Inst encoding.
91   int bit = BI->getNumBits()-1;
92 
93   // Scan for a bit that this contributed to.
94   for (; bit >= 0; ) {
95     if (getVariableBit(VarName, BI, bit) != -1)
96       break;
97 
98     --bit;
99   }
100 
101   // If we found no bits, ignore this value, otherwise emit the call to get the
102   // operand encoding.
103   if (bit < 0)
104     return true;
105 
106   // If the operand matches by name, reference according to that
107   // operand number. Non-matching operands are assumed to be in
108   // order.
109   unsigned OpIdx;
110   std::pair<unsigned, unsigned> SubOp;
111   if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
112     OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
113   } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
114     // Get the machine operand number for the indicated operand.
115     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
116   } else {
117     // Fall back to positional lookup. By default, we now disable positional
118     // lookup (and print an error, below), but even so, we'll do the lookup to
119     // help print a helpful diagnostic message.
120     //
121     // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all
122     // this code, just leaving a "no operand named X in record Y" error.
123 
124     unsigned NumberOps = CGI.Operands.size();
125     /// If this operand is not supposed to be emitted by the
126     /// generated emitter, skip it.
127     while (NumberedOp < NumberOps &&
128            (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
129               (!NamedOpIndices.empty() && NamedOpIndices.count(
130                 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
131       ++NumberedOp;
132     }
133 
134     if (NumberedOp >=
135         CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
136       if (!Target.getInstructionSet()->getValueAsBit(
137               "useDeprecatedPositionallyEncodedOperands")) {
138         PrintError(R, Twine("No operand named ") + VarName + " in record " +
139                           R->getName() +
140                           " (would've given 'too few operands' error with "
141                           "useDeprecatedPositionallyEncodedOperands=true)");
142       } else {
143         PrintError(R, "Too few operands in record " + R->getName() +
144                           " (no match for variable " + VarName + ")");
145       }
146       return false;
147     }
148 
149     OpIdx = NumberedOp++;
150 
151     if (!Target.getInstructionSet()->getValueAsBit(
152             "useDeprecatedPositionallyEncodedOperands")) {
153       std::pair<unsigned, unsigned> SO =
154           CGI.Operands.getSubOperandNumber(OpIdx);
155       std::string OpName = CGI.Operands[SO.first].Name;
156       PrintError(R, Twine("No operand named ") + VarName + " in record " +
157                         R->getName() + " (would've used positional operand #" +
158                         Twine(SO.first) + " ('" + OpName + "') sub-op #" +
159                         Twine(SO.second) +
160                         " with useDeprecatedPositionallyEncodedOperands=true)");
161       return false;
162     }
163   }
164 
165   if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
166     PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
167     return false;
168   }
169 
170   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
171   std::string &EncoderMethodName =
172       CGI.Operands[SO.first].EncoderMethodNames[SO.second];
173 
174   if (UseAPInt)
175     Case += "      op.clearAllBits();\n";
176 
177   Case += "      // op: " + VarName + "\n";
178 
179   // If the source operand has a custom encoder, use it.
180   if (!EncoderMethodName.empty()) {
181     if (UseAPInt) {
182       Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
183       Case += ", op";
184     } else {
185       Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
186     }
187     Case += ", Fixups, STI);\n";
188   } else {
189     if (UseAPInt) {
190       Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
191       Case += ", op, Fixups, STI";
192     } else {
193       Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
194       Case += ", Fixups, STI";
195     }
196     Case += ");\n";
197   }
198 
199   // Precalculate the number of lits this variable contributes to in the
200   // operand. If there is a single lit (consecutive range of bits) we can use a
201   // destructive sequence on APInt that reduces memory allocations.
202   int numOperandLits = 0;
203   for (int tmpBit = bit; tmpBit >= 0;) {
204     int varBit = getVariableBit(VarName, BI, tmpBit);
205 
206     // If this bit isn't from a variable, skip it.
207     if (varBit == -1) {
208       --tmpBit;
209       continue;
210     }
211 
212     // Figure out the consecutive range of bits covered by this operand, in
213     // order to generate better encoding code.
214     int beginVarBit = varBit;
215     int N = 1;
216     for (--tmpBit; tmpBit >= 0;) {
217       varBit = getVariableBit(VarName, BI, tmpBit);
218       if (varBit == -1 || varBit != (beginVarBit - N))
219         break;
220       ++N;
221       --tmpBit;
222     }
223     ++numOperandLits;
224   }
225 
226   for (; bit >= 0; ) {
227     int varBit = getVariableBit(VarName, BI, bit);
228 
229     // If this bit isn't from a variable, skip it.
230     if (varBit == -1) {
231       --bit;
232       continue;
233     }
234 
235     // Figure out the consecutive range of bits covered by this operand, in
236     // order to generate better encoding code.
237     int beginInstBit = bit;
238     int beginVarBit = varBit;
239     int N = 1;
240     for (--bit; bit >= 0;) {
241       varBit = getVariableBit(VarName, BI, bit);
242       if (varBit == -1 || varBit != (beginVarBit - N)) break;
243       ++N;
244       --bit;
245     }
246 
247     std::string maskStr;
248     int opShift;
249 
250     unsigned loBit = beginVarBit - N + 1;
251     unsigned hiBit = loBit + N;
252     unsigned loInstBit = beginInstBit - N + 1;
253     if (UseAPInt) {
254       std::string extractStr;
255       if (N >= 64) {
256         extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
257                      itostr(loBit) + ")";
258         Case += "      Value.insertBits(" + extractStr + ", " +
259                 itostr(loInstBit) + ");\n";
260       } else {
261         extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
262                      ", " + itostr(loBit) + ")";
263         Case += "      Value.insertBits(" + extractStr + ", " +
264                 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
265       }
266     } else {
267       uint64_t opMask = ~(uint64_t)0 >> (64 - N);
268       opShift = beginVarBit - N + 1;
269       opMask <<= opShift;
270       maskStr = "UINT64_C(" + utostr(opMask) + ")";
271       opShift = beginInstBit - beginVarBit;
272 
273       if (numOperandLits == 1) {
274         Case += "      op &= " + maskStr + ";\n";
275         if (opShift > 0) {
276           Case += "      op <<= " + itostr(opShift) + ";\n";
277         } else if (opShift < 0) {
278           Case += "      op >>= " + itostr(-opShift) + ";\n";
279         }
280         Case += "      Value |= op;\n";
281       } else {
282         if (opShift > 0) {
283           Case += "      Value |= (op & " + maskStr + ") << " +
284                   itostr(opShift) + ";\n";
285         } else if (opShift < 0) {
286           Case += "      Value |= (op & " + maskStr + ") >> " +
287                   itostr(-opShift) + ";\n";
288         } else {
289           Case += "      Value |= (op & " + maskStr + ");\n";
290         }
291       }
292     }
293   }
294   return true;
295 }
296 
297 std::string CodeEmitterGen::getInstructionCase(Record *R,
298                                                CodeGenTarget &Target) {
299   std::string Case;
300   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
301     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
302       const CodeGenHwModes &HWM = Target.getHwModes();
303       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
304       Case += "      switch (HwMode) {\n";
305       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
306       for (auto &KV : EBM) {
307         Case += "      case " + itostr(KV.first) + ": {\n";
308         Case += getInstructionCaseForEncoding(R, KV.second, Target);
309         Case += "      break;\n";
310         Case += "      }\n";
311       }
312       Case += "      }\n";
313       return Case;
314     }
315   }
316   return getInstructionCaseForEncoding(R, R, Target);
317 }
318 
319 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
320                                                           CodeGenTarget &Target) {
321   std::string Case;
322   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
323   unsigned NumberedOp = 0;
324   std::set<unsigned> NamedOpIndices;
325 
326   // Collect the set of operand indices that might correspond to named
327   // operand, and skip these when assigning operands based on position.
328   if (Target.getInstructionSet()->
329        getValueAsBit("noNamedPositionallyEncodedOperands")) {
330     CodeGenInstruction &CGI = Target.getInstruction(R);
331     for (const RecordVal &RV : R->getValues()) {
332       unsigned OpIdx;
333       if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
334         continue;
335 
336       NamedOpIndices.insert(OpIdx);
337     }
338   }
339 
340   // Loop over all of the fields in the instruction, determining which are the
341   // operands to the instruction.
342   bool Success = true;
343   for (const RecordVal &RV : EncodingDef->getValues()) {
344     // Ignore fixed fields in the record, we're looking for values like:
345     //    bits<5> RST = { ?, ?, ?, ?, ? };
346     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
347       continue;
348 
349     Success &=
350         addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
351                                 NamedOpIndices, Case, Target);
352   }
353 
354   if (!Success) {
355     // Dump the record, so we can see what's going on...
356     std::string E;
357     raw_string_ostream S(E);
358     S << "Dumping record for previous error:\n";
359     S << *R;
360     PrintNote(E);
361   }
362 
363   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
364   if (!PostEmitter.empty()) {
365     Case += "      Value = ";
366     Case += PostEmitter;
367     Case += "(MI, Value";
368     Case += ", STI";
369     Case += ");\n";
370   }
371 
372   return Case;
373 }
374 
375 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
376   for (unsigned I = 0; I < Bits.getNumWords(); ++I)
377     OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
378        << ")";
379 }
380 
381 void CodeEmitterGen::emitInstructionBaseValues(
382     raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
383     CodeGenTarget &Target, int HwMode) {
384   const CodeGenHwModes &HWM = Target.getHwModes();
385   if (HwMode == -1)
386     o << "  static const uint64_t InstBits[] = {\n";
387   else
388     o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
389       << "[] = {\n";
390 
391   for (const CodeGenInstruction *CGI : NumberedInstructions) {
392     Record *R = CGI->TheDef;
393 
394     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
395         R->getValueAsBit("isPseudo")) {
396       o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
397       continue;
398     }
399 
400     Record *EncodingDef = R;
401     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
402       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
403         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
404         if (EBM.hasMode(HwMode))
405           EncodingDef = EBM.get(HwMode);
406       }
407     }
408     BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
409 
410     // Start by filling in fixed values.
411     APInt Value(BitWidth, 0);
412     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
413       if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
414         Value.setBit(i);
415     }
416     o << "    ";
417     emitInstBits(o, Value);
418     o << "," << '\t' << "// " << R->getName() << "\n";
419   }
420   o << "    UINT64_C(0)\n  };\n";
421 }
422 
423 void CodeEmitterGen::run(raw_ostream &o) {
424   CodeGenTarget Target(Records);
425   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
426 
427   // For little-endian instruction bit encodings, reverse the bit order
428   Target.reverseBitsForLittleEndianEncoding();
429 
430   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
431     Target.getInstructionsByEnumValue();
432 
433   if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
434         Record *R = CGI->TheDef;
435         return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
436       })) {
437     emitVarLenCodeEmitter(Records, o);
438   } else {
439     const CodeGenHwModes &HWM = Target.getHwModes();
440     // The set of HwModes used by instruction encodings.
441     std::set<unsigned> HwModes;
442     BitWidth = 0;
443     for (const CodeGenInstruction *CGI : NumberedInstructions) {
444       Record *R = CGI->TheDef;
445       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
446           R->getValueAsBit("isPseudo"))
447         continue;
448 
449       if (const RecordVal *RV = R->getValue("EncodingInfos")) {
450         if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
451           EncodingInfoByHwMode EBM(DI->getDef(), HWM);
452           for (auto &KV : EBM) {
453             BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
454             BitWidth = std::max(BitWidth, BI->getNumBits());
455             HwModes.insert(KV.first);
456           }
457           continue;
458         }
459       }
460       BitsInit *BI = R->getValueAsBitsInit("Inst");
461       BitWidth = std::max(BitWidth, BI->getNumBits());
462     }
463     UseAPInt = BitWidth > 64;
464 
465     // Emit function declaration
466     if (UseAPInt) {
467       o << "void " << Target.getName()
468         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
469         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
470         << "    APInt &Inst,\n"
471         << "    APInt &Scratch,\n"
472         << "    const MCSubtargetInfo &STI) const {\n";
473     } else {
474       o << "uint64_t " << Target.getName();
475       o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
476         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
477         << "    const MCSubtargetInfo &STI) const {\n";
478     }
479 
480     // Emit instruction base values
481     if (HwModes.empty()) {
482       emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
483     } else {
484       for (unsigned HwMode : HwModes)
485         emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
486     }
487 
488     if (!HwModes.empty()) {
489       o << "  const uint64_t *InstBits;\n";
490       o << "  unsigned HwMode = STI.getHwMode();\n";
491       o << "  switch (HwMode) {\n";
492       o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
493       for (unsigned I : HwModes) {
494         o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
495           << "; break;\n";
496       }
497       o << "  };\n";
498     }
499 
500     // Map to accumulate all the cases.
501     std::map<std::string, std::vector<std::string>> CaseMap;
502 
503     // Construct all cases statement for each opcode
504     for (Record *R : Insts) {
505       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
506           R->getValueAsBit("isPseudo"))
507         continue;
508       std::string InstName =
509           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
510       std::string Case = getInstructionCase(R, Target);
511 
512       CaseMap[Case].push_back(std::move(InstName));
513     }
514 
515     // Emit initial function code
516     if (UseAPInt) {
517       int NumWords = APInt::getNumWords(BitWidth);
518       o << "  const unsigned opcode = MI.getOpcode();\n"
519         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
520         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
521         << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
522         << NumWords << ", " << NumWords << "));\n"
523         << "  APInt &Value = Inst;\n"
524         << "  APInt &op = Scratch;\n"
525         << "  switch (opcode) {\n";
526     } else {
527       o << "  const unsigned opcode = MI.getOpcode();\n"
528         << "  uint64_t Value = InstBits[opcode];\n"
529         << "  uint64_t op = 0;\n"
530         << "  (void)op;  // suppress warning\n"
531         << "  switch (opcode) {\n";
532     }
533 
534     // Emit each case statement
535     std::map<std::string, std::vector<std::string>>::iterator IE, EE;
536     for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
537       const std::string &Case = IE->first;
538       std::vector<std::string> &InstList = IE->second;
539 
540       for (int i = 0, N = InstList.size(); i < N; i++) {
541         if (i)
542           o << "\n";
543         o << "    case " << InstList[i] << ":";
544       }
545       o << " {\n";
546       o << Case;
547       o << "      break;\n"
548         << "    }\n";
549     }
550 
551     // Default case: unhandled opcode
552     o << "  default:\n"
553       << "    std::string msg;\n"
554       << "    raw_string_ostream Msg(msg);\n"
555       << "    Msg << \"Not supported instr: \" << MI;\n"
556       << "    report_fatal_error(Msg.str().c_str());\n"
557       << "  }\n";
558     if (UseAPInt)
559       o << "  Inst = Value;\n";
560     else
561       o << "  return Value;\n";
562     o << "}\n\n";
563   }
564 }
565 
566 } // end anonymous namespace
567 
568 namespace llvm {
569 
570 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
571   emitSourceFileHeader("Machine Code Emitter", OS);
572   CodeEmitterGen(RK).run(OS);
573 }
574 
575 } // end namespace llvm
576