xref: /llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp (revision 9c93e728bfb8079c1de51e5481168c4083038c2a)
1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// returns the encoded value of the instruction (a uint64_t, or an APInt when
// the widest instruction encoding exceeds 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenHwModes.h"
16 #include "CodeGenInstruction.h"
17 #include "CodeGenTarget.h"
18 #include "InfoByHwMode.h"
19 #include "VarLenCodeEmitterGen.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/TableGen/Error.h"
26 #include "llvm/TableGen/Record.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <cstdint>
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 using namespace llvm;
36 
37 namespace {
38 
39 class CodeEmitterGen {
40   RecordKeeper &Records;
41 
42 public:
43   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
44 
45   void run(raw_ostream &o);
46 
47 private:
48   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
49   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
50   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
51                                             CodeGenTarget &Target);
52   bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
53                                const std::string &VarName,
54                                std::string &Case, CodeGenTarget &Target);
55 
56   void emitInstructionBaseValues(
57       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
58       CodeGenTarget &Target, int HwMode = -1);
59   unsigned BitWidth;
60   bool UseAPInt;
61 };
62 
63 // If the VarBitInit at position 'bit' matches the specified variable then
64 // return the variable bit position.  Otherwise return -1.
65 int CodeEmitterGen::getVariableBit(const std::string &VarName,
66                                    BitsInit *BI, int bit) {
67   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
68     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
69       if (VI->getName() == VarName)
70         return VBI->getBitNum();
71   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
72     if (VI->getName() == VarName)
73       return 0;
74   }
75 
76   return -1;
77 }
78 
79 // Returns true if it succeeds, false if an error.
80 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
81                                              const std::string &VarName,
82                                              std::string &Case,
83                                              CodeGenTarget &Target) {
84   CodeGenInstruction &CGI = Target.getInstruction(R);
85 
86   // Determine if VarName actually contributes to the Inst encoding.
87   int bit = BI->getNumBits()-1;
88 
89   // Scan for a bit that this contributed to.
90   for (; bit >= 0; ) {
91     if (getVariableBit(VarName, BI, bit) != -1)
92       break;
93 
94     --bit;
95   }
96 
97   // If we found no bits, ignore this value, otherwise emit the call to get the
98   // operand encoding.
99   if (bit < 0)
100     return true;
101 
102   // If the operand matches by name, reference according to that
103   // operand number. Non-matching operands are assumed to be in
104   // order.
105   unsigned OpIdx;
106   std::pair<unsigned, unsigned> SubOp;
107   if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
108     OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
109   } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
110     // Get the machine operand number for the indicated operand.
111     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
112   } else {
113     PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName());
114     return false;
115   }
116 
117   if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
118     PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
119     return false;
120   }
121 
122   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
123   std::string &EncoderMethodName =
124       CGI.Operands[SO.first].EncoderMethodNames[SO.second];
125 
126   if (UseAPInt)
127     Case += "      op.clearAllBits();\n";
128 
129   Case += "      // op: " + VarName + "\n";
130 
131   // If the source operand has a custom encoder, use it.
132   if (!EncoderMethodName.empty()) {
133     if (UseAPInt) {
134       Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
135       Case += ", op";
136     } else {
137       Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
138     }
139     Case += ", Fixups, STI);\n";
140   } else {
141     if (UseAPInt) {
142       Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
143       Case += ", op, Fixups, STI";
144     } else {
145       Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
146       Case += ", Fixups, STI";
147     }
148     Case += ");\n";
149   }
150 
151   // Precalculate the number of lits this variable contributes to in the
152   // operand. If there is a single lit (consecutive range of bits) we can use a
153   // destructive sequence on APInt that reduces memory allocations.
154   int numOperandLits = 0;
155   for (int tmpBit = bit; tmpBit >= 0;) {
156     int varBit = getVariableBit(VarName, BI, tmpBit);
157 
158     // If this bit isn't from a variable, skip it.
159     if (varBit == -1) {
160       --tmpBit;
161       continue;
162     }
163 
164     // Figure out the consecutive range of bits covered by this operand, in
165     // order to generate better encoding code.
166     int beginVarBit = varBit;
167     int N = 1;
168     for (--tmpBit; tmpBit >= 0;) {
169       varBit = getVariableBit(VarName, BI, tmpBit);
170       if (varBit == -1 || varBit != (beginVarBit - N))
171         break;
172       ++N;
173       --tmpBit;
174     }
175     ++numOperandLits;
176   }
177 
178   for (; bit >= 0; ) {
179     int varBit = getVariableBit(VarName, BI, bit);
180 
181     // If this bit isn't from a variable, skip it.
182     if (varBit == -1) {
183       --bit;
184       continue;
185     }
186 
187     // Figure out the consecutive range of bits covered by this operand, in
188     // order to generate better encoding code.
189     int beginInstBit = bit;
190     int beginVarBit = varBit;
191     int N = 1;
192     for (--bit; bit >= 0;) {
193       varBit = getVariableBit(VarName, BI, bit);
194       if (varBit == -1 || varBit != (beginVarBit - N)) break;
195       ++N;
196       --bit;
197     }
198 
199     std::string maskStr;
200     int opShift;
201 
202     unsigned loBit = beginVarBit - N + 1;
203     unsigned hiBit = loBit + N;
204     unsigned loInstBit = beginInstBit - N + 1;
205     if (UseAPInt) {
206       std::string extractStr;
207       if (N >= 64) {
208         extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
209                      itostr(loBit) + ")";
210         Case += "      Value.insertBits(" + extractStr + ", " +
211                 itostr(loInstBit) + ");\n";
212       } else {
213         extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
214                      ", " + itostr(loBit) + ")";
215         Case += "      Value.insertBits(" + extractStr + ", " +
216                 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
217       }
218     } else {
219       uint64_t opMask = ~(uint64_t)0 >> (64 - N);
220       opShift = beginVarBit - N + 1;
221       opMask <<= opShift;
222       maskStr = "UINT64_C(" + utostr(opMask) + ")";
223       opShift = beginInstBit - beginVarBit;
224 
225       if (numOperandLits == 1) {
226         Case += "      op &= " + maskStr + ";\n";
227         if (opShift > 0) {
228           Case += "      op <<= " + itostr(opShift) + ";\n";
229         } else if (opShift < 0) {
230           Case += "      op >>= " + itostr(-opShift) + ";\n";
231         }
232         Case += "      Value |= op;\n";
233       } else {
234         if (opShift > 0) {
235           Case += "      Value |= (op & " + maskStr + ") << " +
236                   itostr(opShift) + ";\n";
237         } else if (opShift < 0) {
238           Case += "      Value |= (op & " + maskStr + ") >> " +
239                   itostr(-opShift) + ";\n";
240         } else {
241           Case += "      Value |= (op & " + maskStr + ");\n";
242         }
243       }
244     }
245   }
246   return true;
247 }
248 
249 std::string CodeEmitterGen::getInstructionCase(Record *R,
250                                                CodeGenTarget &Target) {
251   std::string Case;
252   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
253     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
254       const CodeGenHwModes &HWM = Target.getHwModes();
255       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
256       Case += "      switch (HwMode) {\n";
257       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
258       for (auto &KV : EBM) {
259         Case += "      case " + itostr(KV.first) + ": {\n";
260         Case += getInstructionCaseForEncoding(R, KV.second, Target);
261         Case += "      break;\n";
262         Case += "      }\n";
263       }
264       Case += "      }\n";
265       return Case;
266     }
267   }
268   return getInstructionCaseForEncoding(R, R, Target);
269 }
270 
271 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
272                                                           CodeGenTarget &Target) {
273   std::string Case;
274   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
275 
276   // Loop over all of the fields in the instruction, determining which are the
277   // operands to the instruction.
278   bool Success = true;
279   for (const RecordVal &RV : EncodingDef->getValues()) {
280     // Ignore fixed fields in the record, we're looking for values like:
281     //    bits<5> RST = { ?, ?, ?, ?, ? };
282     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
283       continue;
284 
285     Success &=
286         addCodeToMergeInOperand(R, BI, std::string(RV.getName()),
287                                 Case, Target);
288   }
289 
290   if (!Success) {
291     // Dump the record, so we can see what's going on...
292     std::string E;
293     raw_string_ostream S(E);
294     S << "Dumping record for previous error:\n";
295     S << *R;
296     PrintNote(E);
297   }
298 
299   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
300   if (!PostEmitter.empty()) {
301     Case += "      Value = ";
302     Case += PostEmitter;
303     Case += "(MI, Value";
304     Case += ", STI";
305     Case += ");\n";
306   }
307 
308   return Case;
309 }
310 
311 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
312   for (unsigned I = 0; I < Bits.getNumWords(); ++I)
313     OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
314        << ")";
315 }
316 
317 void CodeEmitterGen::emitInstructionBaseValues(
318     raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
319     CodeGenTarget &Target, int HwMode) {
320   const CodeGenHwModes &HWM = Target.getHwModes();
321   if (HwMode == -1)
322     o << "  static const uint64_t InstBits[] = {\n";
323   else
324     o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
325       << "[] = {\n";
326 
327   for (const CodeGenInstruction *CGI : NumberedInstructions) {
328     Record *R = CGI->TheDef;
329 
330     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
331         R->getValueAsBit("isPseudo")) {
332       o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
333       continue;
334     }
335 
336     Record *EncodingDef = R;
337     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
338       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
339         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
340         if (EBM.hasMode(HwMode))
341           EncodingDef = EBM.get(HwMode);
342       }
343     }
344     BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
345 
346     // Start by filling in fixed values.
347     APInt Value(BitWidth, 0);
348     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
349       if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
350         Value.setBit(i);
351     }
352     o << "    ";
353     emitInstBits(o, Value);
354     o << "," << '\t' << "// " << R->getName() << "\n";
355   }
356   o << "    UINT64_C(0)\n  };\n";
357 }
358 
359 void CodeEmitterGen::run(raw_ostream &o) {
360   emitSourceFileHeader("Machine Code Emitter", o);
361 
362   CodeGenTarget Target(Records);
363   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
364 
365   // For little-endian instruction bit encodings, reverse the bit order
366   Target.reverseBitsForLittleEndianEncoding();
367 
368   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
369     Target.getInstructionsByEnumValue();
370 
371   if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
372         Record *R = CGI->TheDef;
373         return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
374       })) {
375     emitVarLenCodeEmitter(Records, o);
376   } else {
377     const CodeGenHwModes &HWM = Target.getHwModes();
378     // The set of HwModes used by instruction encodings.
379     std::set<unsigned> HwModes;
380     BitWidth = 0;
381     for (const CodeGenInstruction *CGI : NumberedInstructions) {
382       Record *R = CGI->TheDef;
383       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
384           R->getValueAsBit("isPseudo"))
385         continue;
386 
387       if (const RecordVal *RV = R->getValue("EncodingInfos")) {
388         if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
389           EncodingInfoByHwMode EBM(DI->getDef(), HWM);
390           for (auto &KV : EBM) {
391             BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
392             BitWidth = std::max(BitWidth, BI->getNumBits());
393             HwModes.insert(KV.first);
394           }
395           continue;
396         }
397       }
398       BitsInit *BI = R->getValueAsBitsInit("Inst");
399       BitWidth = std::max(BitWidth, BI->getNumBits());
400     }
401     UseAPInt = BitWidth > 64;
402 
403     // Emit function declaration
404     if (UseAPInt) {
405       o << "void " << Target.getName()
406         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
407         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
408         << "    APInt &Inst,\n"
409         << "    APInt &Scratch,\n"
410         << "    const MCSubtargetInfo &STI) const {\n";
411     } else {
412       o << "uint64_t " << Target.getName();
413       o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
414         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
415         << "    const MCSubtargetInfo &STI) const {\n";
416     }
417 
418     // Emit instruction base values
419     if (HwModes.empty()) {
420       emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
421     } else {
422       for (unsigned HwMode : HwModes)
423         emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
424     }
425 
426     if (!HwModes.empty()) {
427       o << "  const uint64_t *InstBits;\n";
428       o << "  unsigned HwMode = STI.getHwMode();\n";
429       o << "  switch (HwMode) {\n";
430       o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
431       for (unsigned I : HwModes) {
432         o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
433           << "; break;\n";
434       }
435       o << "  };\n";
436     }
437 
438     // Map to accumulate all the cases.
439     std::map<std::string, std::vector<std::string>> CaseMap;
440 
441     // Construct all cases statement for each opcode
442     for (Record *R : Insts) {
443       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
444           R->getValueAsBit("isPseudo"))
445         continue;
446       std::string InstName =
447           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
448       std::string Case = getInstructionCase(R, Target);
449 
450       CaseMap[Case].push_back(std::move(InstName));
451     }
452 
453     // Emit initial function code
454     if (UseAPInt) {
455       int NumWords = APInt::getNumWords(BitWidth);
456       o << "  const unsigned opcode = MI.getOpcode();\n"
457         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
458         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
459         << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
460         << NumWords << ", " << NumWords << "));\n"
461         << "  APInt &Value = Inst;\n"
462         << "  APInt &op = Scratch;\n"
463         << "  switch (opcode) {\n";
464     } else {
465       o << "  const unsigned opcode = MI.getOpcode();\n"
466         << "  uint64_t Value = InstBits[opcode];\n"
467         << "  uint64_t op = 0;\n"
468         << "  (void)op;  // suppress warning\n"
469         << "  switch (opcode) {\n";
470     }
471 
472     // Emit each case statement
473     std::map<std::string, std::vector<std::string>>::iterator IE, EE;
474     for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
475       const std::string &Case = IE->first;
476       std::vector<std::string> &InstList = IE->second;
477 
478       for (int i = 0, N = InstList.size(); i < N; i++) {
479         if (i)
480           o << "\n";
481         o << "    case " << InstList[i] << ":";
482       }
483       o << " {\n";
484       o << Case;
485       o << "      break;\n"
486         << "    }\n";
487     }
488 
489     // Default case: unhandled opcode
490     o << "  default:\n"
491       << "    std::string msg;\n"
492       << "    raw_string_ostream Msg(msg);\n"
493       << "    Msg << \"Not supported instr: \" << MI;\n"
494       << "    report_fatal_error(Msg.str().c_str());\n"
495       << "  }\n";
496     if (UseAPInt)
497       o << "  Inst = Value;\n";
498     else
499       o << "  return Value;\n";
500     o << "}\n\n";
501   }
502 }
503 
504 } // end anonymous namespace
505 
506 static TableGen::Emitter::OptClass<CodeEmitterGen>
507     X("gen-emitter", "Generate machine code emitter");
508