xref: /llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp (revision b87dc35669929ed29838cc7006c25ef9fa84e6f6)
1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// returns the encoded value of the instruction (as a uint64_t, or through an
// APInt when an encoding is wider than 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CodeGenHwModes.h"
16 #include "CodeGenInstruction.h"
17 #include "CodeGenTarget.h"
18 #include "InfoByHwMode.h"
19 #include "TableGenBackends.h"
20 #include "VarLenCodeEmitterGen.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/Support/Casting.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/TableGen/Error.h"
27 #include "llvm/TableGen/Record.h"
28 #include "llvm/TableGen/TableGenBackend.h"
29 #include <cstdint>
30 #include <map>
31 #include <set>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 using namespace llvm;
37 
38 namespace {
39 
40 class CodeEmitterGen {
41   RecordKeeper &Records;
42 
43 public:
44   CodeEmitterGen(RecordKeeper &R) : Records(R) {}
45 
46   void run(raw_ostream &o);
47 
48 private:
49   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
50   std::string getInstructionCase(Record *R, CodeGenTarget &Target);
51   std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
52                                             CodeGenTarget &Target);
53   bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
54                                const std::string &VarName,
55                                std::string &Case, CodeGenTarget &Target);
56 
57   void emitInstructionBaseValues(
58       raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
59       CodeGenTarget &Target, int HwMode = -1);
60   unsigned BitWidth;
61   bool UseAPInt;
62 };
63 
64 // If the VarBitInit at position 'bit' matches the specified variable then
65 // return the variable bit position.  Otherwise return -1.
66 int CodeEmitterGen::getVariableBit(const std::string &VarName,
67                                    BitsInit *BI, int bit) {
68   if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
69     if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
70       if (VI->getName() == VarName)
71         return VBI->getBitNum();
72   } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
73     if (VI->getName() == VarName)
74       return 0;
75   }
76 
77   return -1;
78 }
79 
80 // Returns true if it succeeds, false if an error.
81 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
82                                              const std::string &VarName,
83                                              std::string &Case,
84                                              CodeGenTarget &Target) {
85   CodeGenInstruction &CGI = Target.getInstruction(R);
86 
87   // Determine if VarName actually contributes to the Inst encoding.
88   int bit = BI->getNumBits()-1;
89 
90   // Scan for a bit that this contributed to.
91   for (; bit >= 0; ) {
92     if (getVariableBit(VarName, BI, bit) != -1)
93       break;
94 
95     --bit;
96   }
97 
98   // If we found no bits, ignore this value, otherwise emit the call to get the
99   // operand encoding.
100   if (bit < 0)
101     return true;
102 
103   // If the operand matches by name, reference according to that
104   // operand number. Non-matching operands are assumed to be in
105   // order.
106   unsigned OpIdx;
107   std::pair<unsigned, unsigned> SubOp;
108   if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
109     OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
110   } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
111     // Get the machine operand number for the indicated operand.
112     OpIdx = CGI.Operands[OpIdx].MIOperandNo;
113   } else {
114     PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName());
115     return false;
116   }
117 
118   if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
119     PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
120     return false;
121   }
122 
123   std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
124   std::string &EncoderMethodName =
125       CGI.Operands[SO.first].EncoderMethodNames[SO.second];
126 
127   if (UseAPInt)
128     Case += "      op.clearAllBits();\n";
129 
130   Case += "      // op: " + VarName + "\n";
131 
132   // If the source operand has a custom encoder, use it.
133   if (!EncoderMethodName.empty()) {
134     if (UseAPInt) {
135       Case += "      " + EncoderMethodName + "(MI, " + utostr(OpIdx);
136       Case += ", op";
137     } else {
138       Case += "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
139     }
140     Case += ", Fixups, STI);\n";
141   } else {
142     if (UseAPInt) {
143       Case += "      getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
144       Case += ", op, Fixups, STI";
145     } else {
146       Case += "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
147       Case += ", Fixups, STI";
148     }
149     Case += ");\n";
150   }
151 
152   // Precalculate the number of lits this variable contributes to in the
153   // operand. If there is a single lit (consecutive range of bits) we can use a
154   // destructive sequence on APInt that reduces memory allocations.
155   int numOperandLits = 0;
156   for (int tmpBit = bit; tmpBit >= 0;) {
157     int varBit = getVariableBit(VarName, BI, tmpBit);
158 
159     // If this bit isn't from a variable, skip it.
160     if (varBit == -1) {
161       --tmpBit;
162       continue;
163     }
164 
165     // Figure out the consecutive range of bits covered by this operand, in
166     // order to generate better encoding code.
167     int beginVarBit = varBit;
168     int N = 1;
169     for (--tmpBit; tmpBit >= 0;) {
170       varBit = getVariableBit(VarName, BI, tmpBit);
171       if (varBit == -1 || varBit != (beginVarBit - N))
172         break;
173       ++N;
174       --tmpBit;
175     }
176     ++numOperandLits;
177   }
178 
179   for (; bit >= 0; ) {
180     int varBit = getVariableBit(VarName, BI, bit);
181 
182     // If this bit isn't from a variable, skip it.
183     if (varBit == -1) {
184       --bit;
185       continue;
186     }
187 
188     // Figure out the consecutive range of bits covered by this operand, in
189     // order to generate better encoding code.
190     int beginInstBit = bit;
191     int beginVarBit = varBit;
192     int N = 1;
193     for (--bit; bit >= 0;) {
194       varBit = getVariableBit(VarName, BI, bit);
195       if (varBit == -1 || varBit != (beginVarBit - N)) break;
196       ++N;
197       --bit;
198     }
199 
200     std::string maskStr;
201     int opShift;
202 
203     unsigned loBit = beginVarBit - N + 1;
204     unsigned hiBit = loBit + N;
205     unsigned loInstBit = beginInstBit - N + 1;
206     if (UseAPInt) {
207       std::string extractStr;
208       if (N >= 64) {
209         extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
210                      itostr(loBit) + ")";
211         Case += "      Value.insertBits(" + extractStr + ", " +
212                 itostr(loInstBit) + ");\n";
213       } else {
214         extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
215                      ", " + itostr(loBit) + ")";
216         Case += "      Value.insertBits(" + extractStr + ", " +
217                 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
218       }
219     } else {
220       uint64_t opMask = ~(uint64_t)0 >> (64 - N);
221       opShift = beginVarBit - N + 1;
222       opMask <<= opShift;
223       maskStr = "UINT64_C(" + utostr(opMask) + ")";
224       opShift = beginInstBit - beginVarBit;
225 
226       if (numOperandLits == 1) {
227         Case += "      op &= " + maskStr + ";\n";
228         if (opShift > 0) {
229           Case += "      op <<= " + itostr(opShift) + ";\n";
230         } else if (opShift < 0) {
231           Case += "      op >>= " + itostr(-opShift) + ";\n";
232         }
233         Case += "      Value |= op;\n";
234       } else {
235         if (opShift > 0) {
236           Case += "      Value |= (op & " + maskStr + ") << " +
237                   itostr(opShift) + ";\n";
238         } else if (opShift < 0) {
239           Case += "      Value |= (op & " + maskStr + ") >> " +
240                   itostr(-opShift) + ";\n";
241         } else {
242           Case += "      Value |= (op & " + maskStr + ");\n";
243         }
244       }
245     }
246   }
247   return true;
248 }
249 
250 std::string CodeEmitterGen::getInstructionCase(Record *R,
251                                                CodeGenTarget &Target) {
252   std::string Case;
253   if (const RecordVal *RV = R->getValue("EncodingInfos")) {
254     if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
255       const CodeGenHwModes &HWM = Target.getHwModes();
256       EncodingInfoByHwMode EBM(DI->getDef(), HWM);
257       Case += "      switch (HwMode) {\n";
258       Case += "      default: llvm_unreachable(\"Unhandled HwMode\");\n";
259       for (auto &KV : EBM) {
260         Case += "      case " + itostr(KV.first) + ": {\n";
261         Case += getInstructionCaseForEncoding(R, KV.second, Target);
262         Case += "      break;\n";
263         Case += "      }\n";
264       }
265       Case += "      }\n";
266       return Case;
267     }
268   }
269   return getInstructionCaseForEncoding(R, R, Target);
270 }
271 
272 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
273                                                           CodeGenTarget &Target) {
274   std::string Case;
275   BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
276 
277   // Loop over all of the fields in the instruction, determining which are the
278   // operands to the instruction.
279   bool Success = true;
280   for (const RecordVal &RV : EncodingDef->getValues()) {
281     // Ignore fixed fields in the record, we're looking for values like:
282     //    bits<5> RST = { ?, ?, ?, ?, ? };
283     if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
284       continue;
285 
286     Success &=
287         addCodeToMergeInOperand(R, BI, std::string(RV.getName()),
288                                 Case, Target);
289   }
290 
291   if (!Success) {
292     // Dump the record, so we can see what's going on...
293     std::string E;
294     raw_string_ostream S(E);
295     S << "Dumping record for previous error:\n";
296     S << *R;
297     PrintNote(E);
298   }
299 
300   StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
301   if (!PostEmitter.empty()) {
302     Case += "      Value = ";
303     Case += PostEmitter;
304     Case += "(MI, Value";
305     Case += ", STI";
306     Case += ");\n";
307   }
308 
309   return Case;
310 }
311 
312 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
313   for (unsigned I = 0; I < Bits.getNumWords(); ++I)
314     OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
315        << ")";
316 }
317 
318 void CodeEmitterGen::emitInstructionBaseValues(
319     raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
320     CodeGenTarget &Target, int HwMode) {
321   const CodeGenHwModes &HWM = Target.getHwModes();
322   if (HwMode == -1)
323     o << "  static const uint64_t InstBits[] = {\n";
324   else
325     o << "  static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
326       << "[] = {\n";
327 
328   for (const CodeGenInstruction *CGI : NumberedInstructions) {
329     Record *R = CGI->TheDef;
330 
331     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
332         R->getValueAsBit("isPseudo")) {
333       o << "    "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
334       continue;
335     }
336 
337     Record *EncodingDef = R;
338     if (const RecordVal *RV = R->getValue("EncodingInfos")) {
339       if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
340         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
341         if (EBM.hasMode(HwMode))
342           EncodingDef = EBM.get(HwMode);
343       }
344     }
345     BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
346 
347     // Start by filling in fixed values.
348     APInt Value(BitWidth, 0);
349     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
350       if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
351         Value.setBit(i);
352     }
353     o << "    ";
354     emitInstBits(o, Value);
355     o << "," << '\t' << "// " << R->getName() << "\n";
356   }
357   o << "    UINT64_C(0)\n  };\n";
358 }
359 
360 void CodeEmitterGen::run(raw_ostream &o) {
361   CodeGenTarget Target(Records);
362   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
363 
364   // For little-endian instruction bit encodings, reverse the bit order
365   Target.reverseBitsForLittleEndianEncoding();
366 
367   ArrayRef<const CodeGenInstruction*> NumberedInstructions =
368     Target.getInstructionsByEnumValue();
369 
370   if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
371         Record *R = CGI->TheDef;
372         return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
373       })) {
374     emitVarLenCodeEmitter(Records, o);
375   } else {
376     const CodeGenHwModes &HWM = Target.getHwModes();
377     // The set of HwModes used by instruction encodings.
378     std::set<unsigned> HwModes;
379     BitWidth = 0;
380     for (const CodeGenInstruction *CGI : NumberedInstructions) {
381       Record *R = CGI->TheDef;
382       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
383           R->getValueAsBit("isPseudo"))
384         continue;
385 
386       if (const RecordVal *RV = R->getValue("EncodingInfos")) {
387         if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
388           EncodingInfoByHwMode EBM(DI->getDef(), HWM);
389           for (auto &KV : EBM) {
390             BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
391             BitWidth = std::max(BitWidth, BI->getNumBits());
392             HwModes.insert(KV.first);
393           }
394           continue;
395         }
396       }
397       BitsInit *BI = R->getValueAsBitsInit("Inst");
398       BitWidth = std::max(BitWidth, BI->getNumBits());
399     }
400     UseAPInt = BitWidth > 64;
401 
402     // Emit function declaration
403     if (UseAPInt) {
404       o << "void " << Target.getName()
405         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
406         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
407         << "    APInt &Inst,\n"
408         << "    APInt &Scratch,\n"
409         << "    const MCSubtargetInfo &STI) const {\n";
410     } else {
411       o << "uint64_t " << Target.getName();
412       o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
413         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
414         << "    const MCSubtargetInfo &STI) const {\n";
415     }
416 
417     // Emit instruction base values
418     if (HwModes.empty()) {
419       emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
420     } else {
421       for (unsigned HwMode : HwModes)
422         emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
423     }
424 
425     if (!HwModes.empty()) {
426       o << "  const uint64_t *InstBits;\n";
427       o << "  unsigned HwMode = STI.getHwMode();\n";
428       o << "  switch (HwMode) {\n";
429       o << "  default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
430       for (unsigned I : HwModes) {
431         o << "  case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
432           << "; break;\n";
433       }
434       o << "  };\n";
435     }
436 
437     // Map to accumulate all the cases.
438     std::map<std::string, std::vector<std::string>> CaseMap;
439 
440     // Construct all cases statement for each opcode
441     for (Record *R : Insts) {
442       if (R->getValueAsString("Namespace") == "TargetOpcode" ||
443           R->getValueAsBit("isPseudo"))
444         continue;
445       std::string InstName =
446           (R->getValueAsString("Namespace") + "::" + R->getName()).str();
447       std::string Case = getInstructionCase(R, Target);
448 
449       CaseMap[Case].push_back(std::move(InstName));
450     }
451 
452     // Emit initial function code
453     if (UseAPInt) {
454       int NumWords = APInt::getNumWords(BitWidth);
455       o << "  const unsigned opcode = MI.getOpcode();\n"
456         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
457         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
458         << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
459         << NumWords << ", " << NumWords << "));\n"
460         << "  APInt &Value = Inst;\n"
461         << "  APInt &op = Scratch;\n"
462         << "  switch (opcode) {\n";
463     } else {
464       o << "  const unsigned opcode = MI.getOpcode();\n"
465         << "  uint64_t Value = InstBits[opcode];\n"
466         << "  uint64_t op = 0;\n"
467         << "  (void)op;  // suppress warning\n"
468         << "  switch (opcode) {\n";
469     }
470 
471     // Emit each case statement
472     std::map<std::string, std::vector<std::string>>::iterator IE, EE;
473     for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
474       const std::string &Case = IE->first;
475       std::vector<std::string> &InstList = IE->second;
476 
477       for (int i = 0, N = InstList.size(); i < N; i++) {
478         if (i)
479           o << "\n";
480         o << "    case " << InstList[i] << ":";
481       }
482       o << " {\n";
483       o << Case;
484       o << "      break;\n"
485         << "    }\n";
486     }
487 
488     // Default case: unhandled opcode
489     o << "  default:\n"
490       << "    std::string msg;\n"
491       << "    raw_string_ostream Msg(msg);\n"
492       << "    Msg << \"Not supported instr: \" << MI;\n"
493       << "    report_fatal_error(Msg.str().c_str());\n"
494       << "  }\n";
495     if (UseAPInt)
496       o << "  Inst = Value;\n";
497     else
498       o << "  return Value;\n";
499     o << "}\n\n";
500   }
501 }
502 
503 } // end anonymous namespace
504 
505 namespace llvm {
506 
507 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
508   emitSourceFileHeader("Machine Code Emitter", OS);
509   CodeEmitterGen(RK).run(OS);
510 }
511 
512 } // end namespace llvm
513