1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function that, given an MCInst,
// returns the encoded value of the instruction (as a uint64_t, or through an
// APInt out-parameter when the encoding is wider than 64 bits).
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
18 #include "Types.h"
19 #include "VarLenCodeEmitterGen.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/TableGen/Error.h"
26 #include "llvm/TableGen/Record.h"
27 #include "llvm/TableGen/TableGenBackend.h"
28 #include <cstdint>
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <utility>
33 #include <vector>
34
35 using namespace llvm;
36
37 namespace {
38
39 class CodeEmitterGen {
40 RecordKeeper &Records;
41
42 public:
CodeEmitterGen(RecordKeeper & R)43 CodeEmitterGen(RecordKeeper &R) : Records(R) {}
44
45 void run(raw_ostream &o);
46
47 private:
48 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
49 std::string getInstructionCase(Record *R, CodeGenTarget &Target);
50 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
51 CodeGenTarget &Target);
52 bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
53 const std::string &VarName, unsigned &NumberedOp,
54 std::set<unsigned> &NamedOpIndices,
55 std::string &Case, CodeGenTarget &Target);
56
57 void emitInstructionBaseValues(
58 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
59 CodeGenTarget &Target, int HwMode = -1);
60 unsigned BitWidth;
61 bool UseAPInt;
62 };
63
64 // If the VarBitInit at position 'bit' matches the specified variable then
65 // return the variable bit position. Otherwise return -1.
getVariableBit(const std::string & VarName,BitsInit * BI,int bit)66 int CodeEmitterGen::getVariableBit(const std::string &VarName,
67 BitsInit *BI, int bit) {
68 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
69 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
70 if (VI->getName() == VarName)
71 return VBI->getBitNum();
72 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
73 if (VI->getName() == VarName)
74 return 0;
75 }
76
77 return -1;
78 }
79
80 // Returns true if it succeeds, false if an error.
addCodeToMergeInOperand(Record * R,BitsInit * BI,const std::string & VarName,unsigned & NumberedOp,std::set<unsigned> & NamedOpIndices,std::string & Case,CodeGenTarget & Target)81 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
82 const std::string &VarName,
83 unsigned &NumberedOp,
84 std::set<unsigned> &NamedOpIndices,
85 std::string &Case,
86 CodeGenTarget &Target) {
87 CodeGenInstruction &CGI = Target.getInstruction(R);
88
89 // Determine if VarName actually contributes to the Inst encoding.
90 int bit = BI->getNumBits()-1;
91
92 // Scan for a bit that this contributed to.
93 for (; bit >= 0; ) {
94 if (getVariableBit(VarName, BI, bit) != -1)
95 break;
96
97 --bit;
98 }
99
100 // If we found no bits, ignore this value, otherwise emit the call to get the
101 // operand encoding.
102 if (bit < 0)
103 return true;
104
105 // If the operand matches by name, reference according to that
106 // operand number. Non-matching operands are assumed to be in
107 // order.
108 unsigned OpIdx;
109 std::pair<unsigned, unsigned> SubOp;
110 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {
111 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
112 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
113 // Get the machine operand number for the indicated operand.
114 OpIdx = CGI.Operands[OpIdx].MIOperandNo;
115 } else {
116 // Fall back to positional lookup. By default, we now disable positional
117 // lookup (and print an error, below), but even so, we'll do the lookup to
118 // help print a helpful diagnostic message.
119 //
120 // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all
121 // this code, just leaving a "no operand named X in record Y" error.
122
123 unsigned NumberOps = CGI.Operands.size();
124 /// If this operand is not supposed to be emitted by the
125 /// generated emitter, skip it.
126 while (NumberedOp < NumberOps &&
127 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
128 (!NamedOpIndices.empty() && NamedOpIndices.count(
129 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
130 ++NumberedOp;
131 }
132
133 if (NumberedOp >=
134 CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
135 if (!Target.getInstructionSet()->getValueAsBit(
136 "useDeprecatedPositionallyEncodedOperands")) {
137 PrintError(R, Twine("No operand named ") + VarName + " in record " +
138 R->getName() +
139 " (would've given 'too few operands' error with "
140 "useDeprecatedPositionallyEncodedOperands=true)");
141 } else {
142 PrintError(R, "Too few operands in record " + R->getName() +
143 " (no match for variable " + VarName + ")");
144 }
145 return false;
146 }
147
148 OpIdx = NumberedOp++;
149
150 if (!Target.getInstructionSet()->getValueAsBit(
151 "useDeprecatedPositionallyEncodedOperands")) {
152 std::pair<unsigned, unsigned> SO =
153 CGI.Operands.getSubOperandNumber(OpIdx);
154 std::string OpName = CGI.Operands[SO.first].Name;
155 PrintError(R, Twine("No operand named ") + VarName + " in record " +
156 R->getName() + " (would've used positional operand #" +
157 Twine(SO.first) + " ('" + OpName + "') sub-op #" +
158 Twine(SO.second) +
159 " with useDeprecatedPositionallyEncodedOperands=true)");
160 return false;
161 }
162 }
163
164 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
165 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!");
166 return false;
167 }
168
169 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
170 std::string &EncoderMethodName =
171 CGI.Operands[SO.first].EncoderMethodNames[SO.second];
172
173 if (UseAPInt)
174 Case += " op.clearAllBits();\n";
175
176 Case += " // op: " + VarName + "\n";
177
178 // If the source operand has a custom encoder, use it.
179 if (!EncoderMethodName.empty()) {
180 if (UseAPInt) {
181 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx);
182 Case += ", op";
183 } else {
184 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
185 }
186 Case += ", Fixups, STI);\n";
187 } else {
188 if (UseAPInt) {
189 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
190 Case += ", op, Fixups, STI";
191 } else {
192 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
193 Case += ", Fixups, STI";
194 }
195 Case += ");\n";
196 }
197
198 // Precalculate the number of lits this variable contributes to in the
199 // operand. If there is a single lit (consecutive range of bits) we can use a
200 // destructive sequence on APInt that reduces memory allocations.
201 int numOperandLits = 0;
202 for (int tmpBit = bit; tmpBit >= 0;) {
203 int varBit = getVariableBit(VarName, BI, tmpBit);
204
205 // If this bit isn't from a variable, skip it.
206 if (varBit == -1) {
207 --tmpBit;
208 continue;
209 }
210
211 // Figure out the consecutive range of bits covered by this operand, in
212 // order to generate better encoding code.
213 int beginVarBit = varBit;
214 int N = 1;
215 for (--tmpBit; tmpBit >= 0;) {
216 varBit = getVariableBit(VarName, BI, tmpBit);
217 if (varBit == -1 || varBit != (beginVarBit - N))
218 break;
219 ++N;
220 --tmpBit;
221 }
222 ++numOperandLits;
223 }
224
225 for (; bit >= 0; ) {
226 int varBit = getVariableBit(VarName, BI, bit);
227
228 // If this bit isn't from a variable, skip it.
229 if (varBit == -1) {
230 --bit;
231 continue;
232 }
233
234 // Figure out the consecutive range of bits covered by this operand, in
235 // order to generate better encoding code.
236 int beginInstBit = bit;
237 int beginVarBit = varBit;
238 int N = 1;
239 for (--bit; bit >= 0;) {
240 varBit = getVariableBit(VarName, BI, bit);
241 if (varBit == -1 || varBit != (beginVarBit - N)) break;
242 ++N;
243 --bit;
244 }
245
246 std::string maskStr;
247 int opShift;
248
249 unsigned loBit = beginVarBit - N + 1;
250 unsigned hiBit = loBit + N;
251 unsigned loInstBit = beginInstBit - N + 1;
252 if (UseAPInt) {
253 std::string extractStr;
254 if (N >= 64) {
255 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
256 itostr(loBit) + ")";
257 Case += " Value.insertBits(" + extractStr + ", " +
258 itostr(loInstBit) + ");\n";
259 } else {
260 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
261 ", " + itostr(loBit) + ")";
262 Case += " Value.insertBits(" + extractStr + ", " +
263 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
264 }
265 } else {
266 uint64_t opMask = ~(uint64_t)0 >> (64 - N);
267 opShift = beginVarBit - N + 1;
268 opMask <<= opShift;
269 maskStr = "UINT64_C(" + utostr(opMask) + ")";
270 opShift = beginInstBit - beginVarBit;
271
272 if (numOperandLits == 1) {
273 Case += " op &= " + maskStr + ";\n";
274 if (opShift > 0) {
275 Case += " op <<= " + itostr(opShift) + ";\n";
276 } else if (opShift < 0) {
277 Case += " op >>= " + itostr(-opShift) + ";\n";
278 }
279 Case += " Value |= op;\n";
280 } else {
281 if (opShift > 0) {
282 Case += " Value |= (op & " + maskStr + ") << " +
283 itostr(opShift) + ";\n";
284 } else if (opShift < 0) {
285 Case += " Value |= (op & " + maskStr + ") >> " +
286 itostr(-opShift) + ";\n";
287 } else {
288 Case += " Value |= (op & " + maskStr + ");\n";
289 }
290 }
291 }
292 }
293 return true;
294 }
295
getInstructionCase(Record * R,CodeGenTarget & Target)296 std::string CodeEmitterGen::getInstructionCase(Record *R,
297 CodeGenTarget &Target) {
298 std::string Case;
299 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
300 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
301 const CodeGenHwModes &HWM = Target.getHwModes();
302 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
303 Case += " switch (HwMode) {\n";
304 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
305 for (auto &KV : EBM) {
306 Case += " case " + itostr(KV.first) + ": {\n";
307 Case += getInstructionCaseForEncoding(R, KV.second, Target);
308 Case += " break;\n";
309 Case += " }\n";
310 }
311 Case += " }\n";
312 return Case;
313 }
314 }
315 return getInstructionCaseForEncoding(R, R, Target);
316 }
317
getInstructionCaseForEncoding(Record * R,Record * EncodingDef,CodeGenTarget & Target)318 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
319 CodeGenTarget &Target) {
320 std::string Case;
321 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
322 unsigned NumberedOp = 0;
323 std::set<unsigned> NamedOpIndices;
324
325 // Collect the set of operand indices that might correspond to named
326 // operand, and skip these when assigning operands based on position.
327 if (Target.getInstructionSet()->
328 getValueAsBit("noNamedPositionallyEncodedOperands")) {
329 CodeGenInstruction &CGI = Target.getInstruction(R);
330 for (const RecordVal &RV : R->getValues()) {
331 unsigned OpIdx;
332 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
333 continue;
334
335 NamedOpIndices.insert(OpIdx);
336 }
337 }
338
339 // Loop over all of the fields in the instruction, determining which are the
340 // operands to the instruction.
341 bool Success = true;
342 for (const RecordVal &RV : EncodingDef->getValues()) {
343 // Ignore fixed fields in the record, we're looking for values like:
344 // bits<5> RST = { ?, ?, ?, ?, ? };
345 if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
346 continue;
347
348 Success &=
349 addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
350 NamedOpIndices, Case, Target);
351 }
352
353 if (!Success) {
354 // Dump the record, so we can see what's going on...
355 std::string E;
356 raw_string_ostream S(E);
357 S << "Dumping record for previous error:\n";
358 S << *R;
359 PrintNote(E);
360 }
361
362 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
363 if (!PostEmitter.empty()) {
364 Case += " Value = ";
365 Case += PostEmitter;
366 Case += "(MI, Value";
367 Case += ", STI";
368 Case += ");\n";
369 }
370
371 return Case;
372 }
373
emitInstBits(raw_ostream & OS,const APInt & Bits)374 static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
375 for (unsigned I = 0; I < Bits.getNumWords(); ++I)
376 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])
377 << ")";
378 }
379
emitInstructionBaseValues(raw_ostream & o,ArrayRef<const CodeGenInstruction * > NumberedInstructions,CodeGenTarget & Target,int HwMode)380 void CodeEmitterGen::emitInstructionBaseValues(
381 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
382 CodeGenTarget &Target, int HwMode) {
383 const CodeGenHwModes &HWM = Target.getHwModes();
384 if (HwMode == -1)
385 o << " static const uint64_t InstBits[] = {\n";
386 else
387 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name
388 << "[] = {\n";
389
390 for (const CodeGenInstruction *CGI : NumberedInstructions) {
391 Record *R = CGI->TheDef;
392
393 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
394 R->getValueAsBit("isPseudo")) {
395 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n";
396 continue;
397 }
398
399 Record *EncodingDef = R;
400 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
401 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
402 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
403 if (EBM.hasMode(HwMode))
404 EncodingDef = EBM.get(HwMode);
405 }
406 }
407 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
408
409 // Start by filling in fixed values.
410 APInt Value(BitWidth, 0);
411 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
412 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
413 Value.setBit(i);
414 }
415 o << " ";
416 emitInstBits(o, Value);
417 o << "," << '\t' << "// " << R->getName() << "\n";
418 }
419 o << " UINT64_C(0)\n };\n";
420 }
421
run(raw_ostream & o)422 void CodeEmitterGen::run(raw_ostream &o) {
423 CodeGenTarget Target(Records);
424 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
425
426 // For little-endian instruction bit encodings, reverse the bit order
427 Target.reverseBitsForLittleEndianEncoding();
428
429 ArrayRef<const CodeGenInstruction*> NumberedInstructions =
430 Target.getInstructionsByEnumValue();
431
432 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
433 Record *R = CGI->TheDef;
434 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
435 })) {
436 emitVarLenCodeEmitter(Records, o);
437 } else {
438 const CodeGenHwModes &HWM = Target.getHwModes();
439 // The set of HwModes used by instruction encodings.
440 std::set<unsigned> HwModes;
441 BitWidth = 0;
442 for (const CodeGenInstruction *CGI : NumberedInstructions) {
443 Record *R = CGI->TheDef;
444 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
445 R->getValueAsBit("isPseudo"))
446 continue;
447
448 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
449 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
450 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
451 for (auto &KV : EBM) {
452 BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
453 BitWidth = std::max(BitWidth, BI->getNumBits());
454 HwModes.insert(KV.first);
455 }
456 continue;
457 }
458 }
459 BitsInit *BI = R->getValueAsBitsInit("Inst");
460 BitWidth = std::max(BitWidth, BI->getNumBits());
461 }
462 UseAPInt = BitWidth > 64;
463
464 // Emit function declaration
465 if (UseAPInt) {
466 o << "void " << Target.getName()
467 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
468 << " SmallVectorImpl<MCFixup> &Fixups,\n"
469 << " APInt &Inst,\n"
470 << " APInt &Scratch,\n"
471 << " const MCSubtargetInfo &STI) const {\n";
472 } else {
473 o << "uint64_t " << Target.getName();
474 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
475 << " SmallVectorImpl<MCFixup> &Fixups,\n"
476 << " const MCSubtargetInfo &STI) const {\n";
477 }
478
479 // Emit instruction base values
480 if (HwModes.empty()) {
481 emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
482 } else {
483 for (unsigned HwMode : HwModes)
484 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
485 }
486
487 if (!HwModes.empty()) {
488 o << " const uint64_t *InstBits;\n";
489 o << " unsigned HwMode = STI.getHwMode();\n";
490 o << " switch (HwMode) {\n";
491 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
492 for (unsigned I : HwModes) {
493 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
494 << "; break;\n";
495 }
496 o << " };\n";
497 }
498
499 // Map to accumulate all the cases.
500 std::map<std::string, std::vector<std::string>> CaseMap;
501
502 // Construct all cases statement for each opcode
503 for (Record *R : Insts) {
504 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
505 R->getValueAsBit("isPseudo"))
506 continue;
507 std::string InstName =
508 (R->getValueAsString("Namespace") + "::" + R->getName()).str();
509 std::string Case = getInstructionCase(R, Target);
510
511 CaseMap[Case].push_back(std::move(InstName));
512 }
513
514 // Emit initial function code
515 if (UseAPInt) {
516 int NumWords = APInt::getNumWords(BitWidth);
517 o << " const unsigned opcode = MI.getOpcode();\n"
518 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
519 << " Scratch = Scratch.zext(" << BitWidth << ");\n"
520 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
521 << NumWords << ", " << NumWords << "));\n"
522 << " APInt &Value = Inst;\n"
523 << " APInt &op = Scratch;\n"
524 << " switch (opcode) {\n";
525 } else {
526 o << " const unsigned opcode = MI.getOpcode();\n"
527 << " uint64_t Value = InstBits[opcode];\n"
528 << " uint64_t op = 0;\n"
529 << " (void)op; // suppress warning\n"
530 << " switch (opcode) {\n";
531 }
532
533 // Emit each case statement
534 std::map<std::string, std::vector<std::string>>::iterator IE, EE;
535 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
536 const std::string &Case = IE->first;
537 std::vector<std::string> &InstList = IE->second;
538
539 for (int i = 0, N = InstList.size(); i < N; i++) {
540 if (i)
541 o << "\n";
542 o << " case " << InstList[i] << ":";
543 }
544 o << " {\n";
545 o << Case;
546 o << " break;\n"
547 << " }\n";
548 }
549
550 // Default case: unhandled opcode
551 o << " default:\n"
552 << " std::string msg;\n"
553 << " raw_string_ostream Msg(msg);\n"
554 << " Msg << \"Not supported instr: \" << MI;\n"
555 << " report_fatal_error(Msg.str().c_str());\n"
556 << " }\n";
557 if (UseAPInt)
558 o << " Inst = Value;\n";
559 else
560 o << " return Value;\n";
561 o << "}\n\n";
562 }
563 }
564
565 } // end anonymous namespace
566
567 namespace llvm {
568
EmitCodeEmitter(RecordKeeper & RK,raw_ostream & OS)569 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
570 emitSourceFileHeader("Machine Code Emitter", OS);
571 CodeEmitterGen(RK).run(OS);
572 }
573
574 } // end namespace llvm
575