1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenHwModes.h" 16 #include "CodeGenInstruction.h" 17 #include "CodeGenTarget.h" 18 #include "InfoByHwMode.h" 19 #include "TableGenBackends.h" 20 #include "VarLenCodeEmitterGen.h" 21 #include "llvm/ADT/APInt.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/Support/Casting.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "llvm/TableGen/Error.h" 27 #include "llvm/TableGen/Record.h" 28 #include "llvm/TableGen/TableGenBackend.h" 29 #include <cstdint> 30 #include <map> 31 #include <set> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 using namespace llvm; 37 38 namespace { 39 40 class CodeEmitterGen { 41 RecordKeeper &Records; 42 43 public: 44 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 45 46 void run(raw_ostream &o); 47 48 private: 49 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 50 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 51 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 52 CodeGenTarget &Target); 53 bool addCodeToMergeInOperand(Record *R, BitsInit *BI, 54 const std::string &VarName, unsigned &NumberedOp, 55 std::set<unsigned> &NamedOpIndices, 56 std::string &Case, CodeGenTarget &Target); 57 58 void emitInstructionBaseValues( 59 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 60 CodeGenTarget &Target, int HwMode = -1); 61 unsigned BitWidth; 62 bool UseAPInt; 63 }; 64 65 // If the VarBitInit at position 'bit' matches the specified variable then 66 // return the variable bit position. Otherwise return -1. 67 int CodeEmitterGen::getVariableBit(const std::string &VarName, 68 BitsInit *BI, int bit) { 69 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 70 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 71 if (VI->getName() == VarName) 72 return VBI->getBitNum(); 73 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 74 if (VI->getName() == VarName) 75 return 0; 76 } 77 78 return -1; 79 } 80 81 // Returns true if it succeeds, false if an error. 82 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, 83 const std::string &VarName, 84 unsigned &NumberedOp, 85 std::set<unsigned> &NamedOpIndices, 86 std::string &Case, 87 CodeGenTarget &Target) { 88 CodeGenInstruction &CGI = Target.getInstruction(R); 89 90 // Determine if VarName actually contributes to the Inst encoding. 91 int bit = BI->getNumBits()-1; 92 93 // Scan for a bit that this contributed to. 94 for (; bit >= 0; ) { 95 if (getVariableBit(VarName, BI, bit) != -1) 96 break; 97 98 --bit; 99 } 100 101 // If we found no bits, ignore this value, otherwise emit the call to get the 102 // operand encoding. 103 if (bit < 0) 104 return true; 105 106 // If the operand matches by name, reference according to that 107 // operand number. Non-matching operands are assumed to be in 108 // order. 109 unsigned OpIdx; 110 std::pair<unsigned, unsigned> SubOp; 111 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) { 112 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second; 113 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 114 // Get the machine operand number for the indicated operand. 115 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 116 } else { 117 // Fall back to positional lookup. By default, we now disable positional 118 // lookup (and print an error, below), but even so, we'll do the lookup to 119 // help print a helpful diagnostic message. 120 // 121 // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all 122 // this code, just leaving a "no operand named X in record Y" error. 123 124 unsigned NumberOps = CGI.Operands.size(); 125 /// If this operand is not supposed to be emitted by the 126 /// generated emitter, skip it. 127 while (NumberedOp < NumberOps && 128 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 129 (!NamedOpIndices.empty() && NamedOpIndices.count( 130 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) { 131 ++NumberedOp; 132 } 133 134 if (NumberedOp >= 135 CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) { 136 if (!Target.getInstructionSet()->getValueAsBit( 137 "useDeprecatedPositionallyEncodedOperands")) { 138 PrintError(R, Twine("No operand named ") + VarName + " in record " + 139 R->getName() + 140 " (would've given 'too few operands' error with " 141 "useDeprecatedPositionallyEncodedOperands=true)"); 142 } else { 143 PrintError(R, "Too few operands in record " + R->getName() + 144 " (no match for variable " + VarName + ")"); 145 } 146 return false; 147 } 148 149 OpIdx = NumberedOp++; 150 151 if (!Target.getInstructionSet()->getValueAsBit( 152 "useDeprecatedPositionallyEncodedOperands")) { 153 std::pair<unsigned, unsigned> SO = 154 CGI.Operands.getSubOperandNumber(OpIdx); 155 std::string OpName = CGI.Operands[SO.first].Name; 156 PrintError(R, Twine("No operand named ") + VarName + " in record " + 157 R->getName() + " (would've used positional operand #" + 158 Twine(SO.first) + " ('" + OpName + "') sub-op #" + 159 Twine(SO.second) + 160 " with useDeprecatedPositionallyEncodedOperands=true)"); 161 return false; 162 } 163 } 164 165 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) { 166 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!"); 167 return false; 168 } 169 170 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 171 std::string &EncoderMethodName = 172 CGI.Operands[SO.first].EncoderMethodNames[SO.second]; 173 174 if (UseAPInt) 175 Case += " op.clearAllBits();\n"; 176 177 Case += " // op: " + VarName + "\n"; 178 179 // If the source operand has a custom encoder, use it. 180 if (!EncoderMethodName.empty()) { 181 if (UseAPInt) { 182 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 183 Case += ", op"; 184 } else { 185 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 186 } 187 Case += ", Fixups, STI);\n"; 188 } else { 189 if (UseAPInt) { 190 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 191 Case += ", op, Fixups, STI"; 192 } else { 193 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 194 Case += ", Fixups, STI"; 195 } 196 Case += ");\n"; 197 } 198 199 // Precalculate the number of lits this variable contributes to in the 200 // operand. If there is a single lit (consecutive range of bits) we can use a 201 // destructive sequence on APInt that reduces memory allocations. 202 int numOperandLits = 0; 203 for (int tmpBit = bit; tmpBit >= 0;) { 204 int varBit = getVariableBit(VarName, BI, tmpBit); 205 206 // If this bit isn't from a variable, skip it. 207 if (varBit == -1) { 208 --tmpBit; 209 continue; 210 } 211 212 // Figure out the consecutive range of bits covered by this operand, in 213 // order to generate better encoding code. 214 int beginVarBit = varBit; 215 int N = 1; 216 for (--tmpBit; tmpBit >= 0;) { 217 varBit = getVariableBit(VarName, BI, tmpBit); 218 if (varBit == -1 || varBit != (beginVarBit - N)) 219 break; 220 ++N; 221 --tmpBit; 222 } 223 ++numOperandLits; 224 } 225 226 for (; bit >= 0; ) { 227 int varBit = getVariableBit(VarName, BI, bit); 228 229 // If this bit isn't from a variable, skip it. 230 if (varBit == -1) { 231 --bit; 232 continue; 233 } 234 235 // Figure out the consecutive range of bits covered by this operand, in 236 // order to generate better encoding code. 237 int beginInstBit = bit; 238 int beginVarBit = varBit; 239 int N = 1; 240 for (--bit; bit >= 0;) { 241 varBit = getVariableBit(VarName, BI, bit); 242 if (varBit == -1 || varBit != (beginVarBit - N)) break; 243 ++N; 244 --bit; 245 } 246 247 std::string maskStr; 248 int opShift; 249 250 unsigned loBit = beginVarBit - N + 1; 251 unsigned hiBit = loBit + N; 252 unsigned loInstBit = beginInstBit - N + 1; 253 if (UseAPInt) { 254 std::string extractStr; 255 if (N >= 64) { 256 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 257 itostr(loBit) + ")"; 258 Case += " Value.insertBits(" + extractStr + ", " + 259 itostr(loInstBit) + ");\n"; 260 } else { 261 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 262 ", " + itostr(loBit) + ")"; 263 Case += " Value.insertBits(" + extractStr + ", " + 264 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 265 } 266 } else { 267 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 268 opShift = beginVarBit - N + 1; 269 opMask <<= opShift; 270 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 271 opShift = beginInstBit - beginVarBit; 272 273 if (numOperandLits == 1) { 274 Case += " op &= " + maskStr + ";\n"; 275 if (opShift > 0) { 276 Case += " op <<= " + itostr(opShift) + ";\n"; 277 } else if (opShift < 0) { 278 Case += " op >>= " + itostr(-opShift) + ";\n"; 279 } 280 Case += " Value |= op;\n"; 281 } else { 282 if (opShift > 0) { 283 Case += " Value |= (op & " + maskStr + ") << " + 284 itostr(opShift) + ";\n"; 285 } else if (opShift < 0) { 286 Case += " Value |= (op & " + maskStr + ") >> " + 287 itostr(-opShift) + ";\n"; 288 } else { 289 Case += " Value |= (op & " + maskStr + ");\n"; 290 } 291 } 292 } 293 } 294 return true; 295 } 296 297 std::string CodeEmitterGen::getInstructionCase(Record *R, 298 CodeGenTarget &Target) { 299 std::string Case; 300 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 301 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 302 const CodeGenHwModes &HWM = Target.getHwModes(); 303 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 304 Case += " switch (HwMode) {\n"; 305 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 306 for (auto &KV : EBM) { 307 Case += " case " + itostr(KV.first) + ": {\n"; 308 Case += getInstructionCaseForEncoding(R, KV.second, Target); 309 Case += " break;\n"; 310 Case += " }\n"; 311 } 312 Case += " }\n"; 313 return Case; 314 } 315 } 316 return getInstructionCaseForEncoding(R, R, Target); 317 } 318 319 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 320 CodeGenTarget &Target) { 321 std::string Case; 322 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 323 unsigned NumberedOp = 0; 324 std::set<unsigned> NamedOpIndices; 325 326 // Collect the set of operand indices that might correspond to named 327 // operand, and skip these when assigning operands based on position. 328 if (Target.getInstructionSet()-> 329 getValueAsBit("noNamedPositionallyEncodedOperands")) { 330 CodeGenInstruction &CGI = Target.getInstruction(R); 331 for (const RecordVal &RV : R->getValues()) { 332 unsigned OpIdx; 333 if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx)) 334 continue; 335 336 NamedOpIndices.insert(OpIdx); 337 } 338 } 339 340 // Loop over all of the fields in the instruction, determining which are the 341 // operands to the instruction. 342 bool Success = true; 343 for (const RecordVal &RV : EncodingDef->getValues()) { 344 // Ignore fixed fields in the record, we're looking for values like: 345 // bits<5> RST = { ?, ?, ?, ?, ? }; 346 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 347 continue; 348 349 Success &= 350 addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp, 351 NamedOpIndices, Case, Target); 352 } 353 354 if (!Success) { 355 // Dump the record, so we can see what's going on... 356 std::string E; 357 raw_string_ostream S(E); 358 S << "Dumping record for previous error:\n"; 359 S << *R; 360 PrintNote(E); 361 } 362 363 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 364 if (!PostEmitter.empty()) { 365 Case += " Value = "; 366 Case += PostEmitter; 367 Case += "(MI, Value"; 368 Case += ", STI"; 369 Case += ");\n"; 370 } 371 372 return Case; 373 } 374 375 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 376 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 377 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 378 << ")"; 379 } 380 381 void CodeEmitterGen::emitInstructionBaseValues( 382 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 383 CodeGenTarget &Target, int HwMode) { 384 const CodeGenHwModes &HWM = Target.getHwModes(); 385 if (HwMode == -1) 386 o << " static const uint64_t InstBits[] = {\n"; 387 else 388 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 389 << "[] = {\n"; 390 391 for (const CodeGenInstruction *CGI : NumberedInstructions) { 392 Record *R = CGI->TheDef; 393 394 if (R->getValueAsString("Namespace") == "TargetOpcode" || 395 R->getValueAsBit("isPseudo")) { 396 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 397 continue; 398 } 399 400 Record *EncodingDef = R; 401 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 402 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 403 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 404 if (EBM.hasMode(HwMode)) 405 EncodingDef = EBM.get(HwMode); 406 } 407 } 408 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 409 410 // Start by filling in fixed values. 411 APInt Value(BitWidth, 0); 412 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 413 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) 414 Value.setBit(i); 415 } 416 o << " "; 417 emitInstBits(o, Value); 418 o << "," << '\t' << "// " << R->getName() << "\n"; 419 } 420 o << " UINT64_C(0)\n };\n"; 421 } 422 423 void CodeEmitterGen::run(raw_ostream &o) { 424 CodeGenTarget Target(Records); 425 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 426 427 // For little-endian instruction bit encodings, reverse the bit order 428 Target.reverseBitsForLittleEndianEncoding(); 429 430 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 431 Target.getInstructionsByEnumValue(); 432 433 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 434 Record *R = CGI->TheDef; 435 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 436 })) { 437 emitVarLenCodeEmitter(Records, o); 438 } else { 439 const CodeGenHwModes &HWM = Target.getHwModes(); 440 // The set of HwModes used by instruction encodings. 441 std::set<unsigned> HwModes; 442 BitWidth = 0; 443 for (const CodeGenInstruction *CGI : NumberedInstructions) { 444 Record *R = CGI->TheDef; 445 if (R->getValueAsString("Namespace") == "TargetOpcode" || 446 R->getValueAsBit("isPseudo")) 447 continue; 448 449 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 450 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 451 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 452 for (auto &KV : EBM) { 453 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 454 BitWidth = std::max(BitWidth, BI->getNumBits()); 455 HwModes.insert(KV.first); 456 } 457 continue; 458 } 459 } 460 BitsInit *BI = R->getValueAsBitsInit("Inst"); 461 BitWidth = std::max(BitWidth, BI->getNumBits()); 462 } 463 UseAPInt = BitWidth > 64; 464 465 // Emit function declaration 466 if (UseAPInt) { 467 o << "void " << Target.getName() 468 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 469 << " SmallVectorImpl<MCFixup> &Fixups,\n" 470 << " APInt &Inst,\n" 471 << " APInt &Scratch,\n" 472 << " const MCSubtargetInfo &STI) const {\n"; 473 } else { 474 o << "uint64_t " << Target.getName(); 475 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 476 << " SmallVectorImpl<MCFixup> &Fixups,\n" 477 << " const MCSubtargetInfo &STI) const {\n"; 478 } 479 480 // Emit instruction base values 481 if (HwModes.empty()) { 482 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 483 } else { 484 for (unsigned HwMode : HwModes) 485 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 486 } 487 488 if (!HwModes.empty()) { 489 o << " const uint64_t *InstBits;\n"; 490 o << " unsigned HwMode = STI.getHwMode();\n"; 491 o << " switch (HwMode) {\n"; 492 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 493 for (unsigned I : HwModes) { 494 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 495 << "; break;\n"; 496 } 497 o << " };\n"; 498 } 499 500 // Map to accumulate all the cases. 501 std::map<std::string, std::vector<std::string>> CaseMap; 502 503 // Construct all cases statement for each opcode 504 for (Record *R : Insts) { 505 if (R->getValueAsString("Namespace") == "TargetOpcode" || 506 R->getValueAsBit("isPseudo")) 507 continue; 508 std::string InstName = 509 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 510 std::string Case = getInstructionCase(R, Target); 511 512 CaseMap[Case].push_back(std::move(InstName)); 513 } 514 515 // Emit initial function code 516 if (UseAPInt) { 517 int NumWords = APInt::getNumWords(BitWidth); 518 o << " const unsigned opcode = MI.getOpcode();\n" 519 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 520 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 521 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " 522 << NumWords << ", " << NumWords << "));\n" 523 << " APInt &Value = Inst;\n" 524 << " APInt &op = Scratch;\n" 525 << " switch (opcode) {\n"; 526 } else { 527 o << " const unsigned opcode = MI.getOpcode();\n" 528 << " uint64_t Value = InstBits[opcode];\n" 529 << " uint64_t op = 0;\n" 530 << " (void)op; // suppress warning\n" 531 << " switch (opcode) {\n"; 532 } 533 534 // Emit each case statement 535 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 536 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 537 const std::string &Case = IE->first; 538 std::vector<std::string> &InstList = IE->second; 539 540 for (int i = 0, N = InstList.size(); i < N; i++) { 541 if (i) 542 o << "\n"; 543 o << " case " << InstList[i] << ":"; 544 } 545 o << " {\n"; 546 o << Case; 547 o << " break;\n" 548 << " }\n"; 549 } 550 551 // Default case: unhandled opcode 552 o << " default:\n" 553 << " std::string msg;\n" 554 << " raw_string_ostream Msg(msg);\n" 555 << " Msg << \"Not supported instr: \" << MI;\n" 556 << " report_fatal_error(Msg.str().c_str());\n" 557 << " }\n"; 558 if (UseAPInt) 559 o << " Inst = Value;\n"; 560 else 561 o << " return Value;\n"; 562 o << "}\n\n"; 563 } 564 } 565 566 } // end anonymous namespace 567 568 namespace llvm { 569 570 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 571 emitSourceFileHeader("Machine Code Emitter", OS); 572 CodeEmitterGen(RK).run(OS); 573 } 574 575 } // end namespace llvm 576