1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenHwModes.h" 16 #include "CodeGenInstruction.h" 17 #include "CodeGenTarget.h" 18 #include "InfoByHwMode.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/Support/Casting.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/TableGen/Error.h" 26 #include "llvm/TableGen/Record.h" 27 #include "llvm/TableGen/TableGenBackend.h" 28 #include <cstdint> 29 #include <map> 30 #include <set> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 using namespace llvm; 36 37 namespace { 38 39 class CodeEmitterGen { 40 RecordKeeper &Records; 41 42 public: 43 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 44 45 void run(raw_ostream &o); 46 47 private: 48 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 49 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 50 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 51 CodeGenTarget &Target); 52 bool addCodeToMergeInOperand(Record *R, BitsInit *BI, 53 const std::string &VarName, 54 std::string &Case, CodeGenTarget &Target); 55 56 void emitInstructionBaseValues( 57 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 58 CodeGenTarget &Target, int HwMode = -1); 59 unsigned BitWidth; 60 bool UseAPInt; 61 }; 62 63 // If the VarBitInit at position 'bit' matches the specified variable then 64 // return the variable bit position. Otherwise return -1. 65 int CodeEmitterGen::getVariableBit(const std::string &VarName, 66 BitsInit *BI, int bit) { 67 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 68 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 69 if (VI->getName() == VarName) 70 return VBI->getBitNum(); 71 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 72 if (VI->getName() == VarName) 73 return 0; 74 } 75 76 return -1; 77 } 78 79 // Returns true if it succeeds, false if an error. 80 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, 81 const std::string &VarName, 82 std::string &Case, 83 CodeGenTarget &Target) { 84 CodeGenInstruction &CGI = Target.getInstruction(R); 85 86 // Determine if VarName actually contributes to the Inst encoding. 87 int bit = BI->getNumBits()-1; 88 89 // Scan for a bit that this contributed to. 90 for (; bit >= 0; ) { 91 if (getVariableBit(VarName, BI, bit) != -1) 92 break; 93 94 --bit; 95 } 96 97 // If we found no bits, ignore this value, otherwise emit the call to get the 98 // operand encoding. 99 if (bit < 0) 100 return true; 101 102 // If the operand matches by name, reference according to that 103 // operand number. Non-matching operands are assumed to be in 104 // order. 105 unsigned OpIdx; 106 std::pair<unsigned, unsigned> SubOp; 107 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) { 108 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second; 109 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 110 // Get the machine operand number for the indicated operand. 111 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 112 } else { 113 PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName()); 114 return false; 115 } 116 117 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) { 118 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!"); 119 return false; 120 } 121 122 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 123 std::string &EncoderMethodName = 124 CGI.Operands[SO.first].EncoderMethodNames[SO.second]; 125 126 if (UseAPInt) 127 Case += " op.clearAllBits();\n"; 128 129 Case += " // op: " + VarName + "\n"; 130 131 // If the source operand has a custom encoder, use it. 132 if (!EncoderMethodName.empty()) { 133 if (UseAPInt) { 134 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 135 Case += ", op"; 136 } else { 137 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 138 } 139 Case += ", Fixups, STI);\n"; 140 } else { 141 if (UseAPInt) { 142 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 143 Case += ", op, Fixups, STI"; 144 } else { 145 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 146 Case += ", Fixups, STI"; 147 } 148 Case += ");\n"; 149 } 150 151 // Precalculate the number of lits this variable contributes to in the 152 // operand. If there is a single lit (consecutive range of bits) we can use a 153 // destructive sequence on APInt that reduces memory allocations. 154 int numOperandLits = 0; 155 for (int tmpBit = bit; tmpBit >= 0;) { 156 int varBit = getVariableBit(VarName, BI, tmpBit); 157 158 // If this bit isn't from a variable, skip it. 159 if (varBit == -1) { 160 --tmpBit; 161 continue; 162 } 163 164 // Figure out the consecutive range of bits covered by this operand, in 165 // order to generate better encoding code. 166 int beginVarBit = varBit; 167 int N = 1; 168 for (--tmpBit; tmpBit >= 0;) { 169 varBit = getVariableBit(VarName, BI, tmpBit); 170 if (varBit == -1 || varBit != (beginVarBit - N)) 171 break; 172 ++N; 173 --tmpBit; 174 } 175 ++numOperandLits; 176 } 177 178 for (; bit >= 0; ) { 179 int varBit = getVariableBit(VarName, BI, bit); 180 181 // If this bit isn't from a variable, skip it. 182 if (varBit == -1) { 183 --bit; 184 continue; 185 } 186 187 // Figure out the consecutive range of bits covered by this operand, in 188 // order to generate better encoding code. 189 int beginInstBit = bit; 190 int beginVarBit = varBit; 191 int N = 1; 192 for (--bit; bit >= 0;) { 193 varBit = getVariableBit(VarName, BI, bit); 194 if (varBit == -1 || varBit != (beginVarBit - N)) break; 195 ++N; 196 --bit; 197 } 198 199 std::string maskStr; 200 int opShift; 201 202 unsigned loBit = beginVarBit - N + 1; 203 unsigned hiBit = loBit + N; 204 unsigned loInstBit = beginInstBit - N + 1; 205 if (UseAPInt) { 206 std::string extractStr; 207 if (N >= 64) { 208 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 209 itostr(loBit) + ")"; 210 Case += " Value.insertBits(" + extractStr + ", " + 211 itostr(loInstBit) + ");\n"; 212 } else { 213 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 214 ", " + itostr(loBit) + ")"; 215 Case += " Value.insertBits(" + extractStr + ", " + 216 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 217 } 218 } else { 219 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 220 opShift = beginVarBit - N + 1; 221 opMask <<= opShift; 222 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 223 opShift = beginInstBit - beginVarBit; 224 225 if (numOperandLits == 1) { 226 Case += " op &= " + maskStr + ";\n"; 227 if (opShift > 0) { 228 Case += " op <<= " + itostr(opShift) + ";\n"; 229 } else if (opShift < 0) { 230 Case += " op >>= " + itostr(-opShift) + ";\n"; 231 } 232 Case += " Value |= op;\n"; 233 } else { 234 if (opShift > 0) { 235 Case += " Value |= (op & " + maskStr + ") << " + 236 itostr(opShift) + ";\n"; 237 } else if (opShift < 0) { 238 Case += " Value |= (op & " + maskStr + ") >> " + 239 itostr(-opShift) + ";\n"; 240 } else { 241 Case += " Value |= (op & " + maskStr + ");\n"; 242 } 243 } 244 } 245 } 246 return true; 247 } 248 249 std::string CodeEmitterGen::getInstructionCase(Record *R, 250 CodeGenTarget &Target) { 251 std::string Case; 252 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 253 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 254 const CodeGenHwModes &HWM = Target.getHwModes(); 255 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 256 Case += " switch (HwMode) {\n"; 257 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 258 for (auto &KV : EBM) { 259 Case += " case " + itostr(KV.first) + ": {\n"; 260 Case += getInstructionCaseForEncoding(R, KV.second, Target); 261 Case += " break;\n"; 262 Case += " }\n"; 263 } 264 Case += " }\n"; 265 return Case; 266 } 267 } 268 return getInstructionCaseForEncoding(R, R, Target); 269 } 270 271 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 272 CodeGenTarget &Target) { 273 std::string Case; 274 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 275 276 // Loop over all of the fields in the instruction, determining which are the 277 // operands to the instruction. 278 bool Success = true; 279 for (const RecordVal &RV : EncodingDef->getValues()) { 280 // Ignore fixed fields in the record, we're looking for values like: 281 // bits<5> RST = { ?, ?, ?, ?, ? }; 282 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 283 continue; 284 285 Success &= 286 addCodeToMergeInOperand(R, BI, std::string(RV.getName()), 287 Case, Target); 288 } 289 290 if (!Success) { 291 // Dump the record, so we can see what's going on... 292 std::string E; 293 raw_string_ostream S(E); 294 S << "Dumping record for previous error:\n"; 295 S << *R; 296 PrintNote(E); 297 } 298 299 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 300 if (!PostEmitter.empty()) { 301 Case += " Value = "; 302 Case += PostEmitter; 303 Case += "(MI, Value"; 304 Case += ", STI"; 305 Case += ");\n"; 306 } 307 308 return Case; 309 } 310 311 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 312 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 313 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 314 << ")"; 315 } 316 317 void CodeEmitterGen::emitInstructionBaseValues( 318 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 319 CodeGenTarget &Target, int HwMode) { 320 const CodeGenHwModes &HWM = Target.getHwModes(); 321 if (HwMode == -1) 322 o << " static const uint64_t InstBits[] = {\n"; 323 else 324 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 325 << "[] = {\n"; 326 327 for (const CodeGenInstruction *CGI : NumberedInstructions) { 328 Record *R = CGI->TheDef; 329 330 if (R->getValueAsString("Namespace") == "TargetOpcode" || 331 R->getValueAsBit("isPseudo")) { 332 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 333 continue; 334 } 335 336 Record *EncodingDef = R; 337 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 338 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 339 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 340 if (EBM.hasMode(HwMode)) 341 EncodingDef = EBM.get(HwMode); 342 } 343 } 344 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 345 346 // Start by filling in fixed values. 347 APInt Value(BitWidth, 0); 348 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 349 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) 350 Value.setBit(i); 351 } 352 o << " "; 353 emitInstBits(o, Value); 354 o << "," << '\t' << "// " << R->getName() << "\n"; 355 } 356 o << " UINT64_C(0)\n };\n"; 357 } 358 359 void CodeEmitterGen::run(raw_ostream &o) { 360 emitSourceFileHeader("Machine Code Emitter", o); 361 362 CodeGenTarget Target(Records); 363 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 364 365 // For little-endian instruction bit encodings, reverse the bit order 366 Target.reverseBitsForLittleEndianEncoding(); 367 368 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 369 Target.getInstructionsByEnumValue(); 370 371 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 372 Record *R = CGI->TheDef; 373 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 374 })) { 375 emitVarLenCodeEmitter(Records, o); 376 } else { 377 const CodeGenHwModes &HWM = Target.getHwModes(); 378 // The set of HwModes used by instruction encodings. 379 std::set<unsigned> HwModes; 380 BitWidth = 0; 381 for (const CodeGenInstruction *CGI : NumberedInstructions) { 382 Record *R = CGI->TheDef; 383 if (R->getValueAsString("Namespace") == "TargetOpcode" || 384 R->getValueAsBit("isPseudo")) 385 continue; 386 387 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 388 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 389 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 390 for (auto &KV : EBM) { 391 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 392 BitWidth = std::max(BitWidth, BI->getNumBits()); 393 HwModes.insert(KV.first); 394 } 395 continue; 396 } 397 } 398 BitsInit *BI = R->getValueAsBitsInit("Inst"); 399 BitWidth = std::max(BitWidth, BI->getNumBits()); 400 } 401 UseAPInt = BitWidth > 64; 402 403 // Emit function declaration 404 if (UseAPInt) { 405 o << "void " << Target.getName() 406 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 407 << " SmallVectorImpl<MCFixup> &Fixups,\n" 408 << " APInt &Inst,\n" 409 << " APInt &Scratch,\n" 410 << " const MCSubtargetInfo &STI) const {\n"; 411 } else { 412 o << "uint64_t " << Target.getName(); 413 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 414 << " SmallVectorImpl<MCFixup> &Fixups,\n" 415 << " const MCSubtargetInfo &STI) const {\n"; 416 } 417 418 // Emit instruction base values 419 if (HwModes.empty()) { 420 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 421 } else { 422 for (unsigned HwMode : HwModes) 423 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 424 } 425 426 if (!HwModes.empty()) { 427 o << " const uint64_t *InstBits;\n"; 428 o << " unsigned HwMode = STI.getHwMode();\n"; 429 o << " switch (HwMode) {\n"; 430 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 431 for (unsigned I : HwModes) { 432 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 433 << "; break;\n"; 434 } 435 o << " };\n"; 436 } 437 438 // Map to accumulate all the cases. 439 std::map<std::string, std::vector<std::string>> CaseMap; 440 441 // Construct all cases statement for each opcode 442 for (Record *R : Insts) { 443 if (R->getValueAsString("Namespace") == "TargetOpcode" || 444 R->getValueAsBit("isPseudo")) 445 continue; 446 std::string InstName = 447 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 448 std::string Case = getInstructionCase(R, Target); 449 450 CaseMap[Case].push_back(std::move(InstName)); 451 } 452 453 // Emit initial function code 454 if (UseAPInt) { 455 int NumWords = APInt::getNumWords(BitWidth); 456 o << " const unsigned opcode = MI.getOpcode();\n" 457 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 458 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 459 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " 460 << NumWords << ", " << NumWords << "));\n" 461 << " APInt &Value = Inst;\n" 462 << " APInt &op = Scratch;\n" 463 << " switch (opcode) {\n"; 464 } else { 465 o << " const unsigned opcode = MI.getOpcode();\n" 466 << " uint64_t Value = InstBits[opcode];\n" 467 << " uint64_t op = 0;\n" 468 << " (void)op; // suppress warning\n" 469 << " switch (opcode) {\n"; 470 } 471 472 // Emit each case statement 473 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 474 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 475 const std::string &Case = IE->first; 476 std::vector<std::string> &InstList = IE->second; 477 478 for (int i = 0, N = InstList.size(); i < N; i++) { 479 if (i) 480 o << "\n"; 481 o << " case " << InstList[i] << ":"; 482 } 483 o << " {\n"; 484 o << Case; 485 o << " break;\n" 486 << " }\n"; 487 } 488 489 // Default case: unhandled opcode 490 o << " default:\n" 491 << " std::string msg;\n" 492 << " raw_string_ostream Msg(msg);\n" 493 << " Msg << \"Not supported instr: \" << MI;\n" 494 << " report_fatal_error(Msg.str().c_str());\n" 495 << " }\n"; 496 if (UseAPInt) 497 o << " Inst = Value;\n"; 498 else 499 o << " return Value;\n"; 500 o << "}\n\n"; 501 } 502 } 503 504 } // end anonymous namespace 505 506 static TableGen::Emitter::OptClass<CodeEmitterGen> 507 X("gen-emitter", "Generate machine code emitter"); 508