1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // CodeEmitterGen uses the descriptions of instructions and their fields to 10 // construct an automated code emitter: a function that, given a MachineInstr, 11 // returns the (currently, 32-bit unsigned) value of the instruction. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CodeGenHwModes.h" 16 #include "CodeGenInstruction.h" 17 #include "CodeGenTarget.h" 18 #include "InfoByHwMode.h" 19 #include "TableGenBackends.h" 20 #include "VarLenCodeEmitterGen.h" 21 #include "llvm/ADT/APInt.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/Support/Casting.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "llvm/TableGen/Error.h" 27 #include "llvm/TableGen/Record.h" 28 #include "llvm/TableGen/TableGenBackend.h" 29 #include <cstdint> 30 #include <map> 31 #include <set> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 using namespace llvm; 37 38 namespace { 39 40 class CodeEmitterGen { 41 RecordKeeper &Records; 42 43 public: 44 CodeEmitterGen(RecordKeeper &R) : Records(R) {} 45 46 void run(raw_ostream &o); 47 48 private: 49 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); 50 std::string getInstructionCase(Record *R, CodeGenTarget &Target); 51 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 52 CodeGenTarget &Target); 53 bool addCodeToMergeInOperand(Record *R, BitsInit *BI, 54 const std::string &VarName, 55 std::string &Case, CodeGenTarget &Target); 56 57 void emitInstructionBaseValues( 58 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 59 CodeGenTarget &Target, int HwMode = -1); 60 unsigned BitWidth; 61 bool UseAPInt; 62 }; 63 64 // If the VarBitInit at position 'bit' matches the specified variable then 65 // return the variable bit position. Otherwise return -1. 66 int CodeEmitterGen::getVariableBit(const std::string &VarName, 67 BitsInit *BI, int bit) { 68 if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) { 69 if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar())) 70 if (VI->getName() == VarName) 71 return VBI->getBitNum(); 72 } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) { 73 if (VI->getName() == VarName) 74 return 0; 75 } 76 77 return -1; 78 } 79 80 // Returns true if it succeeds, false if an error. 81 bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI, 82 const std::string &VarName, 83 std::string &Case, 84 CodeGenTarget &Target) { 85 CodeGenInstruction &CGI = Target.getInstruction(R); 86 87 // Determine if VarName actually contributes to the Inst encoding. 88 int bit = BI->getNumBits()-1; 89 90 // Scan for a bit that this contributed to. 91 for (; bit >= 0; ) { 92 if (getVariableBit(VarName, BI, bit) != -1) 93 break; 94 95 --bit; 96 } 97 98 // If we found no bits, ignore this value, otherwise emit the call to get the 99 // operand encoding. 100 if (bit < 0) 101 return true; 102 103 // If the operand matches by name, reference according to that 104 // operand number. Non-matching operands are assumed to be in 105 // order. 106 unsigned OpIdx; 107 std::pair<unsigned, unsigned> SubOp; 108 if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) { 109 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second; 110 } else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) { 111 // Get the machine operand number for the indicated operand. 112 OpIdx = CGI.Operands[OpIdx].MIOperandNo; 113 } else { 114 PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName()); 115 return false; 116 } 117 118 if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) { 119 PrintError(R, "Operand " + VarName + " used but also marked as not emitted!"); 120 return false; 121 } 122 123 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx); 124 std::string &EncoderMethodName = 125 CGI.Operands[SO.first].EncoderMethodNames[SO.second]; 126 127 if (UseAPInt) 128 Case += " op.clearAllBits();\n"; 129 130 Case += " // op: " + VarName + "\n"; 131 132 // If the source operand has a custom encoder, use it. 133 if (!EncoderMethodName.empty()) { 134 if (UseAPInt) { 135 Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); 136 Case += ", op"; 137 } else { 138 Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); 139 } 140 Case += ", Fixups, STI);\n"; 141 } else { 142 if (UseAPInt) { 143 Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 144 Case += ", op, Fixups, STI"; 145 } else { 146 Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; 147 Case += ", Fixups, STI"; 148 } 149 Case += ");\n"; 150 } 151 152 // Precalculate the number of lits this variable contributes to in the 153 // operand. If there is a single lit (consecutive range of bits) we can use a 154 // destructive sequence on APInt that reduces memory allocations. 155 int numOperandLits = 0; 156 for (int tmpBit = bit; tmpBit >= 0;) { 157 int varBit = getVariableBit(VarName, BI, tmpBit); 158 159 // If this bit isn't from a variable, skip it. 160 if (varBit == -1) { 161 --tmpBit; 162 continue; 163 } 164 165 // Figure out the consecutive range of bits covered by this operand, in 166 // order to generate better encoding code. 167 int beginVarBit = varBit; 168 int N = 1; 169 for (--tmpBit; tmpBit >= 0;) { 170 varBit = getVariableBit(VarName, BI, tmpBit); 171 if (varBit == -1 || varBit != (beginVarBit - N)) 172 break; 173 ++N; 174 --tmpBit; 175 } 176 ++numOperandLits; 177 } 178 179 for (; bit >= 0; ) { 180 int varBit = getVariableBit(VarName, BI, bit); 181 182 // If this bit isn't from a variable, skip it. 183 if (varBit == -1) { 184 --bit; 185 continue; 186 } 187 188 // Figure out the consecutive range of bits covered by this operand, in 189 // order to generate better encoding code. 190 int beginInstBit = bit; 191 int beginVarBit = varBit; 192 int N = 1; 193 for (--bit; bit >= 0;) { 194 varBit = getVariableBit(VarName, BI, bit); 195 if (varBit == -1 || varBit != (beginVarBit - N)) break; 196 ++N; 197 --bit; 198 } 199 200 std::string maskStr; 201 int opShift; 202 203 unsigned loBit = beginVarBit - N + 1; 204 unsigned hiBit = loBit + N; 205 unsigned loInstBit = beginInstBit - N + 1; 206 if (UseAPInt) { 207 std::string extractStr; 208 if (N >= 64) { 209 extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + 210 itostr(loBit) + ")"; 211 Case += " Value.insertBits(" + extractStr + ", " + 212 itostr(loInstBit) + ");\n"; 213 } else { 214 extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + 215 ", " + itostr(loBit) + ")"; 216 Case += " Value.insertBits(" + extractStr + ", " + 217 itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; 218 } 219 } else { 220 uint64_t opMask = ~(uint64_t)0 >> (64 - N); 221 opShift = beginVarBit - N + 1; 222 opMask <<= opShift; 223 maskStr = "UINT64_C(" + utostr(opMask) + ")"; 224 opShift = beginInstBit - beginVarBit; 225 226 if (numOperandLits == 1) { 227 Case += " op &= " + maskStr + ";\n"; 228 if (opShift > 0) { 229 Case += " op <<= " + itostr(opShift) + ";\n"; 230 } else if (opShift < 0) { 231 Case += " op >>= " + itostr(-opShift) + ";\n"; 232 } 233 Case += " Value |= op;\n"; 234 } else { 235 if (opShift > 0) { 236 Case += " Value |= (op & " + maskStr + ") << " + 237 itostr(opShift) + ";\n"; 238 } else if (opShift < 0) { 239 Case += " Value |= (op & " + maskStr + ") >> " + 240 itostr(-opShift) + ";\n"; 241 } else { 242 Case += " Value |= (op & " + maskStr + ");\n"; 243 } 244 } 245 } 246 } 247 return true; 248 } 249 250 std::string CodeEmitterGen::getInstructionCase(Record *R, 251 CodeGenTarget &Target) { 252 std::string Case; 253 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 254 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 255 const CodeGenHwModes &HWM = Target.getHwModes(); 256 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 257 Case += " switch (HwMode) {\n"; 258 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; 259 for (auto &KV : EBM) { 260 Case += " case " + itostr(KV.first) + ": {\n"; 261 Case += getInstructionCaseForEncoding(R, KV.second, Target); 262 Case += " break;\n"; 263 Case += " }\n"; 264 } 265 Case += " }\n"; 266 return Case; 267 } 268 } 269 return getInstructionCaseForEncoding(R, R, Target); 270 } 271 272 std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, 273 CodeGenTarget &Target) { 274 std::string Case; 275 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 276 277 // Loop over all of the fields in the instruction, determining which are the 278 // operands to the instruction. 279 bool Success = true; 280 for (const RecordVal &RV : EncodingDef->getValues()) { 281 // Ignore fixed fields in the record, we're looking for values like: 282 // bits<5> RST = { ?, ?, ?, ?, ? }; 283 if (RV.isNonconcreteOK() || RV.getValue()->isComplete()) 284 continue; 285 286 Success &= 287 addCodeToMergeInOperand(R, BI, std::string(RV.getName()), 288 Case, Target); 289 } 290 291 if (!Success) { 292 // Dump the record, so we can see what's going on... 293 std::string E; 294 raw_string_ostream S(E); 295 S << "Dumping record for previous error:\n"; 296 S << *R; 297 PrintNote(E); 298 } 299 300 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod"); 301 if (!PostEmitter.empty()) { 302 Case += " Value = "; 303 Case += PostEmitter; 304 Case += "(MI, Value"; 305 Case += ", STI"; 306 Case += ");\n"; 307 } 308 309 return Case; 310 } 311 312 static void emitInstBits(raw_ostream &OS, const APInt &Bits) { 313 for (unsigned I = 0; I < Bits.getNumWords(); ++I) 314 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) 315 << ")"; 316 } 317 318 void CodeEmitterGen::emitInstructionBaseValues( 319 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions, 320 CodeGenTarget &Target, int HwMode) { 321 const CodeGenHwModes &HWM = Target.getHwModes(); 322 if (HwMode == -1) 323 o << " static const uint64_t InstBits[] = {\n"; 324 else 325 o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name 326 << "[] = {\n"; 327 328 for (const CodeGenInstruction *CGI : NumberedInstructions) { 329 Record *R = CGI->TheDef; 330 331 if (R->getValueAsString("Namespace") == "TargetOpcode" || 332 R->getValueAsBit("isPseudo")) { 333 o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; 334 continue; 335 } 336 337 Record *EncodingDef = R; 338 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 339 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 340 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 341 if (EBM.hasMode(HwMode)) 342 EncodingDef = EBM.get(HwMode); 343 } 344 } 345 BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); 346 347 // Start by filling in fixed values. 348 APInt Value(BitWidth, 0); 349 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { 350 if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue()) 351 Value.setBit(i); 352 } 353 o << " "; 354 emitInstBits(o, Value); 355 o << "," << '\t' << "// " << R->getName() << "\n"; 356 } 357 o << " UINT64_C(0)\n };\n"; 358 } 359 360 void CodeEmitterGen::run(raw_ostream &o) { 361 CodeGenTarget Target(Records); 362 std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); 363 364 // For little-endian instruction bit encodings, reverse the bit order 365 Target.reverseBitsForLittleEndianEncoding(); 366 367 ArrayRef<const CodeGenInstruction*> NumberedInstructions = 368 Target.getInstructionsByEnumValue(); 369 370 if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 371 Record *R = CGI->TheDef; 372 return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst")); 373 })) { 374 emitVarLenCodeEmitter(Records, o); 375 } else { 376 const CodeGenHwModes &HWM = Target.getHwModes(); 377 // The set of HwModes used by instruction encodings. 378 std::set<unsigned> HwModes; 379 BitWidth = 0; 380 for (const CodeGenInstruction *CGI : NumberedInstructions) { 381 Record *R = CGI->TheDef; 382 if (R->getValueAsString("Namespace") == "TargetOpcode" || 383 R->getValueAsBit("isPseudo")) 384 continue; 385 386 if (const RecordVal *RV = R->getValue("EncodingInfos")) { 387 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 388 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 389 for (auto &KV : EBM) { 390 BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); 391 BitWidth = std::max(BitWidth, BI->getNumBits()); 392 HwModes.insert(KV.first); 393 } 394 continue; 395 } 396 } 397 BitsInit *BI = R->getValueAsBitsInit("Inst"); 398 BitWidth = std::max(BitWidth, BI->getNumBits()); 399 } 400 UseAPInt = BitWidth > 64; 401 402 // Emit function declaration 403 if (UseAPInt) { 404 o << "void " << Target.getName() 405 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 406 << " SmallVectorImpl<MCFixup> &Fixups,\n" 407 << " APInt &Inst,\n" 408 << " APInt &Scratch,\n" 409 << " const MCSubtargetInfo &STI) const {\n"; 410 } else { 411 o << "uint64_t " << Target.getName(); 412 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" 413 << " SmallVectorImpl<MCFixup> &Fixups,\n" 414 << " const MCSubtargetInfo &STI) const {\n"; 415 } 416 417 // Emit instruction base values 418 if (HwModes.empty()) { 419 emitInstructionBaseValues(o, NumberedInstructions, Target, -1); 420 } else { 421 for (unsigned HwMode : HwModes) 422 emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); 423 } 424 425 if (!HwModes.empty()) { 426 o << " const uint64_t *InstBits;\n"; 427 o << " unsigned HwMode = STI.getHwMode();\n"; 428 o << " switch (HwMode) {\n"; 429 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; 430 for (unsigned I : HwModes) { 431 o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name 432 << "; break;\n"; 433 } 434 o << " };\n"; 435 } 436 437 // Map to accumulate all the cases. 438 std::map<std::string, std::vector<std::string>> CaseMap; 439 440 // Construct all cases statement for each opcode 441 for (Record *R : Insts) { 442 if (R->getValueAsString("Namespace") == "TargetOpcode" || 443 R->getValueAsBit("isPseudo")) 444 continue; 445 std::string InstName = 446 (R->getValueAsString("Namespace") + "::" + R->getName()).str(); 447 std::string Case = getInstructionCase(R, Target); 448 449 CaseMap[Case].push_back(std::move(InstName)); 450 } 451 452 // Emit initial function code 453 if (UseAPInt) { 454 int NumWords = APInt::getNumWords(BitWidth); 455 o << " const unsigned opcode = MI.getOpcode();\n" 456 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" 457 << " Scratch = Scratch.zext(" << BitWidth << ");\n" 458 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " 459 << NumWords << ", " << NumWords << "));\n" 460 << " APInt &Value = Inst;\n" 461 << " APInt &op = Scratch;\n" 462 << " switch (opcode) {\n"; 463 } else { 464 o << " const unsigned opcode = MI.getOpcode();\n" 465 << " uint64_t Value = InstBits[opcode];\n" 466 << " uint64_t op = 0;\n" 467 << " (void)op; // suppress warning\n" 468 << " switch (opcode) {\n"; 469 } 470 471 // Emit each case statement 472 std::map<std::string, std::vector<std::string>>::iterator IE, EE; 473 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) { 474 const std::string &Case = IE->first; 475 std::vector<std::string> &InstList = IE->second; 476 477 for (int i = 0, N = InstList.size(); i < N; i++) { 478 if (i) 479 o << "\n"; 480 o << " case " << InstList[i] << ":"; 481 } 482 o << " {\n"; 483 o << Case; 484 o << " break;\n" 485 << " }\n"; 486 } 487 488 // Default case: unhandled opcode 489 o << " default:\n" 490 << " std::string msg;\n" 491 << " raw_string_ostream Msg(msg);\n" 492 << " Msg << \"Not supported instr: \" << MI;\n" 493 << " report_fatal_error(Msg.str().c_str());\n" 494 << " }\n"; 495 if (UseAPInt) 496 o << " Inst = Value;\n"; 497 else 498 o << " return Value;\n"; 499 o << "}\n\n"; 500 } 501 } 502 503 } // end anonymous namespace 504 505 namespace llvm { 506 507 void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { 508 emitSourceFileHeader("Machine Code Emitter", OS); 509 CodeEmitterGen(RK).run(OS); 510 } 511 512 } // end namespace llvm 513