1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "Common/CodeGenHwModes.h" 15 #include "Common/CodeGenInstruction.h" 16 #include "Common/CodeGenTarget.h" 17 #include "Common/InfoByHwMode.h" 18 #include "Common/VarLenCodeEmitterGen.h" 19 #include "TableGenBackends.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/CachedHashString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallBitVector.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/MC/MCDecoderOps.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FormattedStream.h" 36 #include "llvm/Support/LEB128.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <cstddef> 43 #include <cstdint> 44 #include <map> 45 #include <memory> 46 #include <set> 47 #include <string> 48 #include <utility> 49 #include <vector> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "decoder-emitter" 54 55 extern cl::OptionCategory DisassemblerEmitterCat; 56 57 enum SuppressLevel { 58 SUPPRESSION_DISABLE, 59 
SUPPRESSION_LEVEL1, 60 SUPPRESSION_LEVEL2 61 }; 62 63 cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates( 64 "suppress-per-hwmode-duplicates", 65 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"), 66 cl::values( 67 clEnumValN( 68 SUPPRESSION_DISABLE, "O0", 69 "Do not prevent DecoderTable duplications caused by HwModes"), 70 clEnumValN( 71 SUPPRESSION_LEVEL1, "O1", 72 "Remove duplicate DecoderTable entries generated due to HwModes"), 73 clEnumValN( 74 SUPPRESSION_LEVEL2, "O2", 75 "Extract HwModes-specific instructions into new DecoderTables, " 76 "significantly reducing Table Duplications")), 77 cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat)); 78 79 namespace { 80 81 STATISTIC(NumEncodings, "Number of encodings considered"); 82 STATISTIC(NumEncodingsLackingDisasm, 83 "Number of encodings without disassembler info"); 84 STATISTIC(NumInstructions, "Number of instructions considered"); 85 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 86 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 87 88 struct EncodingField { 89 unsigned Base, Width, Offset; 90 EncodingField(unsigned B, unsigned W, unsigned O) 91 : Base(B), Width(W), Offset(O) {} 92 }; 93 94 struct OperandInfo { 95 std::vector<EncodingField> Fields; 96 std::string Decoder; 97 bool HasCompleteDecoder; 98 uint64_t InitValue; 99 100 OperandInfo(std::string D, bool HCD) 101 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 102 103 void addField(unsigned Base, unsigned Width, unsigned Offset) { 104 Fields.push_back(EncodingField(Base, Width, Offset)); 105 } 106 107 unsigned numFields() const { return Fields.size(); } 108 109 typedef std::vector<EncodingField>::const_iterator const_iterator; 110 111 const_iterator begin() const { return Fields.begin(); } 112 const_iterator end() const { return Fields.end(); } 113 }; 114 115 typedef std::vector<uint8_t> DecoderTable; 116 typedef uint32_t DecoderFixup; 117 typedef 
std::vector<DecoderFixup> FixupList; 118 typedef std::vector<FixupList> FixupScopeList; 119 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 120 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 121 struct DecoderTableInfo { 122 DecoderTable Table; 123 FixupScopeList FixupStack; 124 PredicateSet Predicates; 125 DecoderSet Decoders; 126 }; 127 128 struct EncodingAndInst { 129 const Record *EncodingDef; 130 const CodeGenInstruction *Inst; 131 StringRef HwModeName; 132 133 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 134 StringRef HwModeName = "") 135 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 136 }; 137 138 struct EncodingIDAndOpcode { 139 unsigned EncodingID; 140 unsigned Opcode; 141 142 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 143 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 144 : EncodingID(EncodingID), Opcode(Opcode) {} 145 }; 146 147 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>; 148 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>; 149 150 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 151 if (Value.EncodingDef != Value.Inst->TheDef) 152 OS << Value.EncodingDef->getName() << ":"; 153 OS << Value.Inst->TheDef->getName(); 154 return OS; 155 } 156 157 class DecoderEmitter { 158 const RecordKeeper &RK; 159 std::vector<EncodingAndInst> NumberedEncodings; 160 161 public: 162 DecoderEmitter(const RecordKeeper &R, const std::string &PredicateNamespace) 163 : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {} 164 165 // Emit the decoder state machine table. 
166 void emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 167 unsigned Indent, unsigned BitWidth, StringRef Namespace, 168 const EncodingIDsVec &EncodingIDs) const; 169 void emitInstrLenTable(formatted_raw_ostream &OS, 170 std::vector<unsigned> &InstrLen) const; 171 void emitPredicateFunction(formatted_raw_ostream &OS, 172 PredicateSet &Predicates, unsigned Indent) const; 173 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, 174 unsigned Indent) const; 175 176 // run - Output the code emitter 177 void run(raw_ostream &o); 178 179 private: 180 CodeGenTarget Target; 181 182 public: 183 const std::string &PredicateNamespace; 184 }; 185 186 } // end anonymous namespace 187 188 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 189 // for a bit value. 190 // 191 // BIT_UNFILTERED is used as the init value for a filter position. It is used 192 // only for filter processings. 193 typedef enum { 194 BIT_TRUE, // '1' 195 BIT_FALSE, // '0' 196 BIT_UNSET, // '?' 197 BIT_UNFILTERED // unfiltered 198 } bit_value_t; 199 200 static bool ValueSet(bit_value_t V) { 201 return (V == BIT_TRUE || V == BIT_FALSE); 202 } 203 204 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); } 205 206 static int Value(bit_value_t V) { 207 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 208 } 209 210 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 211 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 212 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 213 214 // The bit is uninitialized. 215 return BIT_UNSET; 216 } 217 218 // Prints the bit value for each position. 
219 static void dumpBits(raw_ostream &OS, const BitsInit &bits) { 220 for (unsigned index = bits.getNumBits(); index > 0; --index) { 221 switch (bitFromBits(bits, index - 1)) { 222 case BIT_TRUE: 223 OS << "1"; 224 break; 225 case BIT_FALSE: 226 OS << "0"; 227 break; 228 case BIT_UNSET: 229 OS << "_"; 230 break; 231 default: 232 llvm_unreachable("unexpected return value from bitFromBits"); 233 } 234 } 235 } 236 237 static BitsInit &getBitsField(const Record &def, StringRef str) { 238 const RecordVal *RV = def.getValue(str); 239 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 240 return *Bits; 241 242 // variable length instruction 243 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 244 SmallVector<Init *, 16> Bits; 245 246 for (const auto &SI : VLI) { 247 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 248 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 249 Bits.push_back(BI->getBit(Idx)); 250 } 251 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 252 Bits.push_back(const_cast<BitInit *>(BI)); 253 } else { 254 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 255 Bits.push_back(UnsetInit::get(def.getRecords())); 256 } 257 } 258 259 return *BitsInit::get(def.getRecords(), Bits); 260 } 261 262 // Representation of the instruction to work on. 263 typedef std::vector<bit_value_t> insn_t; 264 265 namespace { 266 267 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 268 269 class FilterChooser; 270 271 /// Filter - Filter works with FilterChooser to produce the decoding tree for 272 /// the ISA. 273 /// 274 /// It is useful to think of a Filter as governing the switch stmts of the 275 /// decoding tree in a certain level. Each case stmt delegates to an inferior 276 /// FilterChooser to decide what further decoding logic to employ, or in another 277 /// words, what other remaining bits to look at. The FilterChooser eventually 278 /// chooses a best Filter to do its job. 
279 /// 280 /// This recursive scheme ends when the number of Opcodes assigned to the 281 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 282 /// the Filter/FilterChooser combo does not know how to distinguish among the 283 /// Opcodes assigned. 284 /// 285 /// An example of a conflict is 286 /// 287 /// Conflict: 288 /// 111101000.00........00010000.... 289 /// 111101000.00........0001........ 290 /// 1111010...00........0001........ 291 /// 1111010...00.................... 292 /// 1111010......................... 293 /// 1111............................ 294 /// ................................ 295 /// VST4q8a 111101000_00________00010000____ 296 /// VST4q8b 111101000_00________00010000____ 297 /// 298 /// The Debug output shows the path that the decoding tree follows to reach the 299 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 300 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers. 301 /// 302 /// The encoding info in the .td files does not specify this meta information, 303 /// which could have been used by the decoder to resolve the conflict. The 304 /// decoder could try to decode the even/odd register numbering and assign to 305 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 306 /// version and return the Opcode since the two have the same Asm format string. 307 class Filter { 308 protected: 309 const FilterChooser 310 *Owner; // points to the FilterChooser who owns this filter 311 unsigned StartBit; // the starting bit position 312 unsigned NumBits; // number of bits to filter 313 bool Mixed; // a mixed region contains both set and unset bits 314 315 // Map of well-known segment value to the set of uid's with that value. 316 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; 317 318 // Set of uid's with non-constant segment values. 
319 std::vector<EncodingIDAndOpcode> VariableInstructions; 320 321 // Map of well-known segment value to its delegate. 322 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 323 324 // Number of instructions which fall under FilteredInstructions category. 325 unsigned NumFiltered; 326 327 // Keeps track of the last opcode in the filtered bucket. 328 EncodingIDAndOpcode LastOpcFiltered; 329 330 public: 331 Filter(Filter &&f); 332 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 333 334 ~Filter() = default; 335 336 unsigned getNumFiltered() const { return NumFiltered; } 337 338 EncodingIDAndOpcode getSingletonOpc() const { 339 assert(NumFiltered == 1); 340 return LastOpcFiltered; 341 } 342 343 // Return the filter chooser for the group of instructions without constant 344 // segment values. 345 const FilterChooser &getVariableFC() const { 346 assert(NumFiltered == 1); 347 assert(FilterChooserMap.size() == 1); 348 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 349 } 350 351 // Divides the decoding task into sub tasks and delegates them to the 352 // inferior FilterChooser's. 353 // 354 // A special case arises when there's only one entry in the filtered 355 // instructions. In order to unambiguously decode the singleton, we need to 356 // match the remaining undecoded encoding bits against the singleton. 357 void recurse(); 358 359 // Emit table entries to decode instructions given a segment or segments of 360 // bits. 361 void emitTableEntry(DecoderTableInfo &TableInfo) const; 362 363 // Returns the number of fanout produced by the filter. More fanout implies 364 // the filter distinguishes more categories of instructions. 365 unsigned usefulness() const; 366 }; // end class Filter 367 368 } // end anonymous namespace 369 370 // These are states of our finite state machines used in FilterChooser's 371 // filterProcessor() which produces the filter candidates to use. 
372 typedef enum { 373 ATTR_NONE, 374 ATTR_FILTERED, 375 ATTR_ALL_SET, 376 ATTR_ALL_UNSET, 377 ATTR_MIXED 378 } bitAttr_t; 379 380 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 381 /// in order to perform the decoding of instructions at the current level. 382 /// 383 /// Decoding proceeds from the top down. Based on the well-known encoding bits 384 /// of instructions available, FilterChooser builds up the possible Filters that 385 /// can further the task of decoding by distinguishing among the remaining 386 /// candidate instructions. 387 /// 388 /// Once a filter has been chosen, it is called upon to divide the decoding task 389 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 390 /// processings. 391 /// 392 /// It is useful to think of a Filter as governing the switch stmts of the 393 /// decoding tree. And each case is delegated to an inferior FilterChooser to 394 /// decide what further remaining bits to look at. 395 namespace { 396 397 class FilterChooser { 398 protected: 399 friend class Filter; 400 401 // Vector of codegen instructions to choose our filter. 402 ArrayRef<EncodingAndInst> AllInstructions; 403 404 // Vector of uid's for this filter chooser to work on. 405 // The first member of the pair is the opcode id being decoded, the second is 406 // the opcode id that should be emitted. 407 const std::vector<EncodingIDAndOpcode> &Opcodes; 408 409 // Lookup table for the operand decoding of instructions. 410 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 411 412 // Vector of candidate filters. 413 std::vector<Filter> Filters; 414 415 // Array of bit values passed down from our parent. 416 // Set to all BIT_UNFILTERED's for Parent == NULL. 417 std::vector<bit_value_t> FilterBitValues; 418 419 // Links to the FilterChooser above us in the decoding tree. 420 const FilterChooser *Parent; 421 422 // Index of the best filter from Filters. 
423 int BestIndex; 424 425 // Width of instructions 426 unsigned BitWidth; 427 428 // Parent emitter 429 const DecoderEmitter *Emitter; 430 431 public: 432 FilterChooser(ArrayRef<EncodingAndInst> Insts, 433 const std::vector<EncodingIDAndOpcode> &IDs, 434 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 435 unsigned BW, const DecoderEmitter *E) 436 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 437 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 438 BitWidth(BW), Emitter(E) { 439 doFilter(); 440 } 441 442 FilterChooser(ArrayRef<EncodingAndInst> Insts, 443 const std::vector<EncodingIDAndOpcode> &IDs, 444 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 445 const std::vector<bit_value_t> &ParentFilterBitValues, 446 const FilterChooser &parent) 447 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 448 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 449 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 450 doFilter(); 451 } 452 453 FilterChooser(const FilterChooser &) = delete; 454 void operator=(const FilterChooser &) = delete; 455 456 unsigned getBitWidth() const { return BitWidth; } 457 458 protected: 459 // Populates the insn given the uid. 460 void insnWithID(insn_t &Insn, unsigned Opcode) const { 461 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 462 BitsInit &Bits = getBitsField(*EncodingDef, "Inst"); 463 Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET); 464 // We may have a SoftFail bitmask, which specifies a mask where an encoding 465 // may differ from the value in "Inst" and yet still be valid, but the 466 // disassembler should return SoftFail instead of Success. 467 // 468 // This is used for marking UNPREDICTABLE instructions in the ARM world. 469 const RecordVal *RV = EncodingDef->getValue("SoftFail"); 470 const BitsInit *SFBits = RV ? 
dyn_cast<BitsInit>(RV->getValue()) : nullptr; 471 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 472 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 473 Insn[i] = BIT_UNSET; 474 else 475 Insn[i] = bitFromBits(Bits, i); 476 } 477 } 478 479 // Emit the name of the encoding/instruction pair. 480 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 481 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 482 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 483 if (EncodingDef != InstDef) 484 OS << EncodingDef->getName() << ":"; 485 OS << InstDef->getName(); 486 } 487 488 // Populates the field of the insn given the start position and the number of 489 // consecutive bits to scan for. 490 // 491 // Returns a pair of values (indicator, field), where the indicator is false 492 // if there exists any uninitialized bit value in the range and true if all 493 // bits are well-known. The second value is the potentially populated field. 494 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit, 495 unsigned NumBits) const; 496 497 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 498 /// filter array as a series of chars. 499 void dumpFilterArray(raw_ostream &OS, 500 const std::vector<bit_value_t> &filter) const; 501 502 /// dumpStack - dumpStack traverses the filter chooser chain and calls 503 /// dumpFilterArray on each filter chooser up to the top level one. 504 void dumpStack(raw_ostream &OS, const char *prefix) const; 505 506 Filter &bestFilter() { 507 assert(BestIndex != -1 && "BestIndex not set"); 508 return Filters[BestIndex]; 509 } 510 511 bool PositionFiltered(unsigned i) const { 512 return ValueSet(FilterBitValues[i]); 513 } 514 515 // Calculates the island(s) needed to decode the instruction. 
516 // This returns a lit of undecoded bits of an instructions, for example, 517 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 518 // decoded bits in order to verify that the instruction matches the Opcode. 519 unsigned getIslands(std::vector<unsigned> &StartBits, 520 std::vector<unsigned> &EndBits, 521 std::vector<uint64_t> &FieldVals, 522 const insn_t &Insn) const; 523 524 // Emits code to check the Predicates member of an instruction are true. 525 // Returns true if predicate matches were emitted, false otherwise. 526 bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const; 527 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 528 raw_ostream &OS) const; 529 530 bool doesOpcodeNeedPredicate(unsigned Opc) const; 531 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 532 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 533 534 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 535 536 // Emits table entries to decode the singleton. 537 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 538 EncodingIDAndOpcode Opc) const; 539 540 // Emits code to decode the singleton, and then to decode the rest. 541 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 542 const Filter &Best) const; 543 544 void emitBinaryParser(raw_ostream &OS, unsigned Indent, 545 const OperandInfo &OpInfo, 546 bool &OpHasCompleteDecoder) const; 547 548 void emitDecoder(raw_ostream &OS, unsigned Indent, unsigned Opc, 549 bool &HasCompleteDecoder) const; 550 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 551 bool &HasCompleteDecoder) const; 552 553 // Assign a single filter and run with it. 554 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 555 556 // reportRegion is a helper function for filterProcessor to mark a region as 557 // eligible for use as a filter region. 
558 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 559 bool AllowMixed); 560 561 // FilterProcessor scans the well-known encoding bits of the instructions and 562 // builds up a list of candidate filters. It chooses the best filter and 563 // recursively descends down the decoding tree. 564 bool filterProcessor(bool AllowMixed, bool Greedy = true); 565 566 // Decides on the best configuration of filter(s) to use in order to decode 567 // the instructions. A conflict of instructions may occur, in which case we 568 // dump the conflict set to the standard error. 569 void doFilter(); 570 571 public: 572 // emitTableEntries - Emit state machine entries to decode our share of 573 // instructions. 574 void emitTableEntries(DecoderTableInfo &TableInfo) const; 575 }; 576 577 } // end anonymous namespace 578 579 /////////////////////////// 580 // // 581 // Filter Implementation // 582 // // 583 /////////////////////////// 584 585 Filter::Filter(Filter &&f) 586 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 587 FilteredInstructions(std::move(f.FilteredInstructions)), 588 VariableInstructions(std::move(f.VariableInstructions)), 589 FilterChooserMap(std::move(f.FilterChooserMap)), 590 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} 591 592 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 593 bool mixed) 594 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 595 assert(StartBit + NumBits - 1 < Owner->BitWidth); 596 597 NumFiltered = 0; 598 LastOpcFiltered = {0, 0}; 599 600 for (const auto &OpcPair : Owner->Opcodes) { 601 insn_t Insn; 602 603 // Populates the insn given the uid. 604 Owner->insnWithID(Insn, OpcPair.EncodingID); 605 606 // Scans the segment for possibly well-specified encoding bits. 607 auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits); 608 609 if (Ok) { 610 // The encoding bits are well-known. 
Lets add the uid of the 611 // instruction into the bucket keyed off the constant field value. 612 LastOpcFiltered = OpcPair; 613 FilteredInstructions[Field].push_back(LastOpcFiltered); 614 ++NumFiltered; 615 } else { 616 // Some of the encoding bit(s) are unspecified. This contributes to 617 // one additional member of "Variable" instructions. 618 VariableInstructions.push_back(OpcPair); 619 } 620 } 621 622 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && 623 "Filter returns no instruction categories"); 624 } 625 626 // Divides the decoding task into sub tasks and delegates them to the 627 // inferior FilterChooser's. 628 // 629 // A special case arises when there's only one entry in the filtered 630 // instructions. In order to unambiguously decode the singleton, we need to 631 // match the remaining undecoded encoding bits against the singleton. 632 void Filter::recurse() { 633 // Starts by inheriting our parent filter chooser's filter bit values. 634 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 635 636 if (!VariableInstructions.empty()) { 637 // Conservatively marks each segment position as BIT_UNSET. 638 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 639 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 640 641 // Delegates to an inferior filter chooser for further processing on this 642 // group of instructions whose segment values are variable. 643 FilterChooserMap.insert(std::pair( 644 NO_FIXED_SEGMENTS_SENTINEL, 645 std::make_unique<FilterChooser>(Owner->AllInstructions, 646 VariableInstructions, Owner->Operands, 647 BitValueArray, *Owner))); 648 } 649 650 // No need to recurse for a singleton filtered instruction. 651 // See also Filter::emit*(). 652 if (getNumFiltered() == 1) { 653 assert(FilterChooserMap.size() == 1); 654 return; 655 } 656 657 // Otherwise, create sub choosers. 
658 for (const auto &Inst : FilteredInstructions) { 659 660 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 661 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 662 if (Inst.first & (1ULL << bitIndex)) 663 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 664 else 665 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 666 } 667 668 // Delegates to an inferior filter chooser for further processing on this 669 // category of instructions. 670 FilterChooserMap.insert( 671 std::pair(Inst.first, std::make_unique<FilterChooser>( 672 Owner->AllInstructions, Inst.second, 673 Owner->Operands, BitValueArray, *Owner))); 674 } 675 } 676 677 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 678 uint32_t DestIdx) { 679 // Any NumToSkip fixups in the current scope can resolve to the 680 // current location. 681 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 682 I != E; ++I) { 683 // Calculate the distance from the byte following the fixup entry byte 684 // to the destination. The Target is calculated from after the 16-bit 685 // NumToSkip entry itself, so subtract two from the displacement here 686 // to account for that. 687 uint32_t FixupIdx = *I; 688 uint32_t Delta = DestIdx - FixupIdx - 3; 689 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 690 // big. 691 assert(Delta < (1u << 24)); 692 Table[FixupIdx] = (uint8_t)Delta; 693 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 694 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 695 } 696 } 697 698 // Emit table entries to decode instructions given a segment or segments 699 // of bits. 
700 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 701 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 702 TableInfo.Table.push_back(MCD::OPC_ExtractField); 703 704 SmallString<16> SBytes; 705 raw_svector_ostream S(SBytes); 706 encodeULEB128(StartBit, S); 707 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 708 TableInfo.Table.push_back(NumBits); 709 710 // A new filter entry begins a new scope for fixup resolution. 711 TableInfo.FixupStack.emplace_back(); 712 713 DecoderTable &Table = TableInfo.Table; 714 715 size_t PrevFilter = 0; 716 bool HasFallthrough = false; 717 for (const auto &Filter : FilterChooserMap) { 718 // Field value -1 implies a non-empty set of variable instructions. 719 // See also recurse(). 720 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 721 HasFallthrough = true; 722 723 // Each scope should always have at least one filter value to check 724 // for. 725 assert(PrevFilter != 0 && "empty filter set!"); 726 FixupList &CurScope = TableInfo.FixupStack.back(); 727 // Resolve any NumToSkip fixups in the current scope. 728 resolveTableFixups(Table, CurScope, Table.size()); 729 CurScope.clear(); 730 PrevFilter = 0; // Don't re-process the filter's fallthrough. 731 } else { 732 Table.push_back(MCD::OPC_FilterValue); 733 // Encode and emit the value to filter against. 734 uint8_t Buffer[16]; 735 unsigned Len = encodeULEB128(Filter.first, Buffer); 736 Table.insert(Table.end(), Buffer, Buffer + Len); 737 // Reserve space for the NumToSkip entry. We'll backpatch the value 738 // later. 739 PrevFilter = Table.size(); 740 Table.push_back(0); 741 Table.push_back(0); 742 Table.push_back(0); 743 } 744 745 // We arrive at a category of instructions with the same segment value. 746 // Now delegate to the sub filter chooser for further decodings. 747 // The case may fallthrough, which happens if the remaining well-known 748 // encoding bits do not match exactly. 
749 Filter.second->emitTableEntries(TableInfo); 750 751 // Now that we've emitted the body of the handler, update the NumToSkip 752 // of the filter itself to be able to skip forward when false. Subtract 753 // two as to account for the width of the NumToSkip field itself. 754 if (PrevFilter) { 755 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 756 assert(NumToSkip < (1u << 24) && 757 "disassembler decoding table too large!"); 758 Table[PrevFilter] = (uint8_t)NumToSkip; 759 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 760 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 761 } 762 } 763 764 // Any remaining unresolved fixups bubble up to the parent fixup scope. 765 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 766 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 767 FixupScopeList::iterator Dest = Source - 1; 768 llvm::append_range(*Dest, *Source); 769 TableInfo.FixupStack.pop_back(); 770 771 // If there is no fallthrough, then the final filter should get fixed 772 // up according to the enclosing scope rather than the current position. 773 if (!HasFallthrough) 774 TableInfo.FixupStack.back().push_back(PrevFilter); 775 } 776 777 // Returns the number of fanout produced by the filter. More fanout implies 778 // the filter distinguishes more categories of instructions. 779 unsigned Filter::usefulness() const { 780 if (!VariableInstructions.empty()) 781 return FilteredInstructions.size(); 782 else 783 return FilteredInstructions.size() + 1; 784 } 785 786 ////////////////////////////////// 787 // // 788 // Filterchooser Implementation // 789 // // 790 ////////////////////////////////// 791 792 // Emit the decoder state machine table. 
793 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 794 unsigned Indent, unsigned BitWidth, 795 StringRef Namespace, 796 const EncodingIDsVec &EncodingIDs) const { 797 // We'll need to be able to map from a decoded opcode into the corresponding 798 // EncodingID for this specific combination of BitWidth and Namespace. This 799 // is used below to index into NumberedEncodings. 800 DenseMap<unsigned, unsigned> OpcodeToEncodingID; 801 OpcodeToEncodingID.reserve(EncodingIDs.size()); 802 for (const auto &EI : EncodingIDs) 803 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; 804 805 OS.indent(Indent) << "static const uint8_t DecoderTable" << Namespace 806 << BitWidth << "[] = {\n"; 807 808 Indent += 2; 809 810 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 811 auto emitULEB128 = [](DecoderTable::const_iterator I, 812 formatted_raw_ostream &OS) { 813 unsigned Len = 0; 814 while (*I >= 128) { 815 OS << (unsigned)*I++ << ", "; 816 Len++; 817 } 818 OS << (unsigned)*I++ << ", "; 819 return Len + 1; 820 }; 821 822 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 823 auto emitNumToSkip = [](DecoderTable::const_iterator I, 824 formatted_raw_ostream &OS) { 825 uint8_t Byte = *I++; 826 uint32_t NumToSkip = Byte; 827 OS << (unsigned)Byte << ", "; 828 Byte = *I++; 829 OS << (unsigned)Byte << ", "; 830 NumToSkip |= Byte << 8; 831 Byte = *I++; 832 OS << utostr(Byte) << ", "; 833 NumToSkip |= Byte << 16; 834 return NumToSkip; 835 }; 836 837 // FIXME: We may be able to use the NumToSkip values to recover 838 // appropriate indentation levels. 
839 DecoderTable::const_iterator I = Table.begin(); 840 DecoderTable::const_iterator E = Table.end(); 841 while (I != E) { 842 assert(I < E && "incomplete decode table entry!"); 843 844 uint64_t Pos = I - Table.begin(); 845 OS << "/* " << Pos << " */"; 846 OS.PadToColumn(12); 847 848 switch (*I) { 849 default: 850 PrintFatalError("invalid decode table opcode"); 851 case MCD::OPC_ExtractField: { 852 ++I; 853 OS.indent(Indent) << "MCD::OPC_ExtractField, "; 854 855 // ULEB128 encoded start value. 856 const char *ErrMsg = nullptr; 857 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 858 Table.data() + Table.size(), &ErrMsg); 859 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 860 I += emitULEB128(I, OS); 861 862 unsigned Len = *I++; 863 OS << Len << ", // Inst{"; 864 if (Len > 1) 865 OS << (Start + Len - 1) << "-"; 866 OS << Start << "} ...\n"; 867 break; 868 } 869 case MCD::OPC_FilterValue: { 870 ++I; 871 OS.indent(Indent) << "MCD::OPC_FilterValue, "; 872 // The filter value is ULEB128 encoded. 873 I += emitULEB128(I, OS); 874 875 // 24-bit numtoskip value. 876 uint32_t NumToSkip = emitNumToSkip(I, OS); 877 I += 3; 878 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 879 break; 880 } 881 case MCD::OPC_CheckField: { 882 ++I; 883 OS.indent(Indent) << "MCD::OPC_CheckField, "; 884 // ULEB128 encoded start value. 885 I += emitULEB128(I, OS); 886 // 8-bit length. 887 unsigned Len = *I++; 888 OS << Len << ", "; 889 // ULEB128 encoded field value. 890 I += emitULEB128(I, OS); 891 892 // 24-bit numtoskip value. 893 uint32_t NumToSkip = emitNumToSkip(I, OS); 894 I += 3; 895 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 896 break; 897 } 898 case MCD::OPC_CheckPredicate: { 899 ++I; 900 OS.indent(Indent) << "MCD::OPC_CheckPredicate, "; 901 I += emitULEB128(I, OS); 902 903 // 24-bit numtoskip value. 
904 uint32_t NumToSkip = emitNumToSkip(I, OS); 905 I += 3; 906 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 907 break; 908 } 909 case MCD::OPC_Decode: 910 case MCD::OPC_TryDecode: { 911 bool IsTry = *I == MCD::OPC_TryDecode; 912 ++I; 913 // Decode the Opcode value. 914 const char *ErrMsg = nullptr; 915 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 916 Table.data() + Table.size(), &ErrMsg); 917 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 918 919 OS.indent(Indent) << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 920 I += emitULEB128(I, OS); 921 922 // Decoder index. 923 I += emitULEB128(I, OS); 924 925 auto EncI = OpcodeToEncodingID.find(Opc); 926 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry"); 927 auto EncodingID = EncI->second; 928 929 if (!IsTry) { 930 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n"; 931 break; 932 } 933 934 // Fallthrough for OPC_TryDecode. 935 936 // 24-bit numtoskip value. 937 uint32_t NumToSkip = emitNumToSkip(I, OS); 938 I += 3; 939 940 OS << "// Opcode: " << NumberedEncodings[EncodingID] 941 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 942 break; 943 } 944 case MCD::OPC_SoftFail: { 945 ++I; 946 OS.indent(Indent) << "MCD::OPC_SoftFail"; 947 // Positive mask 948 uint64_t Value = 0; 949 unsigned Shift = 0; 950 do { 951 OS << ", " << (unsigned)*I; 952 Value += ((uint64_t)(*I & 0x7f)) << Shift; 953 Shift += 7; 954 } while (*I++ >= 128); 955 if (Value > 127) { 956 OS << " /* 0x"; 957 OS.write_hex(Value); 958 OS << " */"; 959 } 960 // Negative mask 961 Value = 0; 962 Shift = 0; 963 do { 964 OS << ", " << (unsigned)*I; 965 Value += ((uint64_t)(*I & 0x7f)) << Shift; 966 Shift += 7; 967 } while (*I++ >= 128); 968 if (Value > 127) { 969 OS << " /* 0x"; 970 OS.write_hex(Value); 971 OS << " */"; 972 } 973 OS << ",\n"; 974 break; 975 } 976 case MCD::OPC_Fail: { 977 ++I; 978 OS.indent(Indent) << "MCD::OPC_Fail,\n"; 979 break; 980 } 981 } 982 } 983 
OS.indent(Indent) << "0\n"; 984 985 Indent -= 2; 986 987 OS.indent(Indent) << "};\n\n"; 988 } 989 990 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 991 std::vector<unsigned> &InstrLen) const { 992 OS << "static const uint8_t InstrLenTable[] = {\n"; 993 for (unsigned &Len : InstrLen) { 994 OS << Len << ",\n"; 995 } 996 OS << "};\n\n"; 997 } 998 999 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 1000 PredicateSet &Predicates, 1001 unsigned Indent) const { 1002 // The predicate function is just a big switch statement based on the 1003 // input predicate index. 1004 OS.indent(Indent) << "static bool checkDecoderPredicate(unsigned Idx, " 1005 << "const FeatureBitset &Bits) {\n"; 1006 Indent += 2; 1007 if (!Predicates.empty()) { 1008 OS.indent(Indent) << "switch (Idx) {\n"; 1009 OS.indent(Indent) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1010 unsigned Index = 0; 1011 for (const auto &Predicate : Predicates) { 1012 OS.indent(Indent) << "case " << Index++ << ":\n"; 1013 OS.indent(Indent + 2) << "return (" << Predicate << ");\n"; 1014 } 1015 OS.indent(Indent) << "}\n"; 1016 } else { 1017 // No case statement to emit 1018 OS.indent(Indent) << "llvm_unreachable(\"Invalid index!\");\n"; 1019 } 1020 Indent -= 2; 1021 OS.indent(Indent) << "}\n\n"; 1022 } 1023 1024 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 1025 DecoderSet &Decoders, 1026 unsigned Indent) const { 1027 // The decoder function is just a big switch statement based on the 1028 // input decoder index. 
1029 OS.indent(Indent) << "template <typename InsnType>\n"; 1030 OS.indent(Indent) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1031 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1032 OS.indent(Indent) 1033 << " uint64_t " 1034 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1035 Indent += 2; 1036 OS.indent(Indent) << "DecodeComplete = true;\n"; 1037 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1038 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1039 // possible but fall back to an InsnType-sized tmp for truly large fields. 1040 OS.indent(Indent) << "using TmpType = " 1041 "std::conditional_t<std::is_integral<InsnType>::" 1042 "value, InsnType, uint64_t>;\n"; 1043 OS.indent(Indent) << "TmpType tmp;\n"; 1044 OS.indent(Indent) << "switch (Idx) {\n"; 1045 OS.indent(Indent) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1046 unsigned Index = 0; 1047 for (const auto &Decoder : Decoders) { 1048 OS.indent(Indent) << "case " << Index++ << ":\n"; 1049 OS << Decoder; 1050 OS.indent(Indent + 2) << "return S;\n"; 1051 } 1052 OS.indent(Indent) << "}\n"; 1053 Indent -= 2; 1054 OS.indent(Indent) << "}\n"; 1055 } 1056 1057 // Populates the field of the insn given the start position and the number of 1058 // consecutive bits to scan for. 1059 // 1060 // Returns a pair of values (indicator, field), where the indicator is false 1061 // if there exists any uninitialized bit value in the range and true if all 1062 // bits are well-known. The second value is the potentially populated field. 
1063 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn, 1064 unsigned StartBit, 1065 unsigned NumBits) const { 1066 uint64_t Field = 0; 1067 1068 for (unsigned i = 0; i < NumBits; ++i) { 1069 if (Insn[StartBit + i] == BIT_UNSET) 1070 return {false, Field}; 1071 1072 if (Insn[StartBit + i] == BIT_TRUE) 1073 Field = Field | (1ULL << i); 1074 } 1075 1076 return {true, Field}; 1077 } 1078 1079 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1080 /// filter array as a series of chars. 1081 void FilterChooser::dumpFilterArray( 1082 raw_ostream &OS, const std::vector<bit_value_t> &filter) const { 1083 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1084 switch (filter[bitIndex - 1]) { 1085 case BIT_UNFILTERED: 1086 OS << "."; 1087 break; 1088 case BIT_UNSET: 1089 OS << "_"; 1090 break; 1091 case BIT_TRUE: 1092 OS << "1"; 1093 break; 1094 case BIT_FALSE: 1095 OS << "0"; 1096 break; 1097 } 1098 } 1099 } 1100 1101 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1102 /// dumpFilterArray on each filter chooser up to the top level one. 1103 void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const { 1104 const FilterChooser *current = this; 1105 1106 while (current) { 1107 OS << prefix; 1108 dumpFilterArray(OS, current->FilterBitValues); 1109 OS << '\n'; 1110 current = current->Parent; 1111 } 1112 } 1113 1114 // Calculates the island(s) needed to decode the instruction. 1115 // This returns a list of undecoded bits of an instructions, for example, 1116 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1117 // decoded bits in order to verify that the instruction matches the Opcode. 
1118 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1119 std::vector<unsigned> &EndBits, 1120 std::vector<uint64_t> &FieldVals, 1121 const insn_t &Insn) const { 1122 unsigned Num, BitNo; 1123 Num = BitNo = 0; 1124 1125 uint64_t FieldVal = 0; 1126 1127 // 0: Init 1128 // 1: Water (the bit value does not affect decoding) 1129 // 2: Island (well-known bit value needed for decoding) 1130 int State = 0; 1131 1132 for (unsigned i = 0; i < BitWidth; ++i) { 1133 int64_t Val = Value(Insn[i]); 1134 bool Filtered = PositionFiltered(i); 1135 switch (State) { 1136 default: 1137 llvm_unreachable("Unreachable code!"); 1138 case 0: 1139 case 1: 1140 if (Filtered || Val == -1) 1141 State = 1; // Still in Water 1142 else { 1143 State = 2; // Into the Island 1144 BitNo = 0; 1145 StartBits.push_back(i); 1146 FieldVal = Val; 1147 } 1148 break; 1149 case 2: 1150 if (Filtered || Val == -1) { 1151 State = 1; // Into the Water 1152 EndBits.push_back(i - 1); 1153 FieldVals.push_back(FieldVal); 1154 ++Num; 1155 } else { 1156 State = 2; // Still in Island 1157 ++BitNo; 1158 FieldVal = FieldVal | Val << BitNo; 1159 } 1160 break; 1161 } 1162 } 1163 // If we are still in Island after the loop, do some housekeeping. 
1164 if (State == 2) { 1165 EndBits.push_back(BitWidth - 1); 1166 FieldVals.push_back(FieldVal); 1167 ++Num; 1168 } 1169 1170 assert(StartBits.size() == Num && EndBits.size() == Num && 1171 FieldVals.size() == Num); 1172 return Num; 1173 } 1174 1175 void FilterChooser::emitBinaryParser(raw_ostream &OS, unsigned Indent, 1176 const OperandInfo &OpInfo, 1177 bool &OpHasCompleteDecoder) const { 1178 const std::string &Decoder = OpInfo.Decoder; 1179 1180 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1181 1182 if (UseInsertBits) { 1183 OS.indent(Indent) << "tmp = 0x"; 1184 OS.write_hex(OpInfo.InitValue); 1185 OS << ";\n"; 1186 } 1187 1188 for (const EncodingField &EF : OpInfo) { 1189 OS.indent(Indent); 1190 if (UseInsertBits) 1191 OS << "insertBits(tmp, "; 1192 else 1193 OS << "tmp = "; 1194 OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1195 if (UseInsertBits) 1196 OS << ", " << EF.Offset << ", " << EF.Width << ')'; 1197 else if (EF.Offset != 0) 1198 OS << " << " << EF.Offset; 1199 OS << ";\n"; 1200 } 1201 1202 if (Decoder != "") { 1203 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1204 OS.indent(Indent) << "if (!Check(S, " << Decoder 1205 << "(MI, tmp, Address, Decoder))) { " 1206 << (OpHasCompleteDecoder ? "" 1207 : "DecodeComplete = false; ") 1208 << "return MCDisassembler::Fail; }\n"; 1209 } else { 1210 OpHasCompleteDecoder = true; 1211 OS.indent(Indent) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1212 } 1213 } 1214 1215 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indent, unsigned Opc, 1216 bool &HasCompleteDecoder) const { 1217 HasCompleteDecoder = true; 1218 1219 for (const auto &Op : Operands.find(Opc)->second) { 1220 // If a custom instruction decoder was specified, use that. 
1221 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1222 HasCompleteDecoder = Op.HasCompleteDecoder; 1223 OS.indent(Indent) << "if (!Check(S, " << Op.Decoder 1224 << "(MI, insn, Address, Decoder))) { " 1225 << (HasCompleteDecoder ? "" 1226 : "DecodeComplete = false; ") 1227 << "return MCDisassembler::Fail; }\n"; 1228 break; 1229 } 1230 1231 bool OpHasCompleteDecoder; 1232 emitBinaryParser(OS, Indent, Op, OpHasCompleteDecoder); 1233 if (!OpHasCompleteDecoder) 1234 HasCompleteDecoder = false; 1235 } 1236 } 1237 1238 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1239 bool &HasCompleteDecoder) const { 1240 // Build up the predicate string. 1241 SmallString<256> Decoder; 1242 // FIXME: emitDecoder() function can take a buffer directly rather than 1243 // a stream. 1244 raw_svector_ostream S(Decoder); 1245 unsigned I = 4; 1246 emitDecoder(S, I, Opc, HasCompleteDecoder); 1247 1248 // Using the full decoder string as the key value here is a bit 1249 // heavyweight, but is effective. If the string comparisons become a 1250 // performance concern, we can implement a mangling of the predicate 1251 // data easily enough with a map back to the actual string. That's 1252 // overkill for now, though. 1253 1254 // Make sure the predicate is in the table. 1255 Decoders.insert(CachedHashString(Decoder)); 1256 // Now figure out the index for when we write out the table. 1257 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1258 return (unsigned)(P - Decoders.begin()); 1259 } 1260 1261 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 
1262 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 1263 raw_ostream &OS) const { 1264 if (const auto *D = dyn_cast<DefInit>(&Val)) { 1265 if (!D->getDef()->isSubClassOf("SubtargetFeature")) 1266 return true; 1267 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() 1268 << "]"; 1269 return false; 1270 } 1271 if (const auto *D = dyn_cast<DagInit>(&Val)) { 1272 std::string Op = D->getOperator()->getAsString(); 1273 if (Op == "not" && D->getNumArgs() == 1) { 1274 OS << '!'; 1275 return emitPredicateMatchAux(*D->getArg(0), true, OS); 1276 } 1277 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { 1278 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); 1279 if (Paren) 1280 OS << '('; 1281 ListSeparator LS(Op == "any_of" ? " || " : " && "); 1282 for (auto *Arg : D->getArgs()) { 1283 OS << LS; 1284 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) 1285 return true; 1286 } 1287 if (Paren) 1288 OS << ')'; 1289 return false; 1290 } 1291 } 1292 return true; 1293 } 1294 1295 bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const { 1296 ListInit *Predicates = 1297 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1298 bool IsFirstEmission = true; 1299 for (unsigned i = 0; i < Predicates->size(); ++i) { 1300 const Record *Pred = Predicates->getElementAsRecord(i); 1301 if (!Pred->getValue("AssemblerMatcherPredicate")) 1302 continue; 1303 1304 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1305 continue; 1306 1307 if (!IsFirstEmission) 1308 OS << " && "; 1309 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), 1310 Predicates->size() > 1, OS)) 1311 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1312 IsFirstEmission = false; 1313 } 1314 return !Predicates->empty(); 1315 } 1316 1317 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1318 const ListInit *Predicates = 1319 
AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1320 for (unsigned i = 0; i < Predicates->size(); ++i) { 1321 const Record *Pred = Predicates->getElementAsRecord(i); 1322 if (!Pred->getValue("AssemblerMatcherPredicate")) 1323 continue; 1324 1325 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1326 return true; 1327 } 1328 return false; 1329 } 1330 1331 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1332 StringRef Predicate) const { 1333 // Using the full predicate string as the key value here is a bit 1334 // heavyweight, but is effective. If the string comparisons become a 1335 // performance concern, we can implement a mangling of the predicate 1336 // data easily enough with a map back to the actual string. That's 1337 // overkill for now, though. 1338 1339 // Make sure the predicate is in the table. 1340 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1341 // Now figure out the index for when we write out the table. 1342 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1343 return (unsigned)(P - TableInfo.Predicates.begin()); 1344 } 1345 1346 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1347 unsigned Opc) const { 1348 if (!doesOpcodeNeedPredicate(Opc)) 1349 return; 1350 1351 // Build up the predicate string. 1352 SmallString<256> Predicate; 1353 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1354 // than a stream. 1355 raw_svector_ostream PS(Predicate); 1356 emitPredicateMatch(PS, Opc); 1357 1358 // Figure out the index into the predicate table for the predicate just 1359 // computed. 1360 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1361 SmallString<16> PBytes; 1362 raw_svector_ostream S(PBytes); 1363 encodeULEB128(PIdx, S); 1364 1365 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1366 // Predicate index. 
1367 for (const auto PB : PBytes) 1368 TableInfo.Table.push_back(PB); 1369 // Push location for NumToSkip backpatching. 1370 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1371 TableInfo.Table.push_back(0); 1372 TableInfo.Table.push_back(0); 1373 TableInfo.Table.push_back(0); 1374 } 1375 1376 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1377 unsigned Opc) const { 1378 const Record *EncodingDef = AllInstructions[Opc].EncodingDef; 1379 const RecordVal *RV = EncodingDef->getValue("SoftFail"); 1380 BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1381 1382 if (!SFBits) 1383 return; 1384 BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst"); 1385 1386 APInt PositiveMask(BitWidth, 0ULL); 1387 APInt NegativeMask(BitWidth, 0ULL); 1388 for (unsigned i = 0; i < BitWidth; ++i) { 1389 bit_value_t B = bitFromBits(*SFBits, i); 1390 bit_value_t IB = bitFromBits(*InstBits, i); 1391 1392 if (B != BIT_TRUE) 1393 continue; 1394 1395 switch (IB) { 1396 case BIT_FALSE: 1397 // The bit is meant to be false, so emit a check to see if it is true. 1398 PositiveMask.setBit(i); 1399 break; 1400 case BIT_TRUE: 1401 // The bit is meant to be true, so emit a check to see if it is false. 1402 NegativeMask.setBit(i); 1403 break; 1404 default: 1405 // The bit is not set; this must be an error! 
1406 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1407 << AllInstructions[Opc] << " is set but Inst{" << i 1408 << "} is unset!\n" 1409 << " - You can only mark a bit as SoftFail if it is fully defined" 1410 << " (1/0 - not '?') in Inst\n"; 1411 return; 1412 } 1413 } 1414 1415 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1416 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1417 1418 if (!NeedPositiveMask && !NeedNegativeMask) 1419 return; 1420 1421 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1422 1423 SmallString<16> MaskBytes; 1424 raw_svector_ostream S(MaskBytes); 1425 if (NeedPositiveMask) { 1426 encodeULEB128(PositiveMask.getZExtValue(), S); 1427 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1428 TableInfo.Table.push_back(MaskBytes[i]); 1429 } else 1430 TableInfo.Table.push_back(0); 1431 if (NeedNegativeMask) { 1432 MaskBytes.clear(); 1433 encodeULEB128(NegativeMask.getZExtValue(), S); 1434 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1435 TableInfo.Table.push_back(MaskBytes[i]); 1436 } else 1437 TableInfo.Table.push_back(0); 1438 } 1439 1440 // Emits table entries to decode the singleton. 1441 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1442 EncodingIDAndOpcode Opc) const { 1443 std::vector<unsigned> StartBits; 1444 std::vector<unsigned> EndBits; 1445 std::vector<uint64_t> FieldVals; 1446 insn_t Insn; 1447 insnWithID(Insn, Opc.EncodingID); 1448 1449 // Look for islands of undecoded bits of the singleton. 1450 getIslands(StartBits, EndBits, FieldVals, Insn); 1451 1452 unsigned Size = StartBits.size(); 1453 1454 // Emit the predicate table entry if one is needed. 1455 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1456 1457 // Check any additional encoding fields needed. 
1458 for (unsigned I = Size; I != 0; --I) { 1459 unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1; 1460 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 1461 TableInfo.Table.push_back(MCD::OPC_CheckField); 1462 uint8_t Buffer[16], *P; 1463 encodeULEB128(StartBits[I - 1], Buffer); 1464 for (P = Buffer; *P >= 128; ++P) 1465 TableInfo.Table.push_back(*P); 1466 TableInfo.Table.push_back(*P); 1467 TableInfo.Table.push_back(NumBits); 1468 encodeULEB128(FieldVals[I - 1], Buffer); 1469 for (P = Buffer; *P >= 128; ++P) 1470 TableInfo.Table.push_back(*P); 1471 TableInfo.Table.push_back(*P); 1472 // Push location for NumToSkip backpatching. 1473 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1474 // The fixup is always 24-bits, so go ahead and allocate the space 1475 // in the table so all our relative position calculations work OK even 1476 // before we fully resolve the real value here. 1477 TableInfo.Table.push_back(0); 1478 TableInfo.Table.push_back(0); 1479 TableInfo.Table.push_back(0); 1480 } 1481 1482 // Check for soft failure of the match. 1483 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1484 1485 bool HasCompleteDecoder; 1486 unsigned DIdx = 1487 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1488 1489 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1490 // whether the instruction decoder is complete or not. If it is complete 1491 // then it handles all possible values of remaining variable/unfiltered bits 1492 // and for any value can determine if the bitpattern is a valid instruction 1493 // or not. This means OPC_Decode will be the final step in the decoding 1494 // process. If it is not complete, then the Fail return code from the 1495 // decoder method indicates that additional processing should be done to see 1496 // if there is any other instruction that also matches the bitpattern and 1497 // can decode it. 
1498 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode 1499 : MCD::OPC_TryDecode); 1500 NumEncodingsSupported++; 1501 uint8_t Buffer[16], *p; 1502 encodeULEB128(Opc.Opcode, Buffer); 1503 for (p = Buffer; *p >= 128; ++p) 1504 TableInfo.Table.push_back(*p); 1505 TableInfo.Table.push_back(*p); 1506 1507 SmallString<16> Bytes; 1508 raw_svector_ostream S(Bytes); 1509 encodeULEB128(DIdx, S); 1510 1511 // Decoder index. 1512 for (const auto B : Bytes) 1513 TableInfo.Table.push_back(B); 1514 1515 if (!HasCompleteDecoder) { 1516 // Push location for NumToSkip backpatching. 1517 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1518 // Allocate the space for the fixup. 1519 TableInfo.Table.push_back(0); 1520 TableInfo.Table.push_back(0); 1521 TableInfo.Table.push_back(0); 1522 } 1523 } 1524 1525 // Emits table entries to decode the singleton, and then to decode the rest. 1526 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1527 const Filter &Best) const { 1528 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1529 1530 // complex singletons need predicate checks from the first singleton 1531 // to refer forward to the variable filterchooser that follows. 1532 TableInfo.FixupStack.emplace_back(); 1533 1534 emitSingletonTableEntry(TableInfo, Opc); 1535 1536 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1537 TableInfo.Table.size()); 1538 TableInfo.FixupStack.pop_back(); 1539 1540 Best.getVariableFC().emitTableEntries(TableInfo); 1541 } 1542 1543 // Assign a single filter and run with it. Top level API client can initialize 1544 // with a single filter to start the filtering process. 1545 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1546 bool mixed) { 1547 Filters.clear(); 1548 Filters.emplace_back(*this, startBit, numBit, true); 1549 BestIndex = 0; // Sole Filter instance to choose from. 
1550 bestFilter().recurse(); 1551 } 1552 1553 // reportRegion is a helper function for filterProcessor to mark a region as 1554 // eligible for use as a filter region. 1555 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1556 unsigned BitIndex, bool AllowMixed) { 1557 if (RA == ATTR_MIXED && AllowMixed) 1558 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1559 else if (RA == ATTR_ALL_SET && !AllowMixed) 1560 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1561 } 1562 1563 // FilterProcessor scans the well-known encoding bits of the instructions and 1564 // builds up a list of candidate filters. It chooses the best filter and 1565 // recursively descends down the decoding tree. 1566 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1567 Filters.clear(); 1568 BestIndex = -1; 1569 unsigned numInstructions = Opcodes.size(); 1570 1571 assert(numInstructions && "Filter created with no instructions"); 1572 1573 // No further filtering is necessary. 1574 if (numInstructions == 1) 1575 return true; 1576 1577 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1578 // instructions is 3. 1579 if (AllowMixed && !Greedy) { 1580 assert(numInstructions == 3); 1581 1582 for (const auto &Opcode : Opcodes) { 1583 std::vector<unsigned> StartBits; 1584 std::vector<unsigned> EndBits; 1585 std::vector<uint64_t> FieldVals; 1586 insn_t Insn; 1587 1588 insnWithID(Insn, Opcode.EncodingID); 1589 1590 // Look for islands of undecoded bits of any instruction. 1591 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1592 // Found an instruction with island(s). Now just assign a filter. 1593 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1594 return true; 1595 } 1596 } 1597 } 1598 1599 unsigned BitIndex; 1600 1601 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1602 // The automaton consumes the corresponding bit from each 1603 // instruction. 
1604 // 1605 // Input symbols: 0, 1, and _ (unset). 1606 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1607 // Initial state: NONE. 1608 // 1609 // (NONE) ------- [01] -> (ALL_SET) 1610 // (NONE) ------- _ ----> (ALL_UNSET) 1611 // (ALL_SET) ---- [01] -> (ALL_SET) 1612 // (ALL_SET) ---- _ ----> (MIXED) 1613 // (ALL_UNSET) -- [01] -> (MIXED) 1614 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1615 // (MIXED) ------ . ----> (MIXED) 1616 // (FILTERED)---- . ----> (FILTERED) 1617 1618 std::vector<bitAttr_t> bitAttrs; 1619 1620 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1621 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1622 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1623 if (FilterBitValues[BitIndex] == BIT_TRUE || 1624 FilterBitValues[BitIndex] == BIT_FALSE) 1625 bitAttrs.push_back(ATTR_FILTERED); 1626 else 1627 bitAttrs.push_back(ATTR_NONE); 1628 1629 for (const auto &OpcPair : Opcodes) { 1630 insn_t insn; 1631 1632 insnWithID(insn, OpcPair.EncodingID); 1633 1634 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1635 switch (bitAttrs[BitIndex]) { 1636 case ATTR_NONE: 1637 if (insn[BitIndex] == BIT_UNSET) 1638 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1639 else 1640 bitAttrs[BitIndex] = ATTR_ALL_SET; 1641 break; 1642 case ATTR_ALL_SET: 1643 if (insn[BitIndex] == BIT_UNSET) 1644 bitAttrs[BitIndex] = ATTR_MIXED; 1645 break; 1646 case ATTR_ALL_UNSET: 1647 if (insn[BitIndex] != BIT_UNSET) 1648 bitAttrs[BitIndex] = ATTR_MIXED; 1649 break; 1650 case ATTR_MIXED: 1651 case ATTR_FILTERED: 1652 break; 1653 } 1654 } 1655 } 1656 1657 // The regionAttr automaton consumes the bitAttrs automatons' state, 1658 // lowest-to-highest. 
1659 // 1660 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1661 // States: NONE, ALL_SET, MIXED 1662 // Initial state: NONE 1663 // 1664 // (NONE) ----- F --> (NONE) 1665 // (NONE) ----- S --> (ALL_SET) ; and set region start 1666 // (NONE) ----- U --> (NONE) 1667 // (NONE) ----- M --> (MIXED) ; and set region start 1668 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1669 // (ALL_SET) -- S --> (ALL_SET) 1670 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1671 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1672 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1673 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1674 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1675 // (MIXED) ---- M --> (MIXED) 1676 1677 bitAttr_t RA = ATTR_NONE; 1678 unsigned StartBit = 0; 1679 1680 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1681 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1682 1683 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1684 1685 switch (RA) { 1686 case ATTR_NONE: 1687 switch (bitAttr) { 1688 case ATTR_FILTERED: 1689 break; 1690 case ATTR_ALL_SET: 1691 StartBit = BitIndex; 1692 RA = ATTR_ALL_SET; 1693 break; 1694 case ATTR_ALL_UNSET: 1695 break; 1696 case ATTR_MIXED: 1697 StartBit = BitIndex; 1698 RA = ATTR_MIXED; 1699 break; 1700 default: 1701 llvm_unreachable("Unexpected bitAttr!"); 1702 } 1703 break; 1704 case ATTR_ALL_SET: 1705 switch (bitAttr) { 1706 case ATTR_FILTERED: 1707 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1708 RA = ATTR_NONE; 1709 break; 1710 case ATTR_ALL_SET: 1711 break; 1712 case ATTR_ALL_UNSET: 1713 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1714 RA = ATTR_NONE; 1715 break; 1716 case ATTR_MIXED: 1717 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1718 StartBit = BitIndex; 1719 RA = ATTR_MIXED; 1720 break; 1721 default: 1722 llvm_unreachable("Unexpected bitAttr!"); 1723 } 1724 break; 1725 case ATTR_MIXED: 1726 switch (bitAttr) { 
1727 case ATTR_FILTERED: 1728 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1729 StartBit = BitIndex; 1730 RA = ATTR_NONE; 1731 break; 1732 case ATTR_ALL_SET: 1733 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1734 StartBit = BitIndex; 1735 RA = ATTR_ALL_SET; 1736 break; 1737 case ATTR_ALL_UNSET: 1738 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1739 RA = ATTR_NONE; 1740 break; 1741 case ATTR_MIXED: 1742 break; 1743 default: 1744 llvm_unreachable("Unexpected bitAttr!"); 1745 } 1746 break; 1747 case ATTR_ALL_UNSET: 1748 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1749 case ATTR_FILTERED: 1750 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1751 } 1752 } 1753 1754 // At the end, if we're still in ALL_SET or MIXED states, report a region 1755 switch (RA) { 1756 case ATTR_NONE: 1757 break; 1758 case ATTR_FILTERED: 1759 break; 1760 case ATTR_ALL_SET: 1761 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1762 break; 1763 case ATTR_ALL_UNSET: 1764 break; 1765 case ATTR_MIXED: 1766 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1767 break; 1768 } 1769 1770 // We have finished with the filter processings. Now it's time to choose 1771 // the best performing filter. 1772 BestIndex = 0; 1773 bool AllUseless = true; 1774 unsigned BestScore = 0; 1775 1776 for (const auto &[Idx, Filter] : enumerate(Filters)) { 1777 unsigned Usefulness = Filter.usefulness(); 1778 1779 if (Usefulness) 1780 AllUseless = false; 1781 1782 if (Usefulness > BestScore) { 1783 BestIndex = Idx; 1784 BestScore = Usefulness; 1785 } 1786 } 1787 1788 if (!AllUseless) 1789 bestFilter().recurse(); 1790 1791 return !AllUseless; 1792 } // end of FilterChooser::filterProcessor(bool) 1793 1794 // Decides on the best configuration of filter(s) to use in order to decode 1795 // the instructions. A conflict of instructions may occur, in which case we 1796 // dump the conflict set to the standard error. 
void FilterChooser::doFilter() {
  unsigned Num = Opcodes.size();
  assert(Num && "FilterChooser created with no instructions");

  // Each attempt below returns as soon as filterProcessor() reports success.

  // Try regions of consecutive known bit values first.
  if (filterProcessor(false))
    return;

  // Then regions of mixed bits (both known and uninitialized bit values
  // allowed).
  if (filterProcessor(true))
    return;

  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
  // well-known encoding pattern. In such case, we backtrack and scan for the
  // very first consecutive ATTR_ALL_SET region and assign a filter to it.
  if (Num == 3 && filterProcessor(true, false))
    return;

  // If we come to here, the instruction decoding has failed.
  // Set the BestIndex to -1 to indicate so.
  BestIndex = -1;
}

// emitTableEntries - Emit state machine entries to decode our share of
// instructions.
//
// When neither a single instruction nor a best filter is available, the
// conflicting encodings are printed to the standard error and nothing is
// added to the table.
void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
  if (Opcodes.size() == 1) {
    // There is only one instruction in the set, which is great!
    // Call emitSingletonTableEntry() to see whether there are any remaining
    // encodings bits.
    emitSingletonTableEntry(TableInfo, Opcodes[0]);
    return;
  }

  // Choose the best filter to do the decodings!
  if (BestIndex != -1) {
    const Filter &Best = Filters[BestIndex];
    // A filter with exactly one filtered value takes the singleton path; any
    // other filter emits its own table entry.
    if (Best.getNumFiltered() == 1)
      emitSingletonTableEntry(TableInfo, Best);
    else
      Best.emitTableEntry(TableInfo);
    return;
  }

  // We don't know how to decode these instructions! Dump the
  // conflict set and bail.

  // Print out useful conflict information for postmortem analysis.
  errs() << "Decoding Conflict:\n";

  dumpStack(errs(), "\t\t");

  // One line per conflicting encoding: its name and ID, then its Inst bits.
  for (auto Opcode : Opcodes) {
    errs() << '\t';
    emitNameWithID(errs(), Opcode.EncodingID);
    errs() << " ";
    dumpBits(
        errs(),
        getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
    errs() << '\n';
  }
}

// Returns the name of the decoder method to use for the operand described by
// Record, or an empty string when none applies. A non-empty "DecoderMethod"
// string on the record wins; otherwise the name is derived from the kind of
// record.
static std::string findOperandDecoderMethod(const Record *Record) {
  std::string Decoder;

  const RecordVal *DecoderString = Record->getValue("DecoderMethod");
  const StringInit *String =
      DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
  if (String) {
    Decoder = std::string(String->getValue());
    if (!Decoder.empty())
      return Decoder;
  }

  if (Record->isSubClassOf("RegisterOperand"))
    // Allows use of a DecoderMethod in referenced RegisterClass if set.
    return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));

  // Default names: "Decode<Name>RegisterClass" for a register class and
  // "DecodePointerLikeRegClass<RegClassKind>" for a pointer-like one.
  if (Record->isSubClassOf("RegisterClass")) {
    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
    Decoder = "DecodePointerLikeRegClass" +
              utostr(Record->getValueAsInt("RegClassKind"));
  }

  return Decoder;
}

// Builds the OperandInfo for an operand of type TypeRecord: its decoder
// method name together with the value of the record's "hasCompleteDecoder"
// bit, which defaults to true when the field is missing or is not a bit.
OperandInfo getOpInfo(const Record *TypeRecord) {
  std::string Decoder = findOperandDecoderMethod(TypeRecord);

  const RecordVal *HasCompleteDecoderVal =
      TypeRecord->getValue("hasCompleteDecoder");
  BitInit *HasCompleteDecoderBit =
      HasCompleteDecoderVal
          ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
          : nullptr;
  bool HasCompleteDecoder =
      HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;

  return OperandInfo(Decoder, HasCompleteDecoder);
}

static void parseVarLenInstOperand(const Record &Def,
                                   std::vector<OperandInfo> &Operands,
                                   const CodeGenInstruction &CGI) {

  const RecordVal *RV = Def.getValue("Inst");
  VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
  SmallVector<int> TiedTo;

  for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
    if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
      for (auto *Arg : Op.MIOperandInfo->getArgs())
        Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
    else
      Operands.push_back(getOpInfo(Op.Rec));

    int TiedReg = Op.getTiedRegister();
    TiedTo.push_back(-1);
    if (TiedReg != -1) {
      TiedTo[Idx] = TiedReg;
      TiedTo[TiedReg] = Idx;
    }
  }

  unsigned CurrBitPos = 0;
  for (const auto &EncodingSegment : VLI) {
    unsigned Offset = 0;
    StringRef OpName;

    if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
      OpName = SI->getValue();
    } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
      OpName = cast<StringInit>(DI->getArg(0))->getValue();
      Offset = cast<IntInit>(DI->getArg(2))->getValue();
    }

    if (!OpName.empty()) {
      auto OpSubOpPair =
          const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
              OpName);
      unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
      Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      if (!EncodingSegment.CustomDecoder.empty())
        Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();

      int TiedReg = TiedTo[OpSubOpPair.first];
      if (TiedReg != -1) {
        unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
            std::pair(TiedReg, OpSubOpPair.second));
        Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      }
    }

1954 CurrBitPos += EncodingSegment.BitWidth; 1955 } 1956 } 1957 1958 static void debugDumpRecord(const Record &Rec) { 1959 // Dump the record, so we can see what's going on. 1960 PrintNote([&Rec](raw_ostream &OS) { 1961 OS << "Dumping record for previous error:\n"; 1962 OS << Rec; 1963 }); 1964 } 1965 1966 /// For an operand field named OpName: populate OpInfo.InitValue with the 1967 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1968 /// insert from the decoded instruction. 1969 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1970 std::map<std::string, std::string> &TiedNames, 1971 StringRef OpName, OperandInfo &OpInfo) { 1972 // Some bits of the operand may be required to be 1 depending on the 1973 // instruction's encoding. Collect those bits. 1974 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1975 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1976 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1977 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1978 if (OpBit->getValue()) 1979 OpInfo.InitValue |= 1ULL << I; 1980 1981 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1982 VarInit *Var; 1983 unsigned Offset = 0; 1984 for (; J != Bits.getNumBits(); ++J) { 1985 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1986 if (BJ) { 1987 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1988 if (I == J) 1989 Offset = BJ->getBitNum(); 1990 else if (BJ->getBitNum() != Offset + J - I) 1991 break; 1992 } else { 1993 Var = dyn_cast<VarInit>(Bits.getBit(J)); 1994 } 1995 if (!Var || (Var->getName() != OpName && 1996 Var->getName() != TiedNames[std::string(OpName)])) 1997 break; 1998 } 1999 if (I == J) 2000 ++J; 2001 else 2002 OpInfo.addField(I, J - I, Offset); 2003 } 2004 } 2005 2006 static unsigned 2007 populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef, 2008 const CodeGenInstruction &CGI, unsigned Opc, 2009 
std::map<unsigned, std::vector<OperandInfo>> &Operands, 2010 bool IsVarLenInst) { 2011 const Record &Def = *CGI.TheDef; 2012 // If all the bit positions are not specified; do not decode this instruction. 2013 // We are bound to fail! For proper disassembly, the well-known encoding bits 2014 // of the instruction must be fully specified. 2015 2016 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 2017 if (Bits.allInComplete()) 2018 return 0; 2019 2020 std::vector<OperandInfo> InsnOperands; 2021 2022 // If the instruction has specified a custom decoding hook, use that instead 2023 // of trying to auto-generate the decoder. 2024 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 2025 if (InstDecoder != "") { 2026 bool HasCompleteInstDecoder = 2027 EncodingDef.getValueAsBit("hasCompleteDecoder"); 2028 InsnOperands.push_back( 2029 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 2030 Operands[Opc] = InsnOperands; 2031 return Bits.getNumBits(); 2032 } 2033 2034 // Generate a description of the operand of the instruction that we know 2035 // how to decode automatically. 2036 // FIXME: We'll need to have a way to manually override this as needed. 2037 2038 // Gather the outputs/inputs of the instruction, so we can find their 2039 // positions in the encoding. This assumes for now that they appear in the 2040 // MCInst in the order that they're listed. 2041 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2042 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2043 DagInit *In = Def.getValueAsDag("InOperandList"); 2044 for (const auto &[Idx, Arg] : enumerate(Out->getArgs())) 2045 InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx))); 2046 for (const auto &[Idx, Arg] : enumerate(In->getArgs())) 2047 InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx))); 2048 2049 // Search for tied operands, so that we can correctly instantiate 2050 // operands that are not explicitly represented in the encoding. 
2051 std::map<std::string, std::string> TiedNames; 2052 for (const auto &[I, Op] : enumerate(CGI.Operands)) { 2053 for (const auto &[J, CI] : enumerate(Op.Constraints)) { 2054 if (CI.isTied()) { 2055 std::pair<unsigned, unsigned> SO = 2056 CGI.Operands.getSubOperandNumber(CI.getTiedOperand()); 2057 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2058 if (TiedName.empty()) 2059 TiedName = CGI.Operands[SO.first].Name; 2060 std::string MyName = Op.SubOpNames[J]; 2061 if (MyName.empty()) 2062 MyName = Op.Name; 2063 2064 TiedNames[MyName] = TiedName; 2065 TiedNames[TiedName] = MyName; 2066 } 2067 } 2068 } 2069 2070 if (IsVarLenInst) { 2071 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2072 } else { 2073 // For each operand, see if we can figure out where it is encoded. 2074 for (const auto &Op : InOutOperands) { 2075 Init *OpInit = Op.first; 2076 StringRef OpName = Op.second; 2077 2078 // We're ready to find the instruction encoding locations for this 2079 // operand. 2080 2081 // First, find the operand type ("OpInit"), and sub-op names 2082 // ("SubArgDag") if present. 2083 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2084 if (SubArgDag) 2085 OpInit = SubArgDag->getOperator(); 2086 const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2087 // Lookup the sub-operands from the operand type record (note that only 2088 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2089 const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2090 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2091 : nullptr; 2092 2093 // Lookup the decoder method and construct a new OperandInfo to hold our 2094 // result. 2095 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2096 2097 // If we have named sub-operands... 2098 if (SubArgDag) { 2099 // Then there should not be a custom decoder specified on the top-level 2100 // type. 
2101 if (!OpInfo.Decoder.empty()) { 2102 PrintError(EncodingDef.getLoc(), 2103 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2104 OpInit->getAsString() + 2105 "\" with a custom DecoderMethod, but also named " 2106 "sub-operands."); 2107 continue; 2108 } 2109 2110 // Decode each of the sub-ops separately. 2111 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2112 for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) { 2113 StringRef SubOpName = SubArgDag->getArgNameStr(I); 2114 OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef()); 2115 2116 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2117 SubOpInfo); 2118 InsnOperands.push_back(SubOpInfo); 2119 } 2120 continue; 2121 } 2122 2123 // Otherwise, if we have an operand with sub-operands, but they aren't 2124 // named... 2125 if (SubOps && OpInfo.Decoder.empty()) { 2126 // If it's a single sub-operand, and no custom decoder, use the decoder 2127 // from the one sub-operand. 2128 if (SubOps->getNumArgs() == 1) 2129 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2130 2131 // If we have multiple sub-ops, there'd better have a custom 2132 // decoder. (Otherwise we don't know how to populate them properly...) 2133 if (SubOps->getNumArgs() > 1) { 2134 PrintError(EncodingDef.getLoc(), 2135 "DecoderEmitter: operand \"" + OpName + 2136 "\" uses MIOperandInfo with multiple ops, but doesn't " 2137 "have a custom decoder!"); 2138 debugDumpRecord(EncodingDef); 2139 continue; 2140 } 2141 } 2142 2143 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2144 // FIXME: it should be an error not to find a definition for a given 2145 // operand, rather than just failing to add it to the resulting 2146 // instruction! (This is a longstanding bug, which will be addressed in an 2147 // upcoming change.) 
2148 if (OpInfo.numFields() > 0) 2149 InsnOperands.push_back(OpInfo); 2150 } 2151 } 2152 Operands[Opc] = InsnOperands; 2153 2154 #if 0 2155 LLVM_DEBUG({ 2156 // Dumps the instruction encoding bits. 2157 dumpBits(errs(), Bits); 2158 2159 errs() << '\n'; 2160 2161 // Dumps the list of operand info. 2162 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2163 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2164 const std::string &OperandName = Info.Name; 2165 const Record &OperandDef = *Info.Rec; 2166 2167 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2168 } 2169 }); 2170 #endif 2171 2172 return Bits.getNumBits(); 2173 } 2174 2175 // emitFieldFromInstruction - Emit the templated helper function 2176 // fieldFromInstruction(). 2177 // On Windows we make sure that this function is not inlined when 2178 // using the VS compiler. It has a bug which causes the function 2179 // to be optimized out in some circumstances. See llvm.org/pr38292 2180 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2181 OS << R"( 2182 // Helper functions for extracting fields from encoded instructions. 
2183 // InsnType must either be integral or an APInt-like object that must: 2184 // * be default-constructible and copy-constructible 2185 // * be constructible from an APInt (this can be private) 2186 // * Support insertBits(bits, startBit, numBits) 2187 // * Support extractBitsAsZExtValue(numBits, startBit) 2188 // * Support the ~, &, ==, and != operators with other objects of the same type 2189 // * Support the != and bitwise & with uint64_t 2190 // * Support put (<<) to raw_ostream& 2191 template <typename InsnType> 2192 #if defined(_MSC_VER) && !defined(__clang__) 2193 __declspec(noinline) 2194 #endif 2195 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType> 2196 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2197 unsigned numBits) { 2198 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!"); 2199 assert(startBit + numBits <= (sizeof(InsnType) * 8) && 2200 "Instruction field out of bounds!"); 2201 InsnType fieldMask; 2202 if (numBits == sizeof(InsnType) * 8) 2203 fieldMask = (InsnType)(-1LL); 2204 else 2205 fieldMask = (((InsnType)1 << numBits) - 1) << startBit; 2206 return (insn & fieldMask) >> startBit; 2207 } 2208 2209 template <typename InsnType> 2210 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t> 2211 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2212 unsigned numBits) { 2213 return insn.extractBitsAsZExtValue(numBits, startBit); 2214 } 2215 )"; 2216 } 2217 2218 // emitInsertBits - Emit the templated helper function insertBits(). 2219 static void emitInsertBits(formatted_raw_ostream &OS) { 2220 OS << R"( 2221 // Helper function for inserting bits extracted from an encoded instruction into 2222 // a field. 
2223 template <typename InsnType> 2224 static std::enable_if_t<std::is_integral<InsnType>::value> 2225 insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) { 2226 assert(startBit + numBits <= sizeof field * 8); 2227 field |= (InsnType)bits << startBit; 2228 } 2229 2230 template <typename InsnType> 2231 static std::enable_if_t<!std::is_integral<InsnType>::value> 2232 insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) { 2233 field.insertBits(bits, startBit, numBits); 2234 } 2235 )"; 2236 } 2237 2238 // emitDecodeInstruction - Emit the templated helper function 2239 // decodeInstruction(). 2240 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2241 bool IsVarLenInst) { 2242 OS << R"( 2243 template <typename InsnType> 2244 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, 2245 InsnType insn, uint64_t Address, 2246 const MCDisassembler *DisAsm, 2247 const MCSubtargetInfo &STI)"; 2248 if (IsVarLenInst) { 2249 OS << ",\n " 2250 "llvm::function_ref<void(APInt &, uint64_t)> makeUp"; 2251 } 2252 OS << R"() { 2253 const FeatureBitset &Bits = STI.getFeatureBits(); 2254 2255 const uint8_t *Ptr = DecodeTable; 2256 uint64_t CurFieldValue = 0; 2257 DecodeStatus S = MCDisassembler::Success; 2258 while (true) { 2259 ptrdiff_t Loc = Ptr - DecodeTable; 2260 switch (*Ptr) { 2261 default: 2262 errs() << Loc << ": Unexpected decode table opcode!\n"; 2263 return MCDisassembler::Fail; 2264 case MCD::OPC_ExtractField: { 2265 // Decode the start value. 2266 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr); 2267 unsigned Len = *Ptr++;)"; 2268 if (IsVarLenInst) 2269 OS << "\n makeUp(insn, Start + Len);"; 2270 OS << R"( 2271 CurFieldValue = fieldFromInstruction(insn, Start, Len); 2272 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", " 2273 << Len << "): " << CurFieldValue << "\n"); 2274 break; 2275 } 2276 case MCD::OPC_FilterValue: { 2277 // Decode the field value. 
2278 uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr); 2279 // NumToSkip is a plain 24-bit integer. 2280 unsigned NumToSkip = *Ptr++; 2281 NumToSkip |= (*Ptr++) << 8; 2282 NumToSkip |= (*Ptr++) << 16; 2283 2284 // Perform the filter operation. 2285 if (Val != CurFieldValue) 2286 Ptr += NumToSkip; 2287 LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip 2288 << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:") 2289 << " continuing at " << (Ptr - DecodeTable) << "\n"); 2290 2291 break; 2292 } 2293 case MCD::OPC_CheckField: { 2294 // Decode the start value. 2295 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr); 2296 unsigned Len = *Ptr;)"; 2297 if (IsVarLenInst) 2298 OS << "\n makeUp(insn, Start + Len);"; 2299 OS << R"( 2300 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len); 2301 // Decode the field value. 2302 unsigned PtrLen = 0; 2303 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen); 2304 Ptr += PtrLen; 2305 // NumToSkip is a plain 24-bit integer. 2306 unsigned NumToSkip = *Ptr++; 2307 NumToSkip |= (*Ptr++) << 8; 2308 NumToSkip |= (*Ptr++) << 16; 2309 2310 // If the actual and expected values don't match, skip. 2311 if (ExpectedValue != FieldValue) 2312 Ptr += NumToSkip; 2313 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " 2314 << Len << ", " << ExpectedValue << ", " << NumToSkip 2315 << "): FieldValue = " << FieldValue << ", ExpectedValue = " 2316 << ExpectedValue << ": " 2317 << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n")); 2318 break; 2319 } 2320 case MCD::OPC_CheckPredicate: { 2321 // Decode the Predicate Index value. 2322 unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr); 2323 // NumToSkip is a plain 24-bit integer. 2324 unsigned NumToSkip = *Ptr++; 2325 NumToSkip |= (*Ptr++) << 8; 2326 NumToSkip |= (*Ptr++) << 16; 2327 // Check the predicate. 
2328 bool Pred; 2329 if (!(Pred = checkDecoderPredicate(PIdx, Bits))) 2330 Ptr += NumToSkip; 2331 (void)Pred; 2332 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): " 2333 << (Pred ? "PASS\n" : "FAIL\n")); 2334 2335 break; 2336 } 2337 case MCD::OPC_Decode: { 2338 // Decode the Opcode value. 2339 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr); 2340 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); 2341 2342 MI.clear(); 2343 MI.setOpcode(Opc); 2344 bool DecodeComplete;)"; 2345 if (IsVarLenInst) { 2346 OS << "\n unsigned Len = InstrLenTable[Opc];\n" 2347 << " makeUp(insn, Len);"; 2348 } 2349 OS << R"( 2350 S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete); 2351 assert(DecodeComplete); 2352 2353 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc 2354 << ", using decoder " << DecodeIdx << ": " 2355 << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2356 return S; 2357 } 2358 case MCD::OPC_TryDecode: { 2359 // Decode the Opcode value. 2360 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr); 2361 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); 2362 // NumToSkip is a plain 24-bit integer. 2363 unsigned NumToSkip = *Ptr++; 2364 NumToSkip |= (*Ptr++) << 8; 2365 NumToSkip |= (*Ptr++) << 16; 2366 2367 // Perform the decode operation. 2368 MCInst TmpMI; 2369 TmpMI.setOpcode(Opc); 2370 bool DecodeComplete; 2371 S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete); 2372 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc 2373 << ", using decoder " << DecodeIdx << ": "); 2374 2375 if (DecodeComplete) { 2376 // Decoding complete. 2377 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2378 MI = TmpMI; 2379 return S; 2380 } else { 2381 assert(S == MCDisassembler::Fail); 2382 // If the decoding was incomplete, skip. 2383 Ptr += NumToSkip; 2384 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n"); 2385 // Reset decode status. 
This also drops a SoftFail status that could be 2386 // set before the decode attempt. 2387 S = MCDisassembler::Success; 2388 } 2389 break; 2390 } 2391 case MCD::OPC_SoftFail: { 2392 // Decode the mask values. 2393 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(++Ptr); 2394 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr); 2395 bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0; 2396 if (Fail) 2397 S = MCDisassembler::SoftFail; 2398 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n")); 2399 break; 2400 } 2401 case MCD::OPC_Fail: { 2402 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n"); 2403 return MCDisassembler::Fail; 2404 } 2405 } 2406 } 2407 llvm_unreachable("bogosity detected in disassembler state machine!"); 2408 } 2409 2410 )"; 2411 } 2412 2413 // Helper to propagate SoftFail status. Returns false if the status is Fail; 2414 // callers are expected to early-exit in that condition. (Note, the '&' operator 2415 // is correct to propagate the values of this enum; see comment on 'enum 2416 // DecodeStatus'.) 2417 static void emitCheck(formatted_raw_ostream &OS) { 2418 OS << R"( 2419 static bool Check(DecodeStatus &Out, DecodeStatus In) { 2420 Out = static_cast<DecodeStatus>(Out & In); 2421 return Out != MCDisassembler::Fail; 2422 } 2423 2424 )"; 2425 } 2426 2427 // Collect all HwModes referenced by the target for encoding purposes, 2428 // returning a vector of corresponding names. 
2429 static void collectHwModesReferencedForEncodings( 2430 const CodeGenHwModes &HWM, std::vector<StringRef> &Names, 2431 NamespacesHwModesMap &NamespacesWithHwModes) { 2432 SmallBitVector BV(HWM.getNumModeIds()); 2433 for (const auto &MS : HWM.getHwModeSelects()) { 2434 for (const HwModeSelect::PairType &P : MS.second.Items) { 2435 if (P.second->isSubClassOf("InstructionEncoding")) { 2436 std::string DecoderNamespace = 2437 std::string(P.second->getValueAsString("DecoderNamespace")); 2438 if (P.first == DefaultMode) { 2439 NamespacesWithHwModes[DecoderNamespace].insert(""); 2440 } else { 2441 NamespacesWithHwModes[DecoderNamespace].insert( 2442 HWM.getMode(P.first).Name); 2443 } 2444 BV.set(P.first); 2445 } 2446 } 2447 } 2448 transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) { 2449 if (M == DefaultMode) 2450 return StringRef(""); 2451 return HWM.getModeName(M, /*IncludeDefault=*/true); 2452 }); 2453 } 2454 2455 static void 2456 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr, 2457 const std::vector<StringRef> &HwModeNames, 2458 NamespacesHwModesMap &NamespacesWithHwModes, 2459 std::vector<EncodingAndInst> &GlobalEncodings) { 2460 const Record *InstDef = Instr->TheDef; 2461 2462 switch (DecoderEmitterSuppressDuplicates) { 2463 case SUPPRESSION_DISABLE: { 2464 for (StringRef HwModeName : HwModeNames) 2465 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); 2466 break; 2467 } 2468 case SUPPRESSION_LEVEL1: { 2469 std::string DecoderNamespace = 2470 std::string(InstDef->getValueAsString("DecoderNamespace")); 2471 auto It = NamespacesWithHwModes.find(DecoderNamespace); 2472 if (It != NamespacesWithHwModes.end()) { 2473 for (StringRef HwModeName : It->second) 2474 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); 2475 } else { 2476 // Only emit the encoding once, as it's DecoderNamespace doesn't 2477 // contain any HwModes. 
2478 GlobalEncodings.emplace_back(InstDef, Instr, ""); 2479 } 2480 break; 2481 } 2482 case SUPPRESSION_LEVEL2: 2483 GlobalEncodings.emplace_back(InstDef, Instr, ""); 2484 break; 2485 } 2486 } 2487 2488 // Emits disassembler code for instruction decoding. 2489 void DecoderEmitter::run(raw_ostream &o) { 2490 formatted_raw_ostream OS(o); 2491 OS << R"( 2492 #include "llvm/MC/MCInst.h" 2493 #include "llvm/MC/MCSubtargetInfo.h" 2494 #include "llvm/Support/DataTypes.h" 2495 #include "llvm/Support/Debug.h" 2496 #include "llvm/Support/LEB128.h" 2497 #include "llvm/Support/raw_ostream.h" 2498 #include "llvm/TargetParser/SubtargetFeature.h" 2499 #include <assert.h> 2500 2501 namespace llvm { 2502 )"; 2503 2504 emitFieldFromInstruction(OS); 2505 emitInsertBits(OS); 2506 emitCheck(OS); 2507 2508 Target.reverseBitsForLittleEndianEncoding(); 2509 2510 // Parameterize the decoders based on namespace and instruction width. 2511 2512 // First, collect all encoding-related HwModes referenced by the target. 2513 // And establish a mapping table between DecoderNamespace and HwMode. 2514 // If HwModeNames is empty, add the empty string so we always have one HwMode. 
2515 const CodeGenHwModes &HWM = Target.getHwModes(); 2516 std::vector<StringRef> HwModeNames; 2517 NamespacesHwModesMap NamespacesWithHwModes; 2518 collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes); 2519 if (HwModeNames.empty()) 2520 HwModeNames.push_back(""); 2521 2522 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2523 NumberedEncodings.reserve(NumberedInstructions.size()); 2524 for (const auto &NumberedInstruction : NumberedInstructions) { 2525 const Record *InstDef = NumberedInstruction->TheDef; 2526 if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) { 2527 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2528 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2529 for (auto &[ModeId, Encoding] : EBM) { 2530 // DecoderTables with DefaultMode should not have any suffix. 2531 if (ModeId == DefaultMode) { 2532 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, ""); 2533 } else { 2534 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, 2535 HWM.getMode(ModeId).Name); 2536 } 2537 } 2538 continue; 2539 } 2540 } 2541 // This instruction is encoded the same on all HwModes. 2542 // According to user needs, provide varying degrees of suppression. 
2543 handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames, 2544 NamespacesWithHwModes, NumberedEncodings); 2545 } 2546 for (const Record *NumberedAlias : 2547 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2548 NumberedEncodings.emplace_back( 2549 NumberedAlias, 2550 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2551 2552 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2553 OpcMap; 2554 std::map<unsigned, std::vector<OperandInfo>> Operands; 2555 std::vector<unsigned> InstrLen; 2556 bool IsVarLenInst = Target.hasVariableLengthEncodings(); 2557 unsigned MaxInstLen = 0; 2558 2559 for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) { 2560 const Record *EncodingDef = NumberedEncoding.EncodingDef; 2561 const CodeGenInstruction *Inst = NumberedEncoding.Inst; 2562 const Record *Def = Inst->TheDef; 2563 unsigned Size = EncodingDef->getValueAsInt("Size"); 2564 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2565 Def->getValueAsBit("isPseudo") || 2566 Def->getValueAsBit("isAsmParserOnly") || 2567 Def->getValueAsBit("isCodeGenOnly")) { 2568 NumEncodingsLackingDisasm++; 2569 continue; 2570 } 2571 2572 if (NEI < NumberedInstructions.size()) 2573 NumInstructions++; 2574 NumEncodings++; 2575 2576 if (!Size && !IsVarLenInst) 2577 continue; 2578 2579 if (IsVarLenInst) 2580 InstrLen.resize(NumberedInstructions.size(), 0); 2581 2582 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI, 2583 Operands, IsVarLenInst)) { 2584 if (IsVarLenInst) { 2585 MaxInstLen = std::max(MaxInstLen, Len); 2586 InstrLen[NEI] = Len; 2587 } 2588 std::string DecoderNamespace = 2589 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2590 if (!NumberedEncoding.HwModeName.empty()) 2591 DecoderNamespace += 2592 std::string("_") + NumberedEncoding.HwModeName.str(); 2593 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2594 NEI, Target.getInstrIntValue(Def)); 2595 } else { 2596 
NumEncodingsOmitted++; 2597 } 2598 } 2599 2600 DecoderTableInfo TableInfo; 2601 for (const auto &Opc : OpcMap) { 2602 // Emit the decoder for this namespace+width combination. 2603 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2604 NumberedEncodings.size()); 2605 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2606 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2607 2608 // The decode table is cleared for each top level decoder function. The 2609 // predicates and decoders themselves, however, are shared across all 2610 // decoders to give more opportunities for uniqueing. 2611 TableInfo.Table.clear(); 2612 TableInfo.FixupStack.clear(); 2613 TableInfo.Table.reserve(16384); 2614 TableInfo.FixupStack.emplace_back(); 2615 FC.emitTableEntries(TableInfo); 2616 // Any NumToSkip fixups in the top level scope can resolve to the 2617 // OPC_Fail at the end of the table. 2618 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2619 // Resolve any NumToSkip fixups in the current scope. 2620 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2621 TableInfo.Table.size()); 2622 TableInfo.FixupStack.clear(); 2623 2624 TableInfo.Table.push_back(MCD::OPC_Fail); 2625 2626 // Print the table to the output stream. 2627 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first, 2628 Opc.second); 2629 } 2630 2631 // For variable instruction, we emit a instruction length table 2632 // to let the decoder know how long the instructions are. 2633 // You can see example usage in M68k's disassembler. 2634 if (IsVarLenInst) 2635 emitInstrLenTable(OS, InstrLen); 2636 // Emit the predicate function. 2637 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2638 2639 // Emit the decoder function. 2640 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2641 2642 // Emit the main entry point for the decoder, decodeInstruction(). 
2643 emitDecodeInstruction(OS, IsVarLenInst); 2644 2645 OS << "\n} // end namespace llvm\n"; 2646 } 2647 2648 namespace llvm { 2649 2650 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2651 const std::string &PredicateNamespace) { 2652 DecoderEmitter(RK, PredicateNamespace).run(OS); 2653 } 2654 2655 } // end namespace llvm 2656