1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenHwModes.h" 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "InfoByHwMode.h" 18 #include "TableGenBackends.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/CachedHashString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallBitVector.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/MC/MCDecoderOps.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FormattedStream.h" 36 #include "llvm/Support/LEB128.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <cstddef> 43 #include <cstdint> 44 #include <map> 45 #include <memory> 46 #include <set> 47 #include <string> 48 #include <utility> 49 #include <vector> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "decoder-emitter" 54 55 extern cl::OptionCategory DisassemblerEmitterCat; 56 57 cl::opt<bool> DecoderEmitterSuppressDuplicates( 58 "suppress-per-hwmode-duplicates", 59 
cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"), 60 cl::init(false), cl::cat(DisassemblerEmitterCat)); 61 62 namespace { 63 64 STATISTIC(NumEncodings, "Number of encodings considered"); 65 STATISTIC(NumEncodingsLackingDisasm, 66 "Number of encodings without disassembler info"); 67 STATISTIC(NumInstructions, "Number of instructions considered"); 68 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 69 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 70 71 struct EncodingField { 72 unsigned Base, Width, Offset; 73 EncodingField(unsigned B, unsigned W, unsigned O) 74 : Base(B), Width(W), Offset(O) {} 75 }; 76 77 struct OperandInfo { 78 std::vector<EncodingField> Fields; 79 std::string Decoder; 80 bool HasCompleteDecoder; 81 uint64_t InitValue; 82 83 OperandInfo(std::string D, bool HCD) 84 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 85 86 void addField(unsigned Base, unsigned Width, unsigned Offset) { 87 Fields.push_back(EncodingField(Base, Width, Offset)); 88 } 89 90 unsigned numFields() const { return Fields.size(); } 91 92 typedef std::vector<EncodingField>::const_iterator const_iterator; 93 94 const_iterator begin() const { return Fields.begin(); } 95 const_iterator end() const { return Fields.end(); } 96 }; 97 98 typedef std::vector<uint8_t> DecoderTable; 99 typedef uint32_t DecoderFixup; 100 typedef std::vector<DecoderFixup> FixupList; 101 typedef std::vector<FixupList> FixupScopeList; 102 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 103 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 104 struct DecoderTableInfo { 105 DecoderTable Table; 106 FixupScopeList FixupStack; 107 PredicateSet Predicates; 108 DecoderSet Decoders; 109 }; 110 111 struct EncodingAndInst { 112 const Record *EncodingDef; 113 const CodeGenInstruction *Inst; 114 StringRef HwModeName; 115 116 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 117 StringRef HwModeName = "") 
118 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 119 }; 120 121 struct EncodingIDAndOpcode { 122 unsigned EncodingID; 123 unsigned Opcode; 124 125 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 126 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 127 : EncodingID(EncodingID), Opcode(Opcode) {} 128 }; 129 130 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>; 131 132 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 133 if (Value.EncodingDef != Value.Inst->TheDef) 134 OS << Value.EncodingDef->getName() << ":"; 135 OS << Value.Inst->TheDef->getName(); 136 return OS; 137 } 138 139 class DecoderEmitter { 140 RecordKeeper &RK; 141 std::vector<EncodingAndInst> NumberedEncodings; 142 143 public: 144 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) 145 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} 146 147 // Emit the decoder state machine table. 148 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 149 unsigned Indentation, unsigned BitWidth, StringRef Namespace, 150 const EncodingIDsVec &EncodingIDs) const; 151 void emitInstrLenTable(formatted_raw_ostream &OS, 152 std::vector<unsigned> &InstrLen) const; 153 void emitPredicateFunction(formatted_raw_ostream &OS, 154 PredicateSet &Predicates, 155 unsigned Indentation) const; 156 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, 157 unsigned Indentation) const; 158 159 // run - Output the code emitter 160 void run(raw_ostream &o); 161 162 private: 163 CodeGenTarget Target; 164 165 public: 166 std::string PredicateNamespace; 167 }; 168 169 } // end anonymous namespace 170 171 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 172 // for a bit value. 173 // 174 // BIT_UNFILTERED is used as the init value for a filter position. It is used 175 // only for filter processings. 176 typedef enum { 177 BIT_TRUE, // '1' 178 BIT_FALSE, // '0' 179 BIT_UNSET, // '?' 
180 BIT_UNFILTERED // unfiltered 181 } bit_value_t; 182 183 static bool ValueSet(bit_value_t V) { 184 return (V == BIT_TRUE || V == BIT_FALSE); 185 } 186 187 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); } 188 189 static int Value(bit_value_t V) { 190 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 191 } 192 193 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 194 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 195 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 196 197 // The bit is uninitialized. 198 return BIT_UNSET; 199 } 200 201 // Prints the bit value for each position. 202 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 203 for (unsigned index = bits.getNumBits(); index > 0; --index) { 204 switch (bitFromBits(bits, index - 1)) { 205 case BIT_TRUE: 206 o << "1"; 207 break; 208 case BIT_FALSE: 209 o << "0"; 210 break; 211 case BIT_UNSET: 212 o << "_"; 213 break; 214 default: 215 llvm_unreachable("unexpected return value from bitFromBits"); 216 } 217 } 218 } 219 220 static BitsInit &getBitsField(const Record &def, StringRef str) { 221 const RecordVal *RV = def.getValue(str); 222 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 223 return *Bits; 224 225 // variable length instruction 226 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 227 SmallVector<Init *, 16> Bits; 228 229 for (const auto &SI : VLI) { 230 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 231 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 232 Bits.push_back(BI->getBit(Idx)); 233 } 234 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 235 Bits.push_back(const_cast<BitInit *>(BI)); 236 } else { 237 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 238 Bits.push_back(UnsetInit::get(def.getRecords())); 239 } 240 } 241 242 return *BitsInit::get(def.getRecords(), Bits); 243 } 244 245 // Representation of the instruction to work on. 
246 typedef std::vector<bit_value_t> insn_t; 247 248 namespace { 249 250 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 251 252 class FilterChooser; 253 254 /// Filter - Filter works with FilterChooser to produce the decoding tree for 255 /// the ISA. 256 /// 257 /// It is useful to think of a Filter as governing the switch stmts of the 258 /// decoding tree in a certain level. Each case stmt delegates to an inferior 259 /// FilterChooser to decide what further decoding logic to employ, or in another 260 /// words, what other remaining bits to look at. The FilterChooser eventually 261 /// chooses a best Filter to do its job. 262 /// 263 /// This recursive scheme ends when the number of Opcodes assigned to the 264 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 265 /// the Filter/FilterChooser combo does not know how to distinguish among the 266 /// Opcodes assigned. 267 /// 268 /// An example of a conflict is 269 /// 270 /// Conflict: 271 /// 111101000.00........00010000.... 272 /// 111101000.00........0001........ 273 /// 1111010...00........0001........ 274 /// 1111010...00.................... 275 /// 1111010......................... 276 /// 1111............................ 277 /// ................................ 278 /// VST4q8a 111101000_00________00010000____ 279 /// VST4q8b 111101000_00________00010000____ 280 /// 281 /// The Debug output shows the path that the decoding tree follows to reach the 282 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 283 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers. 284 /// 285 /// The encoding info in the .td files does not specify this meta information, 286 /// which could have been used by the decoder to resolve the conflict. 
The 287 /// decoder could try to decode the even/odd register numbering and assign to 288 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 289 /// version and return the Opcode since the two have the same Asm format string. 290 class Filter { 291 protected: 292 const FilterChooser 293 *Owner; // points to the FilterChooser who owns this filter 294 unsigned StartBit; // the starting bit position 295 unsigned NumBits; // number of bits to filter 296 bool Mixed; // a mixed region contains both set and unset bits 297 298 // Map of well-known segment value to the set of uid's with that value. 299 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; 300 301 // Set of uid's with non-constant segment values. 302 std::vector<EncodingIDAndOpcode> VariableInstructions; 303 304 // Map of well-known segment value to its delegate. 305 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 306 307 // Number of instructions which fall under FilteredInstructions category. 308 unsigned NumFiltered; 309 310 // Keeps track of the last opcode in the filtered bucket. 311 EncodingIDAndOpcode LastOpcFiltered; 312 313 public: 314 Filter(Filter &&f); 315 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 316 317 ~Filter() = default; 318 319 unsigned getNumFiltered() const { return NumFiltered; } 320 321 EncodingIDAndOpcode getSingletonOpc() const { 322 assert(NumFiltered == 1); 323 return LastOpcFiltered; 324 } 325 326 // Return the filter chooser for the group of instructions without constant 327 // segment values. 328 const FilterChooser &getVariableFC() const { 329 assert(NumFiltered == 1); 330 assert(FilterChooserMap.size() == 1); 331 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 332 } 333 334 // Divides the decoding task into sub tasks and delegates them to the 335 // inferior FilterChooser's. 
336 // 337 // A special case arises when there's only one entry in the filtered 338 // instructions. In order to unambiguously decode the singleton, we need to 339 // match the remaining undecoded encoding bits against the singleton. 340 void recurse(); 341 342 // Emit table entries to decode instructions given a segment or segments of 343 // bits. 344 void emitTableEntry(DecoderTableInfo &TableInfo) const; 345 346 // Returns the number of fanout produced by the filter. More fanout implies 347 // the filter distinguishes more categories of instructions. 348 unsigned usefulness() const; 349 }; // end class Filter 350 351 } // end anonymous namespace 352 353 // These are states of our finite state machines used in FilterChooser's 354 // filterProcessor() which produces the filter candidates to use. 355 typedef enum { 356 ATTR_NONE, 357 ATTR_FILTERED, 358 ATTR_ALL_SET, 359 ATTR_ALL_UNSET, 360 ATTR_MIXED 361 } bitAttr_t; 362 363 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 364 /// in order to perform the decoding of instructions at the current level. 365 /// 366 /// Decoding proceeds from the top down. Based on the well-known encoding bits 367 /// of instructions available, FilterChooser builds up the possible Filters that 368 /// can further the task of decoding by distinguishing among the remaining 369 /// candidate instructions. 370 /// 371 /// Once a filter has been chosen, it is called upon to divide the decoding task 372 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 373 /// processings. 374 /// 375 /// It is useful to think of a Filter as governing the switch stmts of the 376 /// decoding tree. And each case is delegated to an inferior FilterChooser to 377 /// decide what further remaining bits to look at. 378 namespace { 379 380 class FilterChooser { 381 protected: 382 friend class Filter; 383 384 // Vector of codegen instructions to choose our filter. 
385 ArrayRef<EncodingAndInst> AllInstructions; 386 387 // Vector of uid's for this filter chooser to work on. 388 // The first member of the pair is the opcode id being decoded, the second is 389 // the opcode id that should be emitted. 390 const std::vector<EncodingIDAndOpcode> &Opcodes; 391 392 // Lookup table for the operand decoding of instructions. 393 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 394 395 // Vector of candidate filters. 396 std::vector<Filter> Filters; 397 398 // Array of bit values passed down from our parent. 399 // Set to all BIT_UNFILTERED's for Parent == NULL. 400 std::vector<bit_value_t> FilterBitValues; 401 402 // Links to the FilterChooser above us in the decoding tree. 403 const FilterChooser *Parent; 404 405 // Index of the best filter from Filters. 406 int BestIndex; 407 408 // Width of instructions 409 unsigned BitWidth; 410 411 // Parent emitter 412 const DecoderEmitter *Emitter; 413 414 public: 415 FilterChooser(ArrayRef<EncodingAndInst> Insts, 416 const std::vector<EncodingIDAndOpcode> &IDs, 417 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 418 unsigned BW, const DecoderEmitter *E) 419 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 420 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 421 BitWidth(BW), Emitter(E) { 422 doFilter(); 423 } 424 425 FilterChooser(ArrayRef<EncodingAndInst> Insts, 426 const std::vector<EncodingIDAndOpcode> &IDs, 427 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 428 const std::vector<bit_value_t> &ParentFilterBitValues, 429 const FilterChooser &parent) 430 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 431 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 432 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 433 doFilter(); 434 } 435 436 FilterChooser(const FilterChooser &) = delete; 437 void operator=(const FilterChooser &) = delete; 438 439 unsigned getBitWidth() const { return BitWidth; } 440 441 
protected: 442 // Populates the insn given the uid. 443 void insnWithID(insn_t &Insn, unsigned Opcode) const { 444 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 445 BitsInit &Bits = getBitsField(*EncodingDef, "Inst"); 446 Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET); 447 // We may have a SoftFail bitmask, which specifies a mask where an encoding 448 // may differ from the value in "Inst" and yet still be valid, but the 449 // disassembler should return SoftFail instead of Success. 450 // 451 // This is used for marking UNPREDICTABLE instructions in the ARM world. 452 const RecordVal *RV = EncodingDef->getValue("SoftFail"); 453 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 454 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 455 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 456 Insn[i] = BIT_UNSET; 457 else 458 Insn[i] = bitFromBits(Bits, i); 459 } 460 } 461 462 // Emit the name of the encoding/instruction pair. 463 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 464 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 465 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 466 if (EncodingDef != InstDef) 467 OS << EncodingDef->getName() << ":"; 468 OS << InstDef->getName(); 469 } 470 471 // Populates the field of the insn given the start position and the number of 472 // consecutive bits to scan for. 473 // 474 // Returns a pair of values (indicator, field), where the indicator is false 475 // if there exists any uninitialized bit value in the range and true if all 476 // bits are well-known. The second value is the potentially populated field. 477 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit, 478 unsigned NumBits) const; 479 480 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 481 /// filter array as a series of chars. 
482 void dumpFilterArray(raw_ostream &o, 483 const std::vector<bit_value_t> &filter) const; 484 485 /// dumpStack - dumpStack traverses the filter chooser chain and calls 486 /// dumpFilterArray on each filter chooser up to the top level one. 487 void dumpStack(raw_ostream &o, const char *prefix) const; 488 489 Filter &bestFilter() { 490 assert(BestIndex != -1 && "BestIndex not set"); 491 return Filters[BestIndex]; 492 } 493 494 bool PositionFiltered(unsigned i) const { 495 return ValueSet(FilterBitValues[i]); 496 } 497 498 // Calculates the island(s) needed to decode the instruction. 499 // This returns a lit of undecoded bits of an instructions, for example, 500 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 501 // decoded bits in order to verify that the instruction matches the Opcode. 502 unsigned getIslands(std::vector<unsigned> &StartBits, 503 std::vector<unsigned> &EndBits, 504 std::vector<uint64_t> &FieldVals, 505 const insn_t &Insn) const; 506 507 // Emits code to check the Predicates member of an instruction are true. 508 // Returns true if predicate matches were emitted, false otherwise. 509 bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 510 unsigned Opc) const; 511 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 512 raw_ostream &OS) const; 513 514 bool doesOpcodeNeedPredicate(unsigned Opc) const; 515 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 516 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 517 518 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 519 520 // Emits table entries to decode the singleton. 521 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 522 EncodingIDAndOpcode Opc) const; 523 524 // Emits code to decode the singleton, and then to decode the rest. 
525 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 526 const Filter &Best) const; 527 528 void emitBinaryParser(raw_ostream &o, unsigned &Indentation, 529 const OperandInfo &OpInfo, 530 bool &OpHasCompleteDecoder) const; 531 532 void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, 533 bool &HasCompleteDecoder) const; 534 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 535 bool &HasCompleteDecoder) const; 536 537 // Assign a single filter and run with it. 538 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 539 540 // reportRegion is a helper function for filterProcessor to mark a region as 541 // eligible for use as a filter region. 542 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 543 bool AllowMixed); 544 545 // FilterProcessor scans the well-known encoding bits of the instructions and 546 // builds up a list of candidate filters. It chooses the best filter and 547 // recursively descends down the decoding tree. 548 bool filterProcessor(bool AllowMixed, bool Greedy = true); 549 550 // Decides on the best configuration of filter(s) to use in order to decode 551 // the instructions. A conflict of instructions may occur, in which case we 552 // dump the conflict set to the standard error. 553 void doFilter(); 554 555 public: 556 // emitTableEntries - Emit state machine entries to decode our share of 557 // instructions. 
558 void emitTableEntries(DecoderTableInfo &TableInfo) const; 559 }; 560 561 } // end anonymous namespace 562 563 /////////////////////////// 564 // // 565 // Filter Implementation // 566 // // 567 /////////////////////////// 568 569 Filter::Filter(Filter &&f) 570 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 571 FilteredInstructions(std::move(f.FilteredInstructions)), 572 VariableInstructions(std::move(f.VariableInstructions)), 573 FilterChooserMap(std::move(f.FilterChooserMap)), 574 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} 575 576 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 577 bool mixed) 578 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 579 assert(StartBit + NumBits - 1 < Owner->BitWidth); 580 581 NumFiltered = 0; 582 LastOpcFiltered = {0, 0}; 583 584 for (const auto &OpcPair : Owner->Opcodes) { 585 insn_t Insn; 586 587 // Populates the insn given the uid. 588 Owner->insnWithID(Insn, OpcPair.EncodingID); 589 590 // Scans the segment for possibly well-specified encoding bits. 591 auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits); 592 593 if (Ok) { 594 // The encoding bits are well-known. Lets add the uid of the 595 // instruction into the bucket keyed off the constant field value. 596 LastOpcFiltered = OpcPair; 597 FilteredInstructions[Field].push_back(LastOpcFiltered); 598 ++NumFiltered; 599 } else { 600 // Some of the encoding bit(s) are unspecified. This contributes to 601 // one additional member of "Variable" instructions. 602 VariableInstructions.push_back(OpcPair); 603 } 604 } 605 606 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && 607 "Filter returns no instruction categories"); 608 } 609 610 // Divides the decoding task into sub tasks and delegates them to the 611 // inferior FilterChooser's. 612 // 613 // A special case arises when there's only one entry in the filtered 614 // instructions. 
In order to unambiguously decode the singleton, we need to 615 // match the remaining undecoded encoding bits against the singleton. 616 void Filter::recurse() { 617 // Starts by inheriting our parent filter chooser's filter bit values. 618 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 619 620 if (!VariableInstructions.empty()) { 621 // Conservatively marks each segment position as BIT_UNSET. 622 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 623 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 624 625 // Delegates to an inferior filter chooser for further processing on this 626 // group of instructions whose segment values are variable. 627 FilterChooserMap.insert(std::pair( 628 NO_FIXED_SEGMENTS_SENTINEL, 629 std::make_unique<FilterChooser>(Owner->AllInstructions, 630 VariableInstructions, Owner->Operands, 631 BitValueArray, *Owner))); 632 } 633 634 // No need to recurse for a singleton filtered instruction. 635 // See also Filter::emit*(). 636 if (getNumFiltered() == 1) { 637 assert(FilterChooserMap.size() == 1); 638 return; 639 } 640 641 // Otherwise, create sub choosers. 642 for (const auto &Inst : FilteredInstructions) { 643 644 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 645 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 646 if (Inst.first & (1ULL << bitIndex)) 647 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 648 else 649 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 650 } 651 652 // Delegates to an inferior filter chooser for further processing on this 653 // category of instructions. 654 FilterChooserMap.insert( 655 std::pair(Inst.first, std::make_unique<FilterChooser>( 656 Owner->AllInstructions, Inst.second, 657 Owner->Operands, BitValueArray, *Owner))); 658 } 659 } 660 661 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 662 uint32_t DestIdx) { 663 // Any NumToSkip fixups in the current scope can resolve to the 664 // current location. 
665 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 666 I != E; ++I) { 667 // Calculate the distance from the byte following the fixup entry byte 668 // to the destination. The Target is calculated from after the 16-bit 669 // NumToSkip entry itself, so subtract two from the displacement here 670 // to account for that. 671 uint32_t FixupIdx = *I; 672 uint32_t Delta = DestIdx - FixupIdx - 3; 673 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 674 // big. 675 assert(Delta < (1u << 24)); 676 Table[FixupIdx] = (uint8_t)Delta; 677 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 678 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 679 } 680 } 681 682 // Emit table entries to decode instructions given a segment or segments 683 // of bits. 684 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 685 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 686 TableInfo.Table.push_back(MCD::OPC_ExtractField); 687 688 SmallString<16> SBytes; 689 raw_svector_ostream S(SBytes); 690 encodeULEB128(StartBit, S); 691 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 692 TableInfo.Table.push_back(NumBits); 693 694 // A new filter entry begins a new scope for fixup resolution. 695 TableInfo.FixupStack.emplace_back(); 696 697 DecoderTable &Table = TableInfo.Table; 698 699 size_t PrevFilter = 0; 700 bool HasFallthrough = false; 701 for (const auto &Filter : FilterChooserMap) { 702 // Field value -1 implies a non-empty set of variable instructions. 703 // See also recurse(). 704 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 705 HasFallthrough = true; 706 707 // Each scope should always have at least one filter value to check 708 // for. 709 assert(PrevFilter != 0 && "empty filter set!"); 710 FixupList &CurScope = TableInfo.FixupStack.back(); 711 // Resolve any NumToSkip fixups in the current scope. 
712 resolveTableFixups(Table, CurScope, Table.size()); 713 CurScope.clear(); 714 PrevFilter = 0; // Don't re-process the filter's fallthrough. 715 } else { 716 Table.push_back(MCD::OPC_FilterValue); 717 // Encode and emit the value to filter against. 718 uint8_t Buffer[16]; 719 unsigned Len = encodeULEB128(Filter.first, Buffer); 720 Table.insert(Table.end(), Buffer, Buffer + Len); 721 // Reserve space for the NumToSkip entry. We'll backpatch the value 722 // later. 723 PrevFilter = Table.size(); 724 Table.push_back(0); 725 Table.push_back(0); 726 Table.push_back(0); 727 } 728 729 // We arrive at a category of instructions with the same segment value. 730 // Now delegate to the sub filter chooser for further decodings. 731 // The case may fallthrough, which happens if the remaining well-known 732 // encoding bits do not match exactly. 733 Filter.second->emitTableEntries(TableInfo); 734 735 // Now that we've emitted the body of the handler, update the NumToSkip 736 // of the filter itself to be able to skip forward when false. Subtract 737 // two as to account for the width of the NumToSkip field itself. 738 if (PrevFilter) { 739 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 740 assert(NumToSkip < (1u << 24) && 741 "disassembler decoding table too large!"); 742 Table[PrevFilter] = (uint8_t)NumToSkip; 743 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 744 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 745 } 746 } 747 748 // Any remaining unresolved fixups bubble up to the parent fixup scope. 749 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 750 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 751 FixupScopeList::iterator Dest = Source - 1; 752 llvm::append_range(*Dest, *Source); 753 TableInfo.FixupStack.pop_back(); 754 755 // If there is no fallthrough, then the final filter should get fixed 756 // up according to the enclosing scope rather than the current position. 
757 if (!HasFallthrough) 758 TableInfo.FixupStack.back().push_back(PrevFilter); 759 } 760 761 // Returns the number of fanout produced by the filter. More fanout implies 762 // the filter distinguishes more categories of instructions. 763 unsigned Filter::usefulness() const { 764 if (!VariableInstructions.empty()) 765 return FilteredInstructions.size(); 766 else 767 return FilteredInstructions.size() + 1; 768 } 769 770 ////////////////////////////////// 771 // // 772 // Filterchooser Implementation // 773 // // 774 ////////////////////////////////// 775 776 // Emit the decoder state machine table. 777 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 778 unsigned Indentation, unsigned BitWidth, 779 StringRef Namespace, 780 const EncodingIDsVec &EncodingIDs) const { 781 // We'll need to be able to map from a decoded opcode into the corresponding 782 // EncodingID for this specific combination of BitWidth and Namespace. This 783 // is used below to index into NumberedEncodings. 784 DenseMap<unsigned, unsigned> OpcodeToEncodingID; 785 OpcodeToEncodingID.reserve(EncodingIDs.size()); 786 for (const auto &EI : EncodingIDs) 787 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; 788 789 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 790 << BitWidth << "[] = {\n"; 791 792 Indentation += 2; 793 794 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 795 auto emitULEB128 = [](DecoderTable::const_iterator I, 796 formatted_raw_ostream &OS) { 797 unsigned Len = 0; 798 while (*I >= 128) { 799 OS << (unsigned)*I++ << ", "; 800 Len++; 801 } 802 OS << (unsigned)*I++ << ", "; 803 return Len + 1; 804 }; 805 806 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 
807 auto emitNumToSkip = [](DecoderTable::const_iterator I, 808 formatted_raw_ostream &OS) { 809 uint8_t Byte = *I++; 810 uint32_t NumToSkip = Byte; 811 OS << (unsigned)Byte << ", "; 812 Byte = *I++; 813 OS << (unsigned)Byte << ", "; 814 NumToSkip |= Byte << 8; 815 Byte = *I++; 816 OS << utostr(Byte) << ", "; 817 NumToSkip |= Byte << 16; 818 return NumToSkip; 819 }; 820 821 // FIXME: We may be able to use the NumToSkip values to recover 822 // appropriate indentation levels. 823 DecoderTable::const_iterator I = Table.begin(); 824 DecoderTable::const_iterator E = Table.end(); 825 while (I != E) { 826 assert(I < E && "incomplete decode table entry!"); 827 828 uint64_t Pos = I - Table.begin(); 829 OS << "/* " << Pos << " */"; 830 OS.PadToColumn(12); 831 832 switch (*I) { 833 default: 834 PrintFatalError("invalid decode table opcode"); 835 case MCD::OPC_ExtractField: { 836 ++I; 837 OS.indent(Indentation) << "MCD::OPC_ExtractField, "; 838 839 // ULEB128 encoded start value. 840 const char *ErrMsg = nullptr; 841 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 842 Table.data() + Table.size(), &ErrMsg); 843 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 844 I += emitULEB128(I, OS); 845 846 unsigned Len = *I++; 847 OS << Len << ", // Inst{"; 848 if (Len > 1) 849 OS << (Start + Len - 1) << "-"; 850 OS << Start << "} ...\n"; 851 break; 852 } 853 case MCD::OPC_FilterValue: { 854 ++I; 855 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 856 // The filter value is ULEB128 encoded. 857 I += emitULEB128(I, OS); 858 859 // 24-bit numtoskip value. 860 uint32_t NumToSkip = emitNumToSkip(I, OS); 861 I += 3; 862 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 863 break; 864 } 865 case MCD::OPC_CheckField: { 866 ++I; 867 OS.indent(Indentation) << "MCD::OPC_CheckField, "; 868 // ULEB128 encoded start value. 869 I += emitULEB128(I, OS); 870 // 8-bit length. 871 unsigned Len = *I++; 872 OS << Len << ", "; 873 // ULEB128 encoded field value. 
874 I += emitULEB128(I, OS); 875 876 // 24-bit numtoskip value. 877 uint32_t NumToSkip = emitNumToSkip(I, OS); 878 I += 3; 879 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 880 break; 881 } 882 case MCD::OPC_CheckPredicate: { 883 ++I; 884 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 885 I += emitULEB128(I, OS); 886 887 // 24-bit numtoskip value. 888 uint32_t NumToSkip = emitNumToSkip(I, OS); 889 I += 3; 890 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 891 break; 892 } 893 case MCD::OPC_Decode: 894 case MCD::OPC_TryDecode: { 895 bool IsTry = *I == MCD::OPC_TryDecode; 896 ++I; 897 // Decode the Opcode value. 898 const char *ErrMsg = nullptr; 899 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 900 Table.data() + Table.size(), &ErrMsg); 901 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 902 903 OS.indent(Indentation) 904 << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 905 I += emitULEB128(I, OS); 906 907 // Decoder index. 908 I += emitULEB128(I, OS); 909 910 auto EncI = OpcodeToEncodingID.find(Opc); 911 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry"); 912 auto EncodingID = EncI->second; 913 914 if (!IsTry) { 915 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n"; 916 break; 917 } 918 919 // Fallthrough for OPC_TryDecode. 920 921 // 24-bit numtoskip value. 
922 uint32_t NumToSkip = emitNumToSkip(I, OS); 923 I += 3; 924 925 OS << "// Opcode: " << NumberedEncodings[EncodingID] 926 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 927 break; 928 } 929 case MCD::OPC_SoftFail: { 930 ++I; 931 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 932 // Positive mask 933 uint64_t Value = 0; 934 unsigned Shift = 0; 935 do { 936 OS << ", " << (unsigned)*I; 937 Value += (*I & 0x7f) << Shift; 938 Shift += 7; 939 } while (*I++ >= 128); 940 if (Value > 127) { 941 OS << " /* 0x"; 942 OS.write_hex(Value); 943 OS << " */"; 944 } 945 // Negative mask 946 Value = 0; 947 Shift = 0; 948 do { 949 OS << ", " << (unsigned)*I; 950 Value += (*I & 0x7f) << Shift; 951 Shift += 7; 952 } while (*I++ >= 128); 953 if (Value > 127) { 954 OS << " /* 0x"; 955 OS.write_hex(Value); 956 OS << " */"; 957 } 958 OS << ",\n"; 959 break; 960 } 961 case MCD::OPC_Fail: { 962 ++I; 963 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 964 break; 965 } 966 } 967 } 968 OS.indent(Indentation) << "0\n"; 969 970 Indentation -= 2; 971 972 OS.indent(Indentation) << "};\n\n"; 973 } 974 975 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 976 std::vector<unsigned> &InstrLen) const { 977 OS << "static const uint8_t InstrLenTable[] = {\n"; 978 for (unsigned &Len : InstrLen) { 979 OS << Len << ",\n"; 980 } 981 OS << "};\n\n"; 982 } 983 984 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 985 PredicateSet &Predicates, 986 unsigned Indentation) const { 987 // The predicate function is just a big switch statement based on the 988 // input predicate index. 
989 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 990 << "const FeatureBitset &Bits) {\n"; 991 Indentation += 2; 992 if (!Predicates.empty()) { 993 OS.indent(Indentation) << "switch (Idx) {\n"; 994 OS.indent(Indentation) 995 << "default: llvm_unreachable(\"Invalid index!\");\n"; 996 unsigned Index = 0; 997 for (const auto &Predicate : Predicates) { 998 OS.indent(Indentation) << "case " << Index++ << ":\n"; 999 OS.indent(Indentation + 2) << "return (" << Predicate << ");\n"; 1000 } 1001 OS.indent(Indentation) << "}\n"; 1002 } else { 1003 // No case statement to emit 1004 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 1005 } 1006 Indentation -= 2; 1007 OS.indent(Indentation) << "}\n\n"; 1008 } 1009 1010 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 1011 DecoderSet &Decoders, 1012 unsigned Indentation) const { 1013 // The decoder function is just a big switch statement based on the 1014 // input decoder index. 1015 OS.indent(Indentation) << "template <typename InsnType>\n"; 1016 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1017 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1018 OS.indent(Indentation) 1019 << " uint64_t " 1020 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1021 Indentation += 2; 1022 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1023 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1024 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1025 // possible but fall back to an InsnType-sized tmp for truly large fields. 
1026 OS.indent(Indentation) << "using TmpType = " 1027 "std::conditional_t<std::is_integral<InsnType>::" 1028 "value, InsnType, uint64_t>;\n"; 1029 OS.indent(Indentation) << "TmpType tmp;\n"; 1030 OS.indent(Indentation) << "switch (Idx) {\n"; 1031 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1032 unsigned Index = 0; 1033 for (const auto &Decoder : Decoders) { 1034 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1035 OS << Decoder; 1036 OS.indent(Indentation + 2) << "return S;\n"; 1037 } 1038 OS.indent(Indentation) << "}\n"; 1039 Indentation -= 2; 1040 OS.indent(Indentation) << "}\n"; 1041 } 1042 1043 // Populates the field of the insn given the start position and the number of 1044 // consecutive bits to scan for. 1045 // 1046 // Returns a pair of values (indicator, field), where the indicator is false 1047 // if there exists any uninitialized bit value in the range and true if all 1048 // bits are well-known. The second value is the potentially populated field. 1049 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn, 1050 unsigned StartBit, 1051 unsigned NumBits) const { 1052 uint64_t Field = 0; 1053 1054 for (unsigned i = 0; i < NumBits; ++i) { 1055 if (Insn[StartBit + i] == BIT_UNSET) 1056 return {false, Field}; 1057 1058 if (Insn[StartBit + i] == BIT_TRUE) 1059 Field = Field | (1ULL << i); 1060 } 1061 1062 return {true, Field}; 1063 } 1064 1065 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1066 /// filter array as a series of chars. 
1067 void FilterChooser::dumpFilterArray( 1068 raw_ostream &o, const std::vector<bit_value_t> &filter) const { 1069 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1070 switch (filter[bitIndex - 1]) { 1071 case BIT_UNFILTERED: 1072 o << "."; 1073 break; 1074 case BIT_UNSET: 1075 o << "_"; 1076 break; 1077 case BIT_TRUE: 1078 o << "1"; 1079 break; 1080 case BIT_FALSE: 1081 o << "0"; 1082 break; 1083 } 1084 } 1085 } 1086 1087 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1088 /// dumpFilterArray on each filter chooser up to the top level one. 1089 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1090 const FilterChooser *current = this; 1091 1092 while (current) { 1093 o << prefix; 1094 dumpFilterArray(o, current->FilterBitValues); 1095 o << '\n'; 1096 current = current->Parent; 1097 } 1098 } 1099 1100 // Calculates the island(s) needed to decode the instruction. 1101 // This returns a list of undecoded bits of an instructions, for example, 1102 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1103 // decoded bits in order to verify that the instruction matches the Opcode. 
1104 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1105 std::vector<unsigned> &EndBits, 1106 std::vector<uint64_t> &FieldVals, 1107 const insn_t &Insn) const { 1108 unsigned Num, BitNo; 1109 Num = BitNo = 0; 1110 1111 uint64_t FieldVal = 0; 1112 1113 // 0: Init 1114 // 1: Water (the bit value does not affect decoding) 1115 // 2: Island (well-known bit value needed for decoding) 1116 int State = 0; 1117 1118 for (unsigned i = 0; i < BitWidth; ++i) { 1119 int64_t Val = Value(Insn[i]); 1120 bool Filtered = PositionFiltered(i); 1121 switch (State) { 1122 default: 1123 llvm_unreachable("Unreachable code!"); 1124 case 0: 1125 case 1: 1126 if (Filtered || Val == -1) 1127 State = 1; // Still in Water 1128 else { 1129 State = 2; // Into the Island 1130 BitNo = 0; 1131 StartBits.push_back(i); 1132 FieldVal = Val; 1133 } 1134 break; 1135 case 2: 1136 if (Filtered || Val == -1) { 1137 State = 1; // Into the Water 1138 EndBits.push_back(i - 1); 1139 FieldVals.push_back(FieldVal); 1140 ++Num; 1141 } else { 1142 State = 2; // Still in Island 1143 ++BitNo; 1144 FieldVal = FieldVal | Val << BitNo; 1145 } 1146 break; 1147 } 1148 } 1149 // If we are still in Island after the loop, do some housekeeping. 
1150 if (State == 2) { 1151 EndBits.push_back(BitWidth - 1); 1152 FieldVals.push_back(FieldVal); 1153 ++Num; 1154 } 1155 1156 assert(StartBits.size() == Num && EndBits.size() == Num && 1157 FieldVals.size() == Num); 1158 return Num; 1159 } 1160 1161 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1162 const OperandInfo &OpInfo, 1163 bool &OpHasCompleteDecoder) const { 1164 const std::string &Decoder = OpInfo.Decoder; 1165 1166 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1167 1168 if (UseInsertBits) { 1169 o.indent(Indentation) << "tmp = 0x"; 1170 o.write_hex(OpInfo.InitValue); 1171 o << ";\n"; 1172 } 1173 1174 for (const EncodingField &EF : OpInfo) { 1175 o.indent(Indentation); 1176 if (UseInsertBits) 1177 o << "insertBits(tmp, "; 1178 else 1179 o << "tmp = "; 1180 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1181 if (UseInsertBits) 1182 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1183 else if (EF.Offset != 0) 1184 o << " << " << EF.Offset; 1185 o << ";\n"; 1186 } 1187 1188 if (Decoder != "") { 1189 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1190 o.indent(Indentation) << "if (!Check(S, " << Decoder 1191 << "(MI, tmp, Address, Decoder))) { " 1192 << (OpHasCompleteDecoder ? "" 1193 : "DecodeComplete = false; ") 1194 << "return MCDisassembler::Fail; }\n"; 1195 } else { 1196 OpHasCompleteDecoder = true; 1197 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1198 } 1199 } 1200 1201 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1202 unsigned Opc, bool &HasCompleteDecoder) const { 1203 HasCompleteDecoder = true; 1204 1205 for (const auto &Op : Operands.find(Opc)->second) { 1206 // If a custom instruction decoder was specified, use that. 
1207 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1208 HasCompleteDecoder = Op.HasCompleteDecoder; 1209 OS.indent(Indentation) 1210 << "if (!Check(S, " << Op.Decoder 1211 << "(MI, insn, Address, Decoder))) { " 1212 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1213 << "return MCDisassembler::Fail; }\n"; 1214 break; 1215 } 1216 1217 bool OpHasCompleteDecoder; 1218 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1219 if (!OpHasCompleteDecoder) 1220 HasCompleteDecoder = false; 1221 } 1222 } 1223 1224 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1225 bool &HasCompleteDecoder) const { 1226 // Build up the predicate string. 1227 SmallString<256> Decoder; 1228 // FIXME: emitDecoder() function can take a buffer directly rather than 1229 // a stream. 1230 raw_svector_ostream S(Decoder); 1231 unsigned I = 4; 1232 emitDecoder(S, I, Opc, HasCompleteDecoder); 1233 1234 // Using the full decoder string as the key value here is a bit 1235 // heavyweight, but is effective. If the string comparisons become a 1236 // performance concern, we can implement a mangling of the predicate 1237 // data easily enough with a map back to the actual string. That's 1238 // overkill for now, though. 1239 1240 // Make sure the predicate is in the table. 1241 Decoders.insert(CachedHashString(Decoder)); 1242 // Now figure out the index for when we write out the table. 1243 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1244 return (unsigned)(P - Decoders.begin()); 1245 } 1246 1247 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 
// Writes the C++ expression for the predicate \p Val to \p OS.
// Returns true on failure (an Init this function cannot translate) and false
// on success.
bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
                                          raw_ostream &OS) const {
  if (const auto *D = dyn_cast<DefInit>(&Val)) {
    // Only SubtargetFeature defs can be tested; they become a Bits[] lookup.
    if (!D->getDef()->isSubClassOf("SubtargetFeature"))
      return true;
    OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
       << "]";
    return false;
  }
  if (const auto *D = dyn_cast<DagInit>(&Val)) {
    std::string Op = D->getOperator()->getAsString();
    if (Op == "not" && D->getNumArgs() == 1) {
      OS << '!';
      return emitPredicateMatchAux(*D->getArg(0), true, OS);
    }
    if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
      // Parenthesize only a multi-argument list when the caller asked for
      // it; std::exchange also forces parentheses on any nested binary op.
      bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
      if (Paren)
        OS << '(';
      ListSeparator LS(Op == "any_of" ? " || " : " && ");
      for (auto *Arg : D->getArgs()) {
        OS << LS;
        if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
          return true;
      }
      if (Paren)
        OS << ')';
      return false;
    }
  }
  return true;
}

// Writes to \p o the conjunction (" && ") of all predicates of encoding
// \p Opc that have an AssemblerMatcherPredicate value and whose
// AssemblerCondDag is a dag.  Reports a fatal error for a dag that cannot be
// translated.  Returns true if the encoding's Predicates list is non-empty.
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                                       unsigned Opc) const {
  ListInit *Predicates =
      AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
  bool IsFirstEmission = true;
  for (unsigned i = 0; i < Predicates->size(); ++i) {
    Record *Pred = Predicates->getElementAsRecord(i);
    if (!Pred->getValue("AssemblerMatcherPredicate"))
      continue;

    if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
      continue;

    if (!IsFirstEmission)
      o << " && ";
    if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
                              Predicates->size() > 1, o))
      PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
    IsFirstEmission = false;
  }
  return !Predicates->empty();
}

// Returns true if encoding \p Opc has at least one predicate that
// emitPredicateMatch() would emit, i.e. one with an
// AssemblerMatcherPredicate value and a dag AssemblerCondDag.
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
  ListInit *Predicates =
      AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
  for (unsigned i = 0; i < Predicates->size(); ++i) {
    Record *Pred = Predicates->getElementAsRecord(i);
    if (!Pred->getValue("AssemblerMatcherPredicate"))
      continue;

    if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
      return true;
  }
  return false;
}

// Makes sure \p Predicate is a member of TableInfo.Predicates and returns
// its position in that set.
unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
                                          StringRef Predicate) const {
  // Using the full predicate string as the key value here is a bit
  // heavyweight, but is effective. If the string comparisons become a
  // performance concern, we can implement a mangling of the predicate
  // data easily enough with a map back to the actual string. That's
  // overkill for now, though.

  // Make sure the predicate is in the table.
  TableInfo.Predicates.insert(CachedHashString(Predicate));
  // Now figure out the index for when we write out the table.
  PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
  return (unsigned)(P - TableInfo.Predicates.begin());
}

// Appends an OPC_CheckPredicate entry for encoding \p Opc to the table when
// the encoding needs one: the opcode byte, the ULEB128 predicate index, and
// three placeholder bytes for NumToSkip whose position is recorded in the
// innermost fixup scope.
void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
                                            unsigned Opc) const {
  if (!doesOpcodeNeedPredicate(Opc))
    return;

  // Build up the predicate string.
  SmallString<256> Predicate;
  // FIXME: emitPredicateMatch() functions can take a buffer directly rather
  // than a stream.
  raw_svector_ostream PS(Predicate);
  unsigned I = 0;
  emitPredicateMatch(PS, I, Opc);

  // Figure out the index into the predicate table for the predicate just
  // computed.
  unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
  SmallString<16> PBytes;
  raw_svector_ostream S(PBytes);
  encodeULEB128(PIdx, S);

  TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
  // Predicate index.
  for (const auto PB : PBytes)
    TableInfo.Table.push_back(PB);
  // Push location for NumToSkip backpatching.
  TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
  TableInfo.Table.push_back(0);
  TableInfo.Table.push_back(0);
  TableInfo.Table.push_back(0);
}

// Appends an OPC_SoftFail entry for encoding \p Opc when its "SoftFail" bits
// mark any bit of "Inst".  The entry is the opcode byte followed by two
// ULEB128 masks: the positive mask (SoftFail bits whose Inst bit is 0) and
// the negative mask (SoftFail bits whose Inst bit is 1).  An empty mask is
// written as a single 0 byte.
void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
                                           unsigned Opc) const {
  const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
  const RecordVal *RV = EncodingDef->getValue("SoftFail");
  BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;

  if (!SFBits)
    return;
  BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");

  APInt PositiveMask(BitWidth, 0ULL);
  APInt NegativeMask(BitWidth, 0ULL);
  for (unsigned i = 0; i < BitWidth; ++i) {
    bit_value_t B = bitFromBits(*SFBits, i);
    bit_value_t IB = bitFromBits(*InstBits, i);

    if (B != BIT_TRUE)
      continue;

    switch (IB) {
    case BIT_FALSE:
      // The bit is meant to be false, so emit a check to see if it is true.
      PositiveMask.setBit(i);
      break;
    case BIT_TRUE:
      // The bit is meant to be true, so emit a check to see if it is false.
      NegativeMask.setBit(i);
      break;
    default:
      // The bit is not set; this must be an error!
      // The conflict is reported and no SoftFail entry is emitted at all.
      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
             << AllInstructions[Opc] << " is set but Inst{" << i
             << "} is unset!\n"
             << " - You can only mark a bit as SoftFail if it is fully defined"
             << " (1/0 - not '?') in Inst\n";
      return;
    }
  }

  bool NeedPositiveMask = PositiveMask.getBoolValue();
  bool NeedNegativeMask = NegativeMask.getBoolValue();

  if (!NeedPositiveMask && !NeedNegativeMask)
    return;

  TableInfo.Table.push_back(MCD::OPC_SoftFail);

  // MaskBytes is reused for both masks; it is cleared before the second one.
  SmallString<16> MaskBytes;
  raw_svector_ostream S(MaskBytes);
  if (NeedPositiveMask) {
    encodeULEB128(PositiveMask.getZExtValue(), S);
    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
      TableInfo.Table.push_back(MaskBytes[i]);
  } else
    TableInfo.Table.push_back(0);
  if (NeedNegativeMask) {
    MaskBytes.clear();
    encodeULEB128(NegativeMask.getZExtValue(), S);
    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
      TableInfo.Table.push_back(MaskBytes[i]);
  } else
    TableInfo.Table.push_back(0);
}

// Emits table entries to decode the singleton.
//
// In order: the predicate check (if needed), one OPC_CheckField per island
// of not-yet-decoded known bits (highest island first), the soft-fail check
// (if needed), and finally OPC_Decode or OPC_TryDecode with the opcode and
// decoder index.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                            EncodingIDAndOpcode Opc) const {
  std::vector<unsigned> StartBits;
  std::vector<unsigned> EndBits;
  std::vector<uint64_t> FieldVals;
  insn_t Insn;
  insnWithID(Insn, Opc.EncodingID);

  // Look for islands of undecoded bits of the singleton.
  getIslands(StartBits, EndBits, FieldVals, Insn);

  unsigned Size = StartBits.size();

  // Emit the predicate table entry if one is needed.
  emitPredicateTableEntry(TableInfo, Opc.EncodingID);

  // Check any additional encoding fields needed.
  for (unsigned I = Size; I != 0; --I) {
    unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
    assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
    TableInfo.Table.push_back(MCD::OPC_CheckField);
    uint8_t Buffer[16], *P;
    // Copy the ULEB128 bytes: every byte >= 128 has a successor; the first
    // byte < 128 is the last one.
    encodeULEB128(StartBits[I - 1], Buffer);
    for (P = Buffer; *P >= 128; ++P)
      TableInfo.Table.push_back(*P);
    TableInfo.Table.push_back(*P);
    TableInfo.Table.push_back(NumBits);
    encodeULEB128(FieldVals[I - 1], Buffer);
    for (P = Buffer; *P >= 128; ++P)
      TableInfo.Table.push_back(*P);
    TableInfo.Table.push_back(*P);
    // Push location for NumToSkip backpatching.
    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
    // The fixup is always 24-bits, so go ahead and allocate the space
    // in the table so all our relative position calculations work OK even
    // before we fully resolve the real value here.
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
  }

  // Check for soft failure of the match.
  emitSoftFailTableEntry(TableInfo, Opc.EncodingID);

  bool HasCompleteDecoder;
  unsigned DIdx =
      getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);

  // Produce OPC_Decode or OPC_TryDecode opcode based on the information
  // whether the instruction decoder is complete or not. If it is complete
  // then it handles all possible values of remaining variable/unfiltered bits
  // and for any value can determine if the bitpattern is a valid instruction
  // or not. This means OPC_Decode will be the final step in the decoding
  // process. If it is not complete, then the Fail return code from the
  // decoder method indicates that additional processing should be done to see
  // if there is any other instruction that also matches the bitpattern and
  // can decode it.
  TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode
                                               : MCD::OPC_TryDecode);
  NumEncodingsSupported++;
  uint8_t Buffer[16], *p;
  encodeULEB128(Opc.Opcode, Buffer);
  for (p = Buffer; *p >= 128; ++p)
    TableInfo.Table.push_back(*p);
  TableInfo.Table.push_back(*p);

  SmallString<16> Bytes;
  raw_svector_ostream S(Bytes);
  encodeULEB128(DIdx, S);

  // Decoder index.
  for (const auto B : Bytes)
    TableInfo.Table.push_back(B);

  if (!HasCompleteDecoder) {
    // Push location for NumToSkip backpatching.
    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
    // Allocate the space for the fixup.
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
  }
}

// Emits table entries to decode the singleton, and then to decode the rest.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                            const Filter &Best) const {
  EncodingIDAndOpcode Opc = Best.getSingletonOpc();

  // complex singletons need predicate checks from the first singleton
  // to refer forward to the variable filterchooser that follows.
  TableInfo.FixupStack.emplace_back();

  emitSingletonTableEntry(TableInfo, Opc);

  // Resolve the singleton's fixups against the current end of the table,
  // which is where the variable filter chooser's entries begin.
  resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
                     TableInfo.Table.size());
  TableInfo.FixupStack.pop_back();

  Best.getVariableFC().emitTableEntries(TableInfo);
}

// Assign a single filter and run with it.  Top level API client can initialize
// with a single filter to start the filtering process.
//
// NOTE(review): the `mixed` parameter is never read in this body; the filter
// is always constructed with `true` as its last argument.  Confirm whether
// `mixed` was meant to be forwarded.
void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
                                    bool mixed) {
  Filters.clear();
  Filters.emplace_back(*this, startBit, numBit, true);
  BestIndex = 0; // Sole Filter instance to choose from.
  bestFilter().recurse();
}

// reportRegion is a helper function for filterProcessor to mark a region as
// eligible for use as a filter region.
//
// The region covers bits [StartBit, BitIndex).  A MIXED region is only
// recorded when AllowMixed is true, and an ALL_SET region only when it is
// false; every other combination is ignored.
void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
                                 unsigned BitIndex, bool AllowMixed) {
  if (RA == ATTR_MIXED && AllowMixed)
    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
  else if (RA == ATTR_ALL_SET && !AllowMixed)
    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
}

// FilterProcessor scans the well-known encoding bits of the instructions and
// builds up a list of candidate filters.  It chooses the best filter and
// recursively descends down the decoding tree.
//
// Returns true if no filtering is needed (a single instruction) or if a
// useful filter was found and recursed into; false if every candidate filter
// is useless.
bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
  Filters.clear();
  BestIndex = -1;
  unsigned numInstructions = Opcodes.size();

  assert(numInstructions && "Filter created with no instructions");

  // No further filtering is necessary.
  if (numInstructions == 1)
    return true;

  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
  // instructions is 3.
  if (AllowMixed && !Greedy) {
    assert(numInstructions == 3);

    for (const auto &Opcode : Opcodes) {
      std::vector<unsigned> StartBits;
      std::vector<unsigned> EndBits;
      std::vector<uint64_t> FieldVals;
      insn_t Insn;

      insnWithID(Insn, Opcode.EncodingID);

      // Look for islands of undecoded bits of any instruction.
      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
        // Found an instruction with island(s).  Now just assign a filter.
        runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
        return true;
      }
    }
  }

  unsigned BitIndex;

  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
  // The automaton consumes the corresponding bit from each
  // instruction.
  //
  //   Input symbols: 0, 1, and _ (unset).
  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
  //   Initial state: NONE.
  //
  // (NONE) ------- [01] -> (ALL_SET)
  // (NONE) ------- _ ----> (ALL_UNSET)
  // (ALL_SET) ---- [01] -> (ALL_SET)
  // (ALL_SET) ---- _ ----> (MIXED)
  // (ALL_UNSET) -- [01] -> (MIXED)
  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
  // (MIXED) ------ . ----> (MIXED)
  // (FILTERED)---- . ----> (FILTERED)

  std::vector<bitAttr_t> bitAttrs;

  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
  // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
    if (FilterBitValues[BitIndex] == BIT_TRUE ||
        FilterBitValues[BitIndex] == BIT_FALSE)
      bitAttrs.push_back(ATTR_FILTERED);
    else
      bitAttrs.push_back(ATTR_NONE);

  // Feed every instruction's bits through the per-bit automata.
  for (const auto &OpcPair : Opcodes) {
    insn_t insn;

    insnWithID(insn, OpcPair.EncodingID);

    for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
      switch (bitAttrs[BitIndex]) {
      case ATTR_NONE:
        if (insn[BitIndex] == BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_ALL_UNSET;
        else
          bitAttrs[BitIndex] = ATTR_ALL_SET;
        break;
      case ATTR_ALL_SET:
        if (insn[BitIndex] == BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_MIXED;
        break;
      case ATTR_ALL_UNSET:
        if (insn[BitIndex] != BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_MIXED;
        break;
      case ATTR_MIXED:
      case ATTR_FILTERED:
        break;
      }
    }
  }

  // The regionAttr automaton consumes the bitAttrs automatons' state,
  // lowest-to-highest.
  //
  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
  //   States:        NONE, ALL_SET, MIXED
  //   Initial state: NONE
  //
  // (NONE) ----- F --> (NONE)
  // (NONE) ----- S --> (ALL_SET)     ; and set region start
  // (NONE) ----- U --> (NONE)
  // (NONE) ----- M --> (MIXED)       ; and set region start
  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
  // (ALL_SET) -- S --> (ALL_SET)
  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
  // (MIXED) ---- M --> (MIXED)

  bitAttr_t RA = ATTR_NONE;
  unsigned StartBit = 0;

  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
    bitAttr_t bitAttr = bitAttrs[BitIndex];

    assert(bitAttr != ATTR_NONE && "Bit without attributes");

    switch (RA) {
    case ATTR_NONE:
      switch (bitAttr) {
      case ATTR_FILTERED:
        break;
      case ATTR_ALL_SET:
        StartBit = BitIndex;
        RA = ATTR_ALL_SET;
        break;
      case ATTR_ALL_UNSET:
        break;
      case ATTR_MIXED:
        StartBit = BitIndex;
        RA = ATTR_MIXED;
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_ALL_SET:
      switch (bitAttr) {
      case ATTR_FILTERED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_ALL_SET:
        break;
      case ATTR_ALL_UNSET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_MIXED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_MIXED;
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_MIXED:
      switch (bitAttr) {
      case ATTR_FILTERED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_NONE;
        break;
      case ATTR_ALL_SET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_ALL_SET;
        break;
      case ATTR_ALL_UNSET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_MIXED:
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_ALL_UNSET:
      llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
    case ATTR_FILTERED:
      llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
    }
  }

  // At the end, if we're still in ALL_SET or MIXED states, report a region
  // (BitIndex equals BitWidth here, so the region runs up to the top bit).
  switch (RA) {
  case ATTR_NONE:
    break;
  case ATTR_FILTERED:
    break;
  case ATTR_ALL_SET:
    reportRegion(RA, StartBit, BitIndex, AllowMixed);
    break;
  case ATTR_ALL_UNSET:
    break;
  case ATTR_MIXED:
    reportRegion(RA, StartBit, BitIndex, AllowMixed);
    break;
  }

  // We have finished with the filter processings.  Now it's time to choose
  // the best performing filter.  Ties keep the earliest candidate, since only
  // a strictly greater score replaces the current best.
  BestIndex = 0;
  bool AllUseless = true;
  unsigned BestScore = 0;

  for (const auto &[Idx, Filter] : enumerate(Filters)) {
    unsigned Usefulness = Filter.usefulness();

    if (Usefulness)
      AllUseless = false;

    if (Usefulness > BestScore) {
      BestIndex = Idx;
      BestScore = Usefulness;
    }
  }

  if (!AllUseless)
    bestFilter().recurse();

  return !AllUseless;
} // end of FilterChooser::filterProcessor(bool)

// Decides on the best configuration of filter(s) to use in order to decode
// the instructions.  A conflict of instructions may occur, in which case we
// dump the conflict set to the standard error.
void FilterChooser::doFilter() {
  unsigned Num = Opcodes.size();
  assert(Num && "FilterChooser created with no instructions");

  // Try regions of consecutive known bit values first.
  if (filterProcessor(false))
    return;

  // Then regions of mixed bits (both known and uninitialized bit values
  // allowed).
  if (filterProcessor(true))
    return;

  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
  // well-known encoding pattern.  In such case, we backtrack and scan for
  // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
  if (Num == 3 && filterProcessor(true, false))
    return;

  // If we come to here, the instruction decoding has failed.
  // Set the BestIndex to -1 to indicate so.
  BestIndex = -1;
}

// emitTableEntries - Emit state machine entries to decode our share of
// instructions.
//
// A single remaining instruction is emitted as a singleton; otherwise the
// best filter emits its entries.  When no filter was chosen (BestIndex is
// -1) the conflicting encodings are dumped to errs() and nothing is emitted.
void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
  if (Opcodes.size() == 1) {
    // There is only one instruction in the set, which is great!
    // Call emitSingletonDecoder() to see whether there are any remaining
    // encodings bits.
    emitSingletonTableEntry(TableInfo, Opcodes[0]);
    return;
  }

  // Choose the best filter to do the decodings!
  if (BestIndex != -1) {
    const Filter &Best = Filters[BestIndex];
    if (Best.getNumFiltered() == 1)
      emitSingletonTableEntry(TableInfo, Best);
    else
      Best.emitTableEntry(TableInfo);
    return;
  }

  // We don't know how to decode these instructions!  Dump the
  // conflict set and bail.

  // Print out useful conflict information for postmortem analysis.
  errs() << "Decoding Conflict:\n";

  dumpStack(errs(), "\t\t");

  for (auto Opcode : Opcodes) {
    errs() << '\t';
    emitNameWithID(errs(), Opcode.EncodingID);
    errs() << " ";
    dumpBits(
        errs(),
        getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
    errs() << '\n';
  }
}

// Returns the name of the decoder method for operand record \p Record.
//
// A non-empty string "DecoderMethod" value wins.  Otherwise the name is
// derived from the register class: "Decode<Name>RegisterClass" for a
// RegisterClass (a RegisterOperand is first replaced by its RegClass), or
// "DecodePointerLikeRegClass<RegClassKind>" for a PointerLikeRegClass.
// Returns an empty string when none of these apply.
static std::string findOperandDecoderMethod(Record *Record) {
  std::string Decoder;

  RecordVal *DecoderString = Record->getValue("DecoderMethod");
  StringInit *String =
      DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
  if (String) {
    Decoder = std::string(String->getValue());
    if (!Decoder.empty())
      return Decoder;
  }

  if (Record->isSubClassOf("RegisterOperand"))
    Record = Record->getValueAsDef("RegClass");

  if (Record->isSubClassOf("RegisterClass")) {
    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
    Decoder = "DecodePointerLikeRegClass" +
              utostr(Record->getValueAsInt("RegClassKind"));
  }

  return Decoder;
}

// Builds the OperandInfo for operand type \p TypeRecord: its decoder method
// name plus its "hasCompleteDecoder" bit, which defaults to true when the
// record has no such bit value.
OperandInfo getOpInfo(Record *TypeRecord) {
  std::string Decoder = findOperandDecoderMethod(TypeRecord);

  RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
  BitInit *HasCompleteDecoderBit =
      HasCompleteDecoderVal
          ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
          : nullptr;
  bool HasCompleteDecoder =
      HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;

  return OperandInfo(Decoder, HasCompleteDecoder);
}

void parseVarLenInstOperand(const Record &Def,
                            std::vector<OperandInfo> &Operands,
                            const CodeGenInstruction &CGI) {

  const RecordVal *RV = Def.getValue("Inst");
  VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
  SmallVector<int> TiedTo;

  for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
    if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
      for (auto *Arg : Op.MIOperandInfo->getArgs())
        Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
    else
      Operands.push_back(getOpInfo(Op.Rec));

    int TiedReg = Op.getTiedRegister();
    TiedTo.push_back(-1);
    if (TiedReg != -1) {
      TiedTo[Idx] = TiedReg;
      TiedTo[TiedReg] = Idx;
    }
  }

  unsigned CurrBitPos = 0;
  for (const auto &EncodingSegment : VLI) {
    unsigned Offset = 0;
    StringRef OpName;

    if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
      OpName = SI->getValue();
    } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
      OpName = cast<StringInit>(DI->getArg(0))->getValue();
      Offset = cast<IntInit>(DI->getArg(2))->getValue();
    }

    if (!OpName.empty()) {
      auto OpSubOpPair =
          const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
              OpName);
      unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
      Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      if (!EncodingSegment.CustomDecoder.empty())
        Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();

      int TiedReg = TiedTo[OpSubOpPair.first];
      if (TiedReg != -1) {
        unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
            std::pair(TiedReg, OpSubOpPair.second));
        Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      }
    }

CurrBitPos += EncodingSegment.BitWidth; 1941 } 1942 } 1943 1944 static void debugDumpRecord(const Record &Rec) { 1945 // Dump the record, so we can see what's going on... 1946 std::string E; 1947 raw_string_ostream S(E); 1948 S << "Dumping record for previous error:\n"; 1949 S << Rec; 1950 PrintNote(E); 1951 } 1952 1953 /// For an operand field named OpName: populate OpInfo.InitValue with the 1954 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1955 /// insert from the decoded instruction. 1956 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1957 std::map<std::string, std::string> &TiedNames, 1958 StringRef OpName, OperandInfo &OpInfo) { 1959 // Some bits of the operand may be required to be 1 depending on the 1960 // instruction's encoding. Collect those bits. 1961 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1962 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1963 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1964 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1965 if (OpBit->getValue()) 1966 OpInfo.InitValue |= 1ULL << I; 1967 1968 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1969 VarInit *Var; 1970 unsigned Offset = 0; 1971 for (; J != Bits.getNumBits(); ++J) { 1972 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1973 if (BJ) { 1974 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1975 if (I == J) 1976 Offset = BJ->getBitNum(); 1977 else if (BJ->getBitNum() != Offset + J - I) 1978 break; 1979 } else { 1980 Var = dyn_cast<VarInit>(Bits.getBit(J)); 1981 } 1982 if (!Var || (Var->getName() != OpName && 1983 Var->getName() != TiedNames[std::string(OpName)])) 1984 break; 1985 } 1986 if (I == J) 1987 ++J; 1988 else 1989 OpInfo.addField(I, J - I, Offset); 1990 } 1991 } 1992 1993 static unsigned 1994 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 1995 const CodeGenInstruction &CGI, unsigned Opc, 
1996 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1997 bool IsVarLenInst) { 1998 const Record &Def = *CGI.TheDef; 1999 // If all the bit positions are not specified; do not decode this instruction. 2000 // We are bound to fail! For proper disassembly, the well-known encoding bits 2001 // of the instruction must be fully specified. 2002 2003 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 2004 if (Bits.allInComplete()) 2005 return 0; 2006 2007 std::vector<OperandInfo> InsnOperands; 2008 2009 // If the instruction has specified a custom decoding hook, use that instead 2010 // of trying to auto-generate the decoder. 2011 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 2012 if (InstDecoder != "") { 2013 bool HasCompleteInstDecoder = 2014 EncodingDef.getValueAsBit("hasCompleteDecoder"); 2015 InsnOperands.push_back( 2016 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 2017 Operands[Opc] = InsnOperands; 2018 return Bits.getNumBits(); 2019 } 2020 2021 // Generate a description of the operand of the instruction that we know 2022 // how to decode automatically. 2023 // FIXME: We'll need to have a way to manually override this as needed. 2024 2025 // Gather the outputs/inputs of the instruction, so we can find their 2026 // positions in the encoding. This assumes for now that they appear in the 2027 // MCInst in the order that they're listed. 2028 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2029 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2030 DagInit *In = Def.getValueAsDag("InOperandList"); 2031 for (const auto &[Idx, Arg] : enumerate(Out->getArgs())) 2032 InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx))); 2033 for (const auto &[Idx, Arg] : enumerate(In->getArgs())) 2034 InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx))); 2035 2036 // Search for tied operands, so that we can correctly instantiate 2037 // operands that are not explicitly represented in the encoding. 
2038 std::map<std::string, std::string> TiedNames; 2039 for (const auto &[I, Op] : enumerate(CGI.Operands)) { 2040 for (const auto &[J, CI] : enumerate(Op.Constraints)) { 2041 if (CI.isTied()) { 2042 std::pair<unsigned, unsigned> SO = 2043 CGI.Operands.getSubOperandNumber(CI.getTiedOperand()); 2044 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2045 if (TiedName.empty()) 2046 TiedName = CGI.Operands[SO.first].Name; 2047 std::string MyName = Op.SubOpNames[J]; 2048 if (MyName.empty()) 2049 MyName = Op.Name; 2050 2051 TiedNames[MyName] = TiedName; 2052 TiedNames[TiedName] = MyName; 2053 } 2054 } 2055 } 2056 2057 if (IsVarLenInst) { 2058 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2059 } else { 2060 // For each operand, see if we can figure out where it is encoded. 2061 for (const auto &Op : InOutOperands) { 2062 Init *OpInit = Op.first; 2063 StringRef OpName = Op.second; 2064 2065 // We're ready to find the instruction encoding locations for this 2066 // operand. 2067 2068 // First, find the operand type ("OpInit"), and sub-op names 2069 // ("SubArgDag") if present. 2070 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2071 if (SubArgDag) 2072 OpInit = SubArgDag->getOperator(); 2073 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2074 // Lookup the sub-operands from the operand type record (note that only 2075 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2076 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2077 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2078 : nullptr; 2079 2080 // Lookup the decoder method and construct a new OperandInfo to hold our 2081 // result. 2082 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2083 2084 // If we have named sub-operands... 2085 if (SubArgDag) { 2086 // Then there should not be a custom decoder specified on the top-level 2087 // type. 
2088 if (!OpInfo.Decoder.empty()) { 2089 PrintError(EncodingDef.getLoc(), 2090 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2091 OpInit->getAsString() + 2092 "\" with a custom DecoderMethod, but also named " 2093 "sub-operands."); 2094 continue; 2095 } 2096 2097 // Decode each of the sub-ops separately. 2098 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2099 for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) { 2100 StringRef SubOpName = SubArgDag->getArgNameStr(I); 2101 OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef()); 2102 2103 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2104 SubOpInfo); 2105 InsnOperands.push_back(SubOpInfo); 2106 } 2107 continue; 2108 } 2109 2110 // Otherwise, if we have an operand with sub-operands, but they aren't 2111 // named... 2112 if (SubOps && OpInfo.Decoder.empty()) { 2113 // If it's a single sub-operand, and no custom decoder, use the decoder 2114 // from the one sub-operand. 2115 if (SubOps->getNumArgs() == 1) 2116 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2117 2118 // If we have multiple sub-ops, there'd better have a custom 2119 // decoder. (Otherwise we don't know how to populate them properly...) 2120 if (SubOps->getNumArgs() > 1) { 2121 PrintError(EncodingDef.getLoc(), 2122 "DecoderEmitter: operand \"" + OpName + 2123 "\" uses MIOperandInfo with multiple ops, but doesn't " 2124 "have a custom decoder!"); 2125 debugDumpRecord(EncodingDef); 2126 continue; 2127 } 2128 } 2129 2130 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2131 // FIXME: it should be an error not to find a definition for a given 2132 // operand, rather than just failing to add it to the resulting 2133 // instruction! (This is a longstanding bug, which will be addressed in an 2134 // upcoming change.) 
2135 if (OpInfo.numFields() > 0) 2136 InsnOperands.push_back(OpInfo); 2137 } 2138 } 2139 Operands[Opc] = InsnOperands; 2140 2141 #if 0 2142 LLVM_DEBUG({ 2143 // Dumps the instruction encoding bits. 2144 dumpBits(errs(), Bits); 2145 2146 errs() << '\n'; 2147 2148 // Dumps the list of operand info. 2149 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2150 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2151 const std::string &OperandName = Info.Name; 2152 const Record &OperandDef = *Info.Rec; 2153 2154 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2155 } 2156 }); 2157 #endif 2158 2159 return Bits.getNumBits(); 2160 } 2161 2162 // emitFieldFromInstruction - Emit the templated helper function 2163 // fieldFromInstruction(). 2164 // On Windows we make sure that this function is not inlined when 2165 // using the VS compiler. It has a bug which causes the function 2166 // to be optimized out in some circumstances. See llvm.org/pr38292 2167 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2168 OS << R"( 2169 // Helper functions for extracting fields from encoded instructions. 
2170 // InsnType must either be integral or an APInt-like object that must: 2171 // * be default-constructible and copy-constructible 2172 // * be constructible from an APInt (this can be private) 2173 // * Support insertBits(bits, startBit, numBits) 2174 // * Support extractBitsAsZExtValue(numBits, startBit) 2175 // * Support the ~, &, ==, and != operators with other objects of the same type 2176 // * Support the != and bitwise & with uint64_t 2177 // * Support put (<<) to raw_ostream& 2178 template <typename InsnType> 2179 #if defined(_MSC_VER) && !defined(__clang__) 2180 __declspec(noinline) 2181 #endif 2182 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType> 2183 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2184 unsigned numBits) { 2185 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!"); 2186 assert(startBit + numBits <= (sizeof(InsnType) * 8) && 2187 "Instruction field out of bounds!"); 2188 InsnType fieldMask; 2189 if (numBits == sizeof(InsnType) * 8) 2190 fieldMask = (InsnType)(-1LL); 2191 else 2192 fieldMask = (((InsnType)1 << numBits) - 1) << startBit; 2193 return (insn & fieldMask) >> startBit; 2194 } 2195 2196 template <typename InsnType> 2197 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t> 2198 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2199 unsigned numBits) { 2200 return insn.extractBitsAsZExtValue(numBits, startBit); 2201 } 2202 )"; 2203 } 2204 2205 // emitInsertBits - Emit the templated helper function insertBits(). 2206 static void emitInsertBits(formatted_raw_ostream &OS) { 2207 OS << R"( 2208 // Helper function for inserting bits extracted from an encoded instruction into 2209 // a field. 
2210 template <typename InsnType> 2211 static std::enable_if_t<std::is_integral<InsnType>::value> 2212 insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) { 2213 assert(startBit + numBits <= sizeof field * 8); 2214 field |= (InsnType)bits << startBit; 2215 } 2216 2217 template <typename InsnType> 2218 static std::enable_if_t<!std::is_integral<InsnType>::value> 2219 insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) { 2220 field.insertBits(bits, startBit, numBits); 2221 } 2222 )"; 2223 } 2224 2225 // emitDecodeInstruction - Emit the templated helper function 2226 // decodeInstruction(). 2227 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2228 bool IsVarLenInst) { 2229 OS << R"( 2230 template <typename InsnType> 2231 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, 2232 InsnType insn, uint64_t Address, 2233 const MCDisassembler *DisAsm, 2234 const MCSubtargetInfo &STI)"; 2235 if (IsVarLenInst) { 2236 OS << ",\n " 2237 "llvm::function_ref<void(APInt &, uint64_t)> makeUp"; 2238 } 2239 OS << R"() { 2240 const FeatureBitset &Bits = STI.getFeatureBits(); 2241 2242 const uint8_t *Ptr = DecodeTable; 2243 uint64_t CurFieldValue = 0; 2244 DecodeStatus S = MCDisassembler::Success; 2245 while (true) { 2246 ptrdiff_t Loc = Ptr - DecodeTable; 2247 switch (*Ptr) { 2248 default: 2249 errs() << Loc << ": Unexpected decode table opcode!\n"; 2250 return MCDisassembler::Fail; 2251 case MCD::OPC_ExtractField: { 2252 // Decode the start value. 
2253 unsigned DecodedLen; 2254 unsigned Start = decodeULEB128(++Ptr, &DecodedLen); 2255 Ptr += DecodedLen; 2256 unsigned Len = *Ptr++;)"; 2257 if (IsVarLenInst) 2258 OS << "\n makeUp(insn, Start + Len);"; 2259 OS << R"( 2260 CurFieldValue = fieldFromInstruction(insn, Start, Len); 2261 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", " 2262 << Len << "): " << CurFieldValue << "\n"); 2263 break; 2264 } 2265 case MCD::OPC_FilterValue: { 2266 // Decode the field value. 2267 unsigned Len; 2268 uint64_t Val = decodeULEB128(++Ptr, &Len); 2269 Ptr += Len; 2270 // NumToSkip is a plain 24-bit integer. 2271 unsigned NumToSkip = *Ptr++; 2272 NumToSkip |= (*Ptr++) << 8; 2273 NumToSkip |= (*Ptr++) << 16; 2274 2275 // Perform the filter operation. 2276 if (Val != CurFieldValue) 2277 Ptr += NumToSkip; 2278 LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip 2279 << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:") 2280 << " continuing at " << (Ptr - DecodeTable) << "\n"); 2281 2282 break; 2283 } 2284 case MCD::OPC_CheckField: { 2285 // Decode the start value. 2286 unsigned Len; 2287 unsigned Start = decodeULEB128(++Ptr, &Len); 2288 Ptr += Len; 2289 Len = *Ptr;)"; 2290 if (IsVarLenInst) 2291 OS << "\n makeUp(insn, Start + Len);"; 2292 OS << R"( 2293 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len); 2294 // Decode the field value. 2295 unsigned PtrLen = 0; 2296 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen); 2297 Ptr += PtrLen; 2298 // NumToSkip is a plain 24-bit integer. 2299 unsigned NumToSkip = *Ptr++; 2300 NumToSkip |= (*Ptr++) << 8; 2301 NumToSkip |= (*Ptr++) << 16; 2302 2303 // If the actual and expected values don't match, skip. 
2304 if (ExpectedValue != FieldValue) 2305 Ptr += NumToSkip; 2306 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " 2307 << Len << ", " << ExpectedValue << ", " << NumToSkip 2308 << "): FieldValue = " << FieldValue << ", ExpectedValue = " 2309 << ExpectedValue << ": " 2310 << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n")); 2311 break; 2312 } 2313 case MCD::OPC_CheckPredicate: { 2314 unsigned Len; 2315 // Decode the Predicate Index value. 2316 unsigned PIdx = decodeULEB128(++Ptr, &Len); 2317 Ptr += Len; 2318 // NumToSkip is a plain 24-bit integer. 2319 unsigned NumToSkip = *Ptr++; 2320 NumToSkip |= (*Ptr++) << 8; 2321 NumToSkip |= (*Ptr++) << 16; 2322 // Check the predicate. 2323 bool Pred; 2324 if (!(Pred = checkDecoderPredicate(PIdx, Bits))) 2325 Ptr += NumToSkip; 2326 (void)Pred; 2327 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): " 2328 << (Pred ? "PASS\n" : "FAIL\n")); 2329 2330 break; 2331 } 2332 case MCD::OPC_Decode: { 2333 unsigned Len; 2334 // Decode the Opcode value. 2335 unsigned Opc = decodeULEB128(++Ptr, &Len); 2336 Ptr += Len; 2337 unsigned DecodeIdx = decodeULEB128(Ptr, &Len); 2338 Ptr += Len; 2339 2340 MI.clear(); 2341 MI.setOpcode(Opc); 2342 bool DecodeComplete;)"; 2343 if (IsVarLenInst) { 2344 OS << "\n Len = InstrLenTable[Opc];\n" 2345 << " makeUp(insn, Len);"; 2346 } 2347 OS << R"( 2348 S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete); 2349 assert(DecodeComplete); 2350 2351 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc 2352 << ", using decoder " << DecodeIdx << ": " 2353 << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2354 return S; 2355 } 2356 case MCD::OPC_TryDecode: { 2357 unsigned Len; 2358 // Decode the Opcode value. 2359 unsigned Opc = decodeULEB128(++Ptr, &Len); 2360 Ptr += Len; 2361 unsigned DecodeIdx = decodeULEB128(Ptr, &Len); 2362 Ptr += Len; 2363 // NumToSkip is a plain 24-bit integer. 
2364 unsigned NumToSkip = *Ptr++; 2365 NumToSkip |= (*Ptr++) << 8; 2366 NumToSkip |= (*Ptr++) << 16; 2367 2368 // Perform the decode operation. 2369 MCInst TmpMI; 2370 TmpMI.setOpcode(Opc); 2371 bool DecodeComplete; 2372 S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete); 2373 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc 2374 << ", using decoder " << DecodeIdx << ": "); 2375 2376 if (DecodeComplete) { 2377 // Decoding complete. 2378 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2379 MI = TmpMI; 2380 return S; 2381 } else { 2382 assert(S == MCDisassembler::Fail); 2383 // If the decoding was incomplete, skip. 2384 Ptr += NumToSkip; 2385 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n"); 2386 // Reset decode status. This also drops a SoftFail status that could be 2387 // set before the decode attempt. 2388 S = MCDisassembler::Success; 2389 } 2390 break; 2391 } 2392 case MCD::OPC_SoftFail: { 2393 // Decode the mask values. 2394 unsigned Len; 2395 uint64_t PositiveMask = decodeULEB128(++Ptr, &Len); 2396 Ptr += Len; 2397 uint64_t NegativeMask = decodeULEB128(Ptr, &Len); 2398 Ptr += Len; 2399 bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0; 2400 if (Fail) 2401 S = MCDisassembler::SoftFail; 2402 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n")); 2403 break; 2404 } 2405 case MCD::OPC_Fail: { 2406 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n"); 2407 return MCDisassembler::Fail; 2408 } 2409 } 2410 } 2411 llvm_unreachable("bogosity detected in disassembler state machine!"); 2412 } 2413 2414 )"; 2415 } 2416 2417 // Helper to propagate SoftFail status. Returns false if the status is Fail; 2418 // callers are expected to early-exit in that condition. (Note, the '&' operator 2419 // is correct to propagate the values of this enum; see comment on 'enum 2420 // DecodeStatus'.) 
2421 static void emitCheck(formatted_raw_ostream &OS) { 2422 OS << R"( 2423 static bool Check(DecodeStatus &Out, DecodeStatus In) { 2424 Out = static_cast<DecodeStatus>(Out & In); 2425 return Out != MCDisassembler::Fail; 2426 } 2427 2428 )"; 2429 } 2430 2431 // Collect all HwModes referenced by the target for encoding purposes, 2432 // returning a vector of corresponding names. 2433 static void 2434 collectHwModesReferencedForEncodings(const CodeGenHwModes &HWM, 2435 std::vector<StringRef> &Names) { 2436 SmallBitVector BV(HWM.getNumModeIds()); 2437 for (const auto &MS : HWM.getHwModeSelects()) { 2438 for (const HwModeSelect::PairType &P : MS.second.Items) { 2439 if (P.second->isSubClassOf("InstructionEncoding")) 2440 BV.set(P.first); 2441 } 2442 } 2443 transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) { 2444 return HWM.getModeName(M, /*IncludeDefault=*/true); 2445 }); 2446 } 2447 2448 // Emits disassembler code for instruction decoding. 2449 void DecoderEmitter::run(raw_ostream &o) { 2450 formatted_raw_ostream OS(o); 2451 OS << R"( 2452 #include "llvm/MC/MCInst.h" 2453 #include "llvm/MC/MCSubtargetInfo.h" 2454 #include "llvm/Support/DataTypes.h" 2455 #include "llvm/Support/Debug.h" 2456 #include "llvm/Support/LEB128.h" 2457 #include "llvm/Support/raw_ostream.h" 2458 #include "llvm/TargetParser/SubtargetFeature.h" 2459 #include <assert.h> 2460 2461 namespace llvm { 2462 )"; 2463 2464 emitFieldFromInstruction(OS); 2465 emitInsertBits(OS); 2466 emitCheck(OS); 2467 2468 Target.reverseBitsForLittleEndianEncoding(); 2469 2470 // Parameterize the decoders based on namespace and instruction width. 2471 2472 // First, collect all encoding-related HwModes referenced by the target. 2473 // If HwModeNames is empty, add the empty string so we always have one HwMode. 
2474 const CodeGenHwModes &HWM = Target.getHwModes(); 2475 std::vector<StringRef> HwModeNames; 2476 collectHwModesReferencedForEncodings(HWM, HwModeNames); 2477 if (HwModeNames.empty()) 2478 HwModeNames.push_back(""); 2479 2480 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2481 NumberedEncodings.reserve(NumberedInstructions.size()); 2482 for (const auto &NumberedInstruction : NumberedInstructions) { 2483 const Record *InstDef = NumberedInstruction->TheDef; 2484 if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) { 2485 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2486 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2487 for (auto &KV : EBM) 2488 NumberedEncodings.emplace_back( 2489 KV.second, NumberedInstruction, 2490 HWM.getModeName(KV.first, /*IncludeDefault=*/true)); 2491 continue; 2492 } 2493 } 2494 // This instruction is encoded the same on all HwModes. Emit it for all 2495 // HwModes by default, otherwise leave it in a single common table. 
2496 if (DecoderEmitterSuppressDuplicates) { 2497 NumberedEncodings.emplace_back(InstDef, NumberedInstruction, "AllModes"); 2498 } else { 2499 for (StringRef HwModeName : HwModeNames) 2500 NumberedEncodings.emplace_back(InstDef, NumberedInstruction, 2501 HwModeName); 2502 } 2503 } 2504 for (const auto &NumberedAlias : 2505 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2506 NumberedEncodings.emplace_back( 2507 NumberedAlias, 2508 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2509 2510 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2511 OpcMap; 2512 std::map<unsigned, std::vector<OperandInfo>> Operands; 2513 std::vector<unsigned> InstrLen; 2514 bool IsVarLenInst = Target.hasVariableLengthEncodings(); 2515 unsigned MaxInstLen = 0; 2516 2517 for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) { 2518 const Record *EncodingDef = NumberedEncoding.EncodingDef; 2519 const CodeGenInstruction *Inst = NumberedEncoding.Inst; 2520 const Record *Def = Inst->TheDef; 2521 unsigned Size = EncodingDef->getValueAsInt("Size"); 2522 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2523 Def->getValueAsBit("isPseudo") || 2524 Def->getValueAsBit("isAsmParserOnly") || 2525 Def->getValueAsBit("isCodeGenOnly")) { 2526 NumEncodingsLackingDisasm++; 2527 continue; 2528 } 2529 2530 if (NEI < NumberedInstructions.size()) 2531 NumInstructions++; 2532 NumEncodings++; 2533 2534 if (!Size && !IsVarLenInst) 2535 continue; 2536 2537 if (IsVarLenInst) 2538 InstrLen.resize(NumberedInstructions.size(), 0); 2539 2540 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI, 2541 Operands, IsVarLenInst)) { 2542 if (IsVarLenInst) { 2543 MaxInstLen = std::max(MaxInstLen, Len); 2544 InstrLen[NEI] = Len; 2545 } 2546 std::string DecoderNamespace = 2547 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2548 if (!NumberedEncoding.HwModeName.empty()) 2549 DecoderNamespace += 2550 std::string("_") + 
NumberedEncoding.HwModeName.str(); 2551 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2552 NEI, Target.getInstrIntValue(Def)); 2553 } else { 2554 NumEncodingsOmitted++; 2555 } 2556 } 2557 2558 DecoderTableInfo TableInfo; 2559 for (const auto &Opc : OpcMap) { 2560 // Emit the decoder for this namespace+width combination. 2561 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2562 NumberedEncodings.size()); 2563 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2564 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2565 2566 // The decode table is cleared for each top level decoder function. The 2567 // predicates and decoders themselves, however, are shared across all 2568 // decoders to give more opportunities for uniqueing. 2569 TableInfo.Table.clear(); 2570 TableInfo.FixupStack.clear(); 2571 TableInfo.Table.reserve(16384); 2572 TableInfo.FixupStack.emplace_back(); 2573 FC.emitTableEntries(TableInfo); 2574 // Any NumToSkip fixups in the top level scope can resolve to the 2575 // OPC_Fail at the end of the table. 2576 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2577 // Resolve any NumToSkip fixups in the current scope. 2578 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2579 TableInfo.Table.size()); 2580 TableInfo.FixupStack.clear(); 2581 2582 TableInfo.Table.push_back(MCD::OPC_Fail); 2583 2584 // Print the table to the output stream. 2585 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first, 2586 Opc.second); 2587 } 2588 2589 // For variable instruction, we emit a instruction length table 2590 // to let the decoder know how long the instructions are. 2591 // You can see example usage in M68k's disassembler. 2592 if (IsVarLenInst) 2593 emitInstrLenTable(OS, InstrLen); 2594 // Emit the predicate function. 2595 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2596 2597 // Emit the decoder function. 
2598 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2599 2600 // Emit the main entry point for the decoder, decodeInstruction(). 2601 emitDecodeInstruction(OS, IsVarLenInst); 2602 2603 OS << "\n} // end namespace llvm\n"; 2604 } 2605 2606 namespace llvm { 2607 2608 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2609 const std::string &PredicateNamespace) { 2610 DecoderEmitter(RK, PredicateNamespace).run(OS); 2611 } 2612 2613 } // end namespace llvm 2614