//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the tablegen backend that emits the decoder functions for
// targets with fixed/variable length instruction sets.
//
//===----------------------------------------------------------------------===//

#include "CodeGenHwModes.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "InfoByHwMode.h"
#include "TableGenBackends.h"
#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "decoder-emitter"

// Option category shared with the disassembler emitter; defined in another
// translation unit.
extern cl::OptionCategory DisassemblerEmitterCat;

// Command-line switch "suppress-per-hwmode-duplicates" (default: false).
cl::opt<bool> DecoderEmitterSuppressDuplicates(
    "suppress-per-hwmode-duplicates",
    cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
    cl::init(false), cl::cat(DisassemblerEmitterCat));

namespace {

STATISTIC(NumEncodings, "Number of encodings considered");
STATISTIC(NumEncodingsLackingDisasm,
          "Number of encodings without disassembler info");
STATISTIC(NumInstructions, "Number of instructions considered");
STATISTIC(NumEncodingsSupported, "Number of encodings supported");
STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");

// Plain record of three unsigned values describing one piece of an operand.
// NOTE(review): presumably Base/Width select a bit range of the encoding and
// Offset is the position of that range inside the operand value; the consumer
// is not defined next to this struct, so confirm against emitBinaryParser.
struct EncodingField {
  unsigned Base, Width, Offset;
  EncodingField(unsigned B, unsigned W, unsigned O)
      : Base(B), Width(W), Offset(O) {}
};

// Decoding description of a single operand: the list of encoding fields that
// make it up, the name of its decoder, whether that decoder is complete, and
// an initial value (always constructed as 0).
struct OperandInfo {
  std::vector<EncodingField> Fields;
  std::string Decoder;
  bool HasCompleteDecoder;
  uint64_t InitValue;

  OperandInfo(std::string D, bool HCD)
      : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {}

  // Appends a new EncodingField built from the three values.
  void addField(unsigned Base, unsigned Width, unsigned Offset) {
    Fields.push_back(EncodingField(Base, Width, Offset));
  }

  unsigned numFields() const { return Fields.size(); }

  typedef std::vector<EncodingField>::const_iterator const_iterator;

  // Read-only iteration over Fields.
  const_iterator begin() const { return Fields.begin(); }
  const_iterator end() const { return Fields.end(); }
};

// The decoder table is a flat byte vector. A fixup is an index into that
// vector; fixups are grouped into lists, and the lists are kept on a stack
// of scopes.
typedef std::vector<uint8_t> DecoderTable;
typedef uint32_t DecoderFixup;
typedef std::vector<DecoderFixup> FixupList;
typedef std::vector<FixupList> FixupScopeList;
typedef SmallSetVector<CachedHashString, 16> PredicateSet;
typedef SmallSetVector<CachedHashString, 16> DecoderSet;
// Everything accumulated while building one decoder table: the bytes, the
// stack of pending fixup scopes, and the sets of predicate / decoder strings.
struct DecoderTableInfo {
  DecoderTable Table;
  FixupScopeList FixupStack;
  PredicateSet Predicates;
  DecoderSet Decoders;
};

// Pairs an encoding record with the instruction it belongs to, plus an
// optional HwMode name (empty by default).
struct EncodingAndInst {
  const Record *EncodingDef;
  const CodeGenInstruction *Inst;
  StringRef HwModeName;

  EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
                  StringRef HwModeName = "")
      : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
};

// Pairs an encoding ID with an opcode; both default to 0.
struct EncodingIDAndOpcode {
  unsigned EncodingID;
  unsigned Opcode;

  EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
      : EncodingID(EncodingID), Opcode(Opcode) {}
};

using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;

// Prints "<encoding>:<instruction>" when the encoding record differs from the
// instruction's own record, otherwise just "<instruction>".
raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
  if (Value.EncodingDef != Value.Inst->TheDef)
    OS << Value.EncodingDef->getName() << ":";
  OS << Value.Inst->TheDef->getName();
  return OS;
}

// The backend object. It keeps a reference to the RecordKeeper, a
// CodeGenTarget built from it, the list of numbered encodings, and the
// predicate namespace string handed to the constructor.
class DecoderEmitter {
  RecordKeeper &RK;
  std::vector<EncodingAndInst> NumberedEncodings;

public:
  DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace)
      : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {}

  // Emit the decoder state machine table.
  void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
                 unsigned Indentation, unsigned BitWidth, StringRef Namespace,
                 const EncodingIDsVec &EncodingIDs) const;
  // Emit the InstrLenTable array, one entry per element of InstrLen.
  void emitInstrLenTable(formatted_raw_ostream &OS,
                         std::vector<unsigned> &InstrLen) const;
  // Emit checkDecoderPredicate(), a switch over the predicate index.
  void emitPredicateFunction(formatted_raw_ostream &OS,
                             PredicateSet &Predicates,
                             unsigned Indentation) const;
  // Emit decodeToMCInst(), a switch over the decoder index.
  void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
                           unsigned Indentation) const;

  // run - Output the code emitter
  void run(raw_ostream &o);

private:
  CodeGenTarget Target;

public:
  std::string PredicateNamespace;
};

} // end anonymous namespace

// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
// for a bit value.
//
// BIT_UNFILTERED is used as the init value for a filter position.  It is used
// only for filter processing.
typedef enum {
  BIT_TRUE,      // '1'
  BIT_FALSE,     // '0'
  BIT_UNSET,     // '?'
  BIT_UNFILTERED // unfiltered
} bit_value_t;

// True when V carries a concrete bit value (BIT_TRUE or BIT_FALSE).
static bool ValueSet(bit_value_t V) {
  return (V == BIT_TRUE || V == BIT_FALSE);
}

// True only for BIT_UNSET; BIT_UNFILTERED is not considered "not set" here.
static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); }

// Maps BIT_UNSET to -1, BIT_FALSE to 0, and every other value (BIT_TRUE and
// also BIT_UNFILTERED) to 1.
static int Value(bit_value_t V) {
  return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
}

// Returns BIT_TRUE/BIT_FALSE when bit `index` of `bits` is a concrete BitInit,
// and BIT_UNSET for anything else.
static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
  if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index)))
    return bit->getValue() ? BIT_TRUE : BIT_FALSE;

  // The bit is uninitialized.
  return BIT_UNSET;
}

// Prints the bit value for each position, highest index first: '1', '0', or
// '_' for an unset bit.
static void dumpBits(raw_ostream &o, const BitsInit &bits) {
  for (unsigned index = bits.getNumBits(); index > 0; --index) {
    switch (bitFromBits(bits, index - 1)) {
    case BIT_TRUE:
      o << "1";
      break;
    case BIT_FALSE:
      o << "0";
      break;
    case BIT_UNSET:
      o << "_";
      break;
    default:
      llvm_unreachable("unexpected return value from bitFromBits");
    }
  }
}

// Returns the value of field `str` of `def` as a BitsInit.
//
// If the field already holds a BitsInit it is returned directly.  Otherwise
// the value is cast to a DagInit and interpreted as a variable-length
// encoding: its segments are flattened in iteration order, where a BitsInit
// segment contributes each of its bits, a single BitInit contributes itself,
// and any other segment contributes SI.BitWidth unset bits.
//
// NOTE(review): RV is dereferenced without a null check, so callers appear to
// be expected to pass a field name that exists on `def` -- confirm.
static BitsInit &getBitsField(const Record &def, StringRef str) {
  const RecordVal *RV = def.getValue(str);
  if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
    return *Bits;

  // variable length instruction
  VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
  SmallVector<Init *, 16> Bits;

  for (auto &SI : VLI) {
    if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
      for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
        Bits.push_back(BI->getBit(Idx));
      }
    } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) {
      Bits.push_back(const_cast<BitInit *>(BI));
    } else {
      for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx)
        Bits.push_back(UnsetInit::get(def.getRecords()));
    }
  }

  return *BitsInit::get(def.getRecords(), Bits);
}

// Representation of the instruction to work on.
typedef std::vector<bit_value_t> insn_t;

namespace {

// Key used in Filter::FilterChooserMap for the group of instructions whose
// filtered segment is not a constant.  It is the largest uint64_t value.
static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL;

class FilterChooser;

/// Filter - Filter works with FilterChooser to produce the decoding tree for
/// the ISA.
///
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree in a certain level.  Each case stmt delegates to an inferior
/// FilterChooser to decide what further decoding logic to employ, or in other
/// words, what other remaining bits to look at.  The FilterChooser eventually
/// chooses a best Filter to do its job.
///
/// This recursive scheme ends when the number of Opcodes assigned to the
/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
/// the Filter/FilterChooser combo does not know how to distinguish among the
/// Opcodes assigned.
///
/// An example of a conflict is
///
/// Conflict:
///                     111101000.00........00010000....
///                     111101000.00........0001........
///                     1111010...00........0001........
///                     1111010...00....................
///                     1111010.........................
///                     1111............................
///                     ................................
///     VST4q8a         111101000_00________00010000____
///     VST4q8b         111101000_00________00010000____
///
/// The Debug output shows the path that the decoding tree follows to reach
/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
///
/// The encoding info in the .td files does not specify this meta information,
/// which could have been used by the decoder to resolve the conflict.  The
/// decoder could try to decode the even/odd register numbering and assign to
/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
/// version and return the Opcode since the two have the same Asm format string.
class Filter {
protected:
  const FilterChooser
      *Owner;        // points to the FilterChooser who owns this filter
  unsigned StartBit; // the starting bit position
  unsigned NumBits;  // number of bits to filter
  bool Mixed;        // a mixed region contains both set and unset bits

  // Map of well-known segment value to the set of uid's with that value.
  std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;

  // Set of uid's with non-constant segment values.
  std::vector<EncodingIDAndOpcode> VariableInstructions;

  // Map of well-known segment value to its delegate.
  std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;

  // Number of instructions which fall under FilteredInstructions category.
  unsigned NumFiltered;

  // Keeps track of the last opcode in the filtered bucket.
  EncodingIDAndOpcode LastOpcFiltered;

public:
  // Move constructor: transfers the three containers and copies the scalars.
  Filter(Filter &&f);
  // Classifies every opcode of `owner` by the value of the bit segment
  // [startBit, startBit + numBits).
  Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);

  ~Filter() = default;

  unsigned getNumFiltered() const { return NumFiltered; }

  // Returns the only filtered opcode; asserts that there is exactly one.
  EncodingIDAndOpcode getSingletonOpc() const {
    assert(NumFiltered == 1);
    return LastOpcFiltered;
  }

  // Return the filter chooser for the group of instructions without constant
  // segment values.  Asserts that exactly one instruction was filtered and
  // that the map holds exactly one (the sentinel-keyed) entry.
  const FilterChooser &getVariableFC() const {
    assert(NumFiltered == 1);
    assert(FilterChooserMap.size() == 1);
    return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
  }

  // Divides the decoding task into sub tasks and delegates them to the
  // inferior FilterChooser's.
  //
  // A special case arises when there's only one entry in the filtered
  // instructions.  In order to unambiguously decode the singleton, we need to
  // match the remaining undecoded encoding bits against the singleton.
  void recurse();

  // Emit table entries to decode instructions given a segment or segments of
  // bits.
  void emitTableEntry(DecoderTableInfo &TableInfo) const;

  // Returns the number of fanout produced by the filter.  More fanout implies
  // the filter distinguishes more categories of instructions.
  unsigned usefulness() const;
}; // end class Filter

} // end anonymous namespace

// These are states of our finite state machines used in FilterChooser's
// filterProcessor() which produces the filter candidates to use.
typedef enum {
  ATTR_NONE,
  ATTR_FILTERED,
  ATTR_ALL_SET,
  ATTR_ALL_UNSET,
  ATTR_MIXED
} bitAttr_t;

/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
/// in order to perform the decoding of instructions at the current level.
///
/// Decoding proceeds from the top down.  Based on the well-known encoding bits
/// of instructions available, FilterChooser builds up the possible Filters that
/// can further the task of decoding by distinguishing among the remaining
/// candidate instructions.
///
/// Once a filter has been chosen, it is called upon to divide the decoding task
/// into sub-tasks and delegates them to its inferior FilterChoosers for further
/// processing.
///
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree.  And each case is delegated to an inferior FilterChooser to
/// decide what further remaining bits to look at.
namespace {

class FilterChooser {
protected:
  friend class Filter;

  // Vector of codegen instructions to choose our filter.
  ArrayRef<EncodingAndInst> AllInstructions;

  // Vector of uid's for this filter chooser to work on.
  // Each element pairs the encoding ID being decoded (EncodingID) with the
  // opcode that should be emitted (Opcode).
  // NOTE: held by reference; the referenced vector must outlive this object.
  const std::vector<EncodingIDAndOpcode> &Opcodes;

  // Lookup table for the operand decoding of instructions.
  // NOTE: held by reference; the referenced map must outlive this object.
  const std::map<unsigned, std::vector<OperandInfo>> &Operands;

  // Vector of candidate filters.
  std::vector<Filter> Filters;

  // Array of bit values passed down from our parent.
  // Set to all BIT_UNFILTERED's for Parent == NULL.
  std::vector<bit_value_t> FilterBitValues;

  // Links to the FilterChooser above us in the decoding tree.
  const FilterChooser *Parent;

  // Index of the best filter from Filters; -1 until one has been chosen.
  int BestIndex;

  // Width of instructions
  unsigned BitWidth;

  // Parent emitter
  const DecoderEmitter *Emitter;

public:
  // Top-level chooser: no parent, every bit position starts out as
  // BIT_UNFILTERED.  Filtering (doFilter) runs from within the constructor.
  FilterChooser(ArrayRef<EncodingAndInst> Insts,
                const std::vector<EncodingIDAndOpcode> &IDs,
                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                unsigned BW, const DecoderEmitter *E)
      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
        FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
        BitWidth(BW), Emitter(E) {
    doFilter();
  }

  // Inferior chooser: copies the bit values handed down by the parent and
  // inherits the parent's BitWidth and Emitter.  Filtering (doFilter) runs
  // from within the constructor.
  FilterChooser(ArrayRef<EncodingAndInst> Insts,
                const std::vector<EncodingIDAndOpcode> &IDs,
                const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                const std::vector<bit_value_t> &ParentFilterBitValues,
                const FilterChooser &parent)
      : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
        FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
        BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
    doFilter();
  }

  FilterChooser(const FilterChooser &) = delete;
  void operator=(const FilterChooser &) = delete;

  unsigned getBitWidth() const { return BitWidth; }

protected:
  // Populates the insn given the uid.
  //
  // Insn is resized to the larger of BitWidth and the width of the "Inst"
  // field; newly added positions are BIT_UNSET.
  void insnWithID(insn_t &Insn, unsigned Opcode) const {
    BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
    Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(),
                BIT_UNSET);
    // We may have a SoftFail bitmask, which specifies a mask where an encoding
    // may differ from the value in "Inst" and yet still be valid, but the
    // disassembler should return SoftFail instead of Success.
    //
    // This is used for marking UNPREDICTABLE instructions in the ARM world.
    const RecordVal *RV =
        AllInstructions[Opcode].EncodingDef->getValue("SoftFail");
    const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
    for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
      // A bit covered by the SoftFail mask is treated as unknown.
      if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
        Insn[i] = BIT_UNSET;
      else
        Insn[i] = bitFromBits(Bits, i);
    }
  }

  // Emit the name of the encoding/instruction pair: "<encoding>:<inst>" when
  // the two records differ, otherwise just "<inst>".
  void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
    const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
    if (EncodingDef != InstDef)
      OS << EncodingDef->getName() << ":";
    OS << InstDef->getName();
  }

  // Populates the field of the insn given the start position and the number of
  // consecutive bits to scan for.
  //
  // Returns false if there exists any uninitialized bit value in the range.
  // Returns true, otherwise.
  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
                     unsigned NumBits) const;

  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
  /// filter array as a series of chars.
  void dumpFilterArray(raw_ostream &o,
                       const std::vector<bit_value_t> &filter) const;

  /// dumpStack - dumpStack traverses the filter chooser chain and calls
  /// dumpFilterArray on each filter chooser up to the top level one.
  void dumpStack(raw_ostream &o, const char *prefix) const;

  // Returns the chosen filter; asserts that one has been chosen.
  Filter &bestFilter() {
    assert(BestIndex != -1 && "BestIndex not set");
    return Filters[BestIndex];
  }

  // True when position i already carries a concrete (true/false) value.
  bool PositionFiltered(unsigned i) const {
    return ValueSet(FilterBitValues[i]);
  }

  // Calculates the island(s) needed to decode the instruction.
  // This returns a list of undecoded bits of an instruction, for example,
  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
  // decoded bits in order to verify that the instruction matches the Opcode.
  unsigned getIslands(std::vector<unsigned> &StartBits,
                      std::vector<unsigned> &EndBits,
                      std::vector<uint64_t> &FieldVals,
                      const insn_t &Insn) const;

  // Emits code to check the Predicates member of an instruction are true.
  // Returns true if predicate matches were emitted, false otherwise.
  bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                          unsigned Opc) const;
  bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
                             raw_ostream &OS) const;

  bool doesOpcodeNeedPredicate(unsigned Opc) const;
  unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
  void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;

  void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;

  // Emits table entries to decode the singleton.
  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                               EncodingIDAndOpcode Opc) const;

  // Emits code to decode the singleton, and then to decode the rest.
  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                               const Filter &Best) const;

  void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
                        const OperandInfo &OpInfo,
                        bool &OpHasCompleteDecoder) const;

  void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
                   bool &HasCompleteDecoder) const;
  unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
                           bool &HasCompleteDecoder) const;

  // Assign a single filter and run with it.
  void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);

  // reportRegion is a helper function for filterProcessor to mark a region as
  // eligible for use as a filter region.
  void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
                    bool AllowMixed);

  // FilterProcessor scans the well-known encoding bits of the instructions and
  // builds up a list of candidate filters.  It chooses the best filter and
  // recursively descends down the decoding tree.
  bool filterProcessor(bool AllowMixed, bool Greedy = true);

  // Decides on the best configuration of filter(s) to use in order to decode
  // the instructions.  A conflict of instructions may occur, in which case we
  // dump the conflict set to the standard error.
  void doFilter();

public:
  // emitTableEntries - Emit state machine entries to decode our share of
  //  instructions.
  void emitTableEntries(DecoderTableInfo &TableInfo) const;
};

} // end anonymous namespace

///////////////////////////
//                       //
// Filter Implementation //
//                       //
///////////////////////////

// Move constructor.  The three containers are moved from `f`; the scalar
// members are copied.
Filter::Filter(Filter &&f)
    : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
      FilteredInstructions(std::move(f.FilteredInstructions)),
      VariableInstructions(std::move(f.VariableInstructions)),
      FilterChooserMap(std::move(f.FilterChooserMap)),
      NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}

// Builds a filter over bits [startBit, startBit + numBits) of the owner's
// instructions.  Each of the owner's opcodes lands either in the
// FilteredInstructions bucket keyed by its constant segment value, or in
// VariableInstructions when some bit of the segment is unspecified.
Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
               bool mixed)
    : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
  // The segment must lie entirely inside the instruction width.
  assert(StartBit + NumBits - 1 < Owner->BitWidth);

  NumFiltered = 0;
  LastOpcFiltered = {0, 0};

  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
    insn_t Insn;

    // Populates the insn given the uid.
    Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);

    uint64_t Field;
    // Scans the segment for possibly well-specified encoding bits.
    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);

    if (ok) {
      // The encoding bits are well-known.  Lets add the uid of the
      // instruction into the bucket keyed off the constant field value.
      LastOpcFiltered = Owner->Opcodes[i];
      FilteredInstructions[Field].push_back(LastOpcFiltered);
      ++NumFiltered;
    } else {
      // Some of the encoding bit(s) are unspecified.  This contributes to
      // one additional member of "Variable" instructions.
      VariableInstructions.push_back(Owner->Opcodes[i]);
    }
  }

  assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
         "Filter returns no instruction categories");
}

// Divides the decoding task into sub tasks and delegates them to the
// inferior FilterChooser's.
613 // 614 // A special case arises when there's only one entry in the filtered 615 // instructions. In order to unambiguously decode the singleton, we need to 616 // match the remaining undecoded encoding bits against the singleton. 617 void Filter::recurse() { 618 // Starts by inheriting our parent filter chooser's filter bit values. 619 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 620 621 if (!VariableInstructions.empty()) { 622 // Conservatively marks each segment position as BIT_UNSET. 623 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 624 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 625 626 // Delegates to an inferior filter chooser for further processing on this 627 // group of instructions whose segment values are variable. 628 FilterChooserMap.insert(std::pair( 629 NO_FIXED_SEGMENTS_SENTINEL, 630 std::make_unique<FilterChooser>(Owner->AllInstructions, 631 VariableInstructions, Owner->Operands, 632 BitValueArray, *Owner))); 633 } 634 635 // No need to recurse for a singleton filtered instruction. 636 // See also Filter::emit*(). 637 if (getNumFiltered() == 1) { 638 assert(FilterChooserMap.size() == 1); 639 return; 640 } 641 642 // Otherwise, create sub choosers. 643 for (const auto &Inst : FilteredInstructions) { 644 645 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 646 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 647 if (Inst.first & (1ULL << bitIndex)) 648 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 649 else 650 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 651 } 652 653 // Delegates to an inferior filter chooser for further processing on this 654 // category of instructions. 
655 FilterChooserMap.insert( 656 std::pair(Inst.first, std::make_unique<FilterChooser>( 657 Owner->AllInstructions, Inst.second, 658 Owner->Operands, BitValueArray, *Owner))); 659 } 660 } 661 662 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 663 uint32_t DestIdx) { 664 // Any NumToSkip fixups in the current scope can resolve to the 665 // current location. 666 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 667 I != E; ++I) { 668 // Calculate the distance from the byte following the fixup entry byte 669 // to the destination. The Target is calculated from after the 16-bit 670 // NumToSkip entry itself, so subtract two from the displacement here 671 // to account for that. 672 uint32_t FixupIdx = *I; 673 uint32_t Delta = DestIdx - FixupIdx - 3; 674 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 675 // big. 676 assert(Delta < (1u << 24)); 677 Table[FixupIdx] = (uint8_t)Delta; 678 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 679 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 680 } 681 } 682 683 // Emit table entries to decode instructions given a segment or segments 684 // of bits. 685 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 686 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 687 TableInfo.Table.push_back(MCD::OPC_ExtractField); 688 689 SmallString<16> SBytes; 690 raw_svector_ostream S(SBytes); 691 encodeULEB128(StartBit, S); 692 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 693 TableInfo.Table.push_back(NumBits); 694 695 // A new filter entry begins a new scope for fixup resolution. 696 TableInfo.FixupStack.emplace_back(); 697 698 DecoderTable &Table = TableInfo.Table; 699 700 size_t PrevFilter = 0; 701 bool HasFallthrough = false; 702 for (auto &Filter : FilterChooserMap) { 703 // Field value -1 implies a non-empty set of variable instructions. 704 // See also recurse(). 
705 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 706 HasFallthrough = true; 707 708 // Each scope should always have at least one filter value to check 709 // for. 710 assert(PrevFilter != 0 && "empty filter set!"); 711 FixupList &CurScope = TableInfo.FixupStack.back(); 712 // Resolve any NumToSkip fixups in the current scope. 713 resolveTableFixups(Table, CurScope, Table.size()); 714 CurScope.clear(); 715 PrevFilter = 0; // Don't re-process the filter's fallthrough. 716 } else { 717 Table.push_back(MCD::OPC_FilterValue); 718 // Encode and emit the value to filter against. 719 uint8_t Buffer[16]; 720 unsigned Len = encodeULEB128(Filter.first, Buffer); 721 Table.insert(Table.end(), Buffer, Buffer + Len); 722 // Reserve space for the NumToSkip entry. We'll backpatch the value 723 // later. 724 PrevFilter = Table.size(); 725 Table.push_back(0); 726 Table.push_back(0); 727 Table.push_back(0); 728 } 729 730 // We arrive at a category of instructions with the same segment value. 731 // Now delegate to the sub filter chooser for further decodings. 732 // The case may fallthrough, which happens if the remaining well-known 733 // encoding bits do not match exactly. 734 Filter.second->emitTableEntries(TableInfo); 735 736 // Now that we've emitted the body of the handler, update the NumToSkip 737 // of the filter itself to be able to skip forward when false. Subtract 738 // two as to account for the width of the NumToSkip field itself. 739 if (PrevFilter) { 740 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 741 assert(NumToSkip < (1u << 24) && 742 "disassembler decoding table too large!"); 743 Table[PrevFilter] = (uint8_t)NumToSkip; 744 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 745 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 746 } 747 } 748 749 // Any remaining unresolved fixups bubble up to the parent fixup scope. 
750 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 751 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 752 FixupScopeList::iterator Dest = Source - 1; 753 llvm::append_range(*Dest, *Source); 754 TableInfo.FixupStack.pop_back(); 755 756 // If there is no fallthrough, then the final filter should get fixed 757 // up according to the enclosing scope rather than the current position. 758 if (!HasFallthrough) 759 TableInfo.FixupStack.back().push_back(PrevFilter); 760 } 761 762 // Returns the number of fanout produced by the filter. More fanout implies 763 // the filter distinguishes more categories of instructions. 764 unsigned Filter::usefulness() const { 765 if (!VariableInstructions.empty()) 766 return FilteredInstructions.size(); 767 else 768 return FilteredInstructions.size() + 1; 769 } 770 771 ////////////////////////////////// 772 // // 773 // Filterchooser Implementation // 774 // // 775 ////////////////////////////////// 776 777 // Emit the decoder state machine table. 778 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 779 unsigned Indentation, unsigned BitWidth, 780 StringRef Namespace, 781 const EncodingIDsVec &EncodingIDs) const { 782 // We'll need to be able to map from a decoded opcode into the corresponding 783 // EncodingID for this specific combination of BitWidth and Namespace. This 784 // is used below to index into NumberedEncodings. 785 DenseMap<unsigned, unsigned> OpcodeToEncodingID; 786 OpcodeToEncodingID.reserve(EncodingIDs.size()); 787 for (auto &EI : EncodingIDs) 788 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; 789 790 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 791 << BitWidth << "[] = {\n"; 792 793 Indentation += 2; 794 795 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 
796 auto emitULEB128 = [](DecoderTable::const_iterator I, 797 formatted_raw_ostream &OS) { 798 unsigned Len = 0; 799 while (*I >= 128) { 800 OS << (unsigned)*I++ << ", "; 801 Len++; 802 } 803 OS << (unsigned)*I++ << ", "; 804 return Len + 1; 805 }; 806 807 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 808 auto emitNumToSkip = [](DecoderTable::const_iterator I, 809 formatted_raw_ostream &OS) { 810 uint8_t Byte = *I++; 811 uint32_t NumToSkip = Byte; 812 OS << (unsigned)Byte << ", "; 813 Byte = *I++; 814 OS << (unsigned)Byte << ", "; 815 NumToSkip |= Byte << 8; 816 Byte = *I++; 817 OS << utostr(Byte) << ", "; 818 NumToSkip |= Byte << 16; 819 return NumToSkip; 820 }; 821 822 // FIXME: We may be able to use the NumToSkip values to recover 823 // appropriate indentation levels. 824 DecoderTable::const_iterator I = Table.begin(); 825 DecoderTable::const_iterator E = Table.end(); 826 while (I != E) { 827 assert(I < E && "incomplete decode table entry!"); 828 829 uint64_t Pos = I - Table.begin(); 830 OS << "/* " << Pos << " */"; 831 OS.PadToColumn(12); 832 833 switch (*I) { 834 default: 835 PrintFatalError("invalid decode table opcode"); 836 case MCD::OPC_ExtractField: { 837 ++I; 838 OS.indent(Indentation) << "MCD::OPC_ExtractField, "; 839 840 // ULEB128 encoded start value. 841 const char *ErrMsg = nullptr; 842 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 843 Table.data() + Table.size(), &ErrMsg); 844 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 845 I += emitULEB128(I, OS); 846 847 unsigned Len = *I++; 848 OS << Len << ", // Inst{"; 849 if (Len > 1) 850 OS << (Start + Len - 1) << "-"; 851 OS << Start << "} ...\n"; 852 break; 853 } 854 case MCD::OPC_FilterValue: { 855 ++I; 856 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 857 // The filter value is ULEB128 encoded. 858 I += emitULEB128(I, OS); 859 860 // 24-bit numtoskip value. 
861 uint32_t NumToSkip = emitNumToSkip(I, OS); 862 I += 3; 863 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 864 break; 865 } 866 case MCD::OPC_CheckField: { 867 ++I; 868 OS.indent(Indentation) << "MCD::OPC_CheckField, "; 869 // ULEB128 encoded start value. 870 I += emitULEB128(I, OS); 871 // 8-bit length. 872 unsigned Len = *I++; 873 OS << Len << ", "; 874 // ULEB128 encoded field value. 875 I += emitULEB128(I, OS); 876 877 // 24-bit numtoskip value. 878 uint32_t NumToSkip = emitNumToSkip(I, OS); 879 I += 3; 880 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 881 break; 882 } 883 case MCD::OPC_CheckPredicate: { 884 ++I; 885 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 886 I += emitULEB128(I, OS); 887 888 // 24-bit numtoskip value. 889 uint32_t NumToSkip = emitNumToSkip(I, OS); 890 I += 3; 891 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 892 break; 893 } 894 case MCD::OPC_Decode: 895 case MCD::OPC_TryDecode: { 896 bool IsTry = *I == MCD::OPC_TryDecode; 897 ++I; 898 // Decode the Opcode value. 899 const char *ErrMsg = nullptr; 900 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 901 Table.data() + Table.size(), &ErrMsg); 902 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 903 904 OS.indent(Indentation) 905 << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 906 I += emitULEB128(I, OS); 907 908 // Decoder index. 909 I += emitULEB128(I, OS); 910 911 auto EncI = OpcodeToEncodingID.find(Opc); 912 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry"); 913 auto EncodingID = EncI->second; 914 915 if (!IsTry) { 916 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n"; 917 break; 918 } 919 920 // Fallthrough for OPC_TryDecode. 921 922 // 24-bit numtoskip value. 
923 uint32_t NumToSkip = emitNumToSkip(I, OS); 924 I += 3; 925 926 OS << "// Opcode: " << NumberedEncodings[EncodingID] 927 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 928 break; 929 } 930 case MCD::OPC_SoftFail: { 931 ++I; 932 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 933 // Positive mask 934 uint64_t Value = 0; 935 unsigned Shift = 0; 936 do { 937 OS << ", " << (unsigned)*I; 938 Value += (*I & 0x7f) << Shift; 939 Shift += 7; 940 } while (*I++ >= 128); 941 if (Value > 127) { 942 OS << " /* 0x"; 943 OS.write_hex(Value); 944 OS << " */"; 945 } 946 // Negative mask 947 Value = 0; 948 Shift = 0; 949 do { 950 OS << ", " << (unsigned)*I; 951 Value += (*I & 0x7f) << Shift; 952 Shift += 7; 953 } while (*I++ >= 128); 954 if (Value > 127) { 955 OS << " /* 0x"; 956 OS.write_hex(Value); 957 OS << " */"; 958 } 959 OS << ",\n"; 960 break; 961 } 962 case MCD::OPC_Fail: { 963 ++I; 964 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 965 break; 966 } 967 } 968 } 969 OS.indent(Indentation) << "0\n"; 970 971 Indentation -= 2; 972 973 OS.indent(Indentation) << "};\n\n"; 974 } 975 976 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 977 std::vector<unsigned> &InstrLen) const { 978 OS << "static const uint8_t InstrLenTable[] = {\n"; 979 for (unsigned &Len : InstrLen) { 980 OS << Len << ",\n"; 981 } 982 OS << "};\n\n"; 983 } 984 985 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 986 PredicateSet &Predicates, 987 unsigned Indentation) const { 988 // The predicate function is just a big switch statement based on the 989 // input predicate index. 
990 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 991 << "const FeatureBitset &Bits) {\n"; 992 Indentation += 2; 993 if (!Predicates.empty()) { 994 OS.indent(Indentation) << "switch (Idx) {\n"; 995 OS.indent(Indentation) 996 << "default: llvm_unreachable(\"Invalid index!\");\n"; 997 unsigned Index = 0; 998 for (const auto &Predicate : Predicates) { 999 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1000 OS.indent(Indentation + 2) << "return (" << Predicate << ");\n"; 1001 } 1002 OS.indent(Indentation) << "}\n"; 1003 } else { 1004 // No case statement to emit 1005 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 1006 } 1007 Indentation -= 2; 1008 OS.indent(Indentation) << "}\n\n"; 1009 } 1010 1011 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 1012 DecoderSet &Decoders, 1013 unsigned Indentation) const { 1014 // The decoder function is just a big switch statement based on the 1015 // input decoder index. 1016 OS.indent(Indentation) << "template <typename InsnType>\n"; 1017 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1018 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1019 OS.indent(Indentation) 1020 << " uint64_t " 1021 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1022 Indentation += 2; 1023 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1024 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1025 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1026 // possible but fall back to an InsnType-sized tmp for truly large fields. 
1027 OS.indent(Indentation) << "using TmpType = " 1028 "std::conditional_t<std::is_integral<InsnType>::" 1029 "value, InsnType, uint64_t>;\n"; 1030 OS.indent(Indentation) << "TmpType tmp;\n"; 1031 OS.indent(Indentation) << "switch (Idx) {\n"; 1032 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1033 unsigned Index = 0; 1034 for (const auto &Decoder : Decoders) { 1035 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1036 OS << Decoder; 1037 OS.indent(Indentation + 2) << "return S;\n"; 1038 } 1039 OS.indent(Indentation) << "}\n"; 1040 Indentation -= 2; 1041 OS.indent(Indentation) << "}\n\n"; 1042 } 1043 1044 // Populates the field of the insn given the start position and the number of 1045 // consecutive bits to scan for. 1046 // 1047 // Returns false if and on the first uninitialized bit value encountered. 1048 // Returns true, otherwise. 1049 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 1050 unsigned StartBit, unsigned NumBits) const { 1051 Field = 0; 1052 1053 for (unsigned i = 0; i < NumBits; ++i) { 1054 if (Insn[StartBit + i] == BIT_UNSET) 1055 return false; 1056 1057 if (Insn[StartBit + i] == BIT_TRUE) 1058 Field = Field | (1ULL << i); 1059 } 1060 1061 return true; 1062 } 1063 1064 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1065 /// filter array as a series of chars. 1066 void FilterChooser::dumpFilterArray( 1067 raw_ostream &o, const std::vector<bit_value_t> &filter) const { 1068 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1069 switch (filter[bitIndex - 1]) { 1070 case BIT_UNFILTERED: 1071 o << "."; 1072 break; 1073 case BIT_UNSET: 1074 o << "_"; 1075 break; 1076 case BIT_TRUE: 1077 o << "1"; 1078 break; 1079 case BIT_FALSE: 1080 o << "0"; 1081 break; 1082 } 1083 } 1084 } 1085 1086 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1087 /// dumpFilterArray on each filter chooser up to the top level one. 
1088 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1089 const FilterChooser *current = this; 1090 1091 while (current) { 1092 o << prefix; 1093 dumpFilterArray(o, current->FilterBitValues); 1094 o << '\n'; 1095 current = current->Parent; 1096 } 1097 } 1098 1099 // Calculates the island(s) needed to decode the instruction. 1100 // This returns a list of undecoded bits of an instructions, for example, 1101 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1102 // decoded bits in order to verify that the instruction matches the Opcode. 1103 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1104 std::vector<unsigned> &EndBits, 1105 std::vector<uint64_t> &FieldVals, 1106 const insn_t &Insn) const { 1107 unsigned Num, BitNo; 1108 Num = BitNo = 0; 1109 1110 uint64_t FieldVal = 0; 1111 1112 // 0: Init 1113 // 1: Water (the bit value does not affect decoding) 1114 // 2: Island (well-known bit value needed for decoding) 1115 int State = 0; 1116 1117 for (unsigned i = 0; i < BitWidth; ++i) { 1118 int64_t Val = Value(Insn[i]); 1119 bool Filtered = PositionFiltered(i); 1120 switch (State) { 1121 default: 1122 llvm_unreachable("Unreachable code!"); 1123 case 0: 1124 case 1: 1125 if (Filtered || Val == -1) 1126 State = 1; // Still in Water 1127 else { 1128 State = 2; // Into the Island 1129 BitNo = 0; 1130 StartBits.push_back(i); 1131 FieldVal = Val; 1132 } 1133 break; 1134 case 2: 1135 if (Filtered || Val == -1) { 1136 State = 1; // Into the Water 1137 EndBits.push_back(i - 1); 1138 FieldVals.push_back(FieldVal); 1139 ++Num; 1140 } else { 1141 State = 2; // Still in Island 1142 ++BitNo; 1143 FieldVal = FieldVal | Val << BitNo; 1144 } 1145 break; 1146 } 1147 } 1148 // If we are still in Island after the loop, do some housekeeping. 
1149 if (State == 2) { 1150 EndBits.push_back(BitWidth - 1); 1151 FieldVals.push_back(FieldVal); 1152 ++Num; 1153 } 1154 1155 assert(StartBits.size() == Num && EndBits.size() == Num && 1156 FieldVals.size() == Num); 1157 return Num; 1158 } 1159 1160 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1161 const OperandInfo &OpInfo, 1162 bool &OpHasCompleteDecoder) const { 1163 const std::string &Decoder = OpInfo.Decoder; 1164 1165 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1166 1167 if (UseInsertBits) { 1168 o.indent(Indentation) << "tmp = 0x"; 1169 o.write_hex(OpInfo.InitValue); 1170 o << ";\n"; 1171 } 1172 1173 for (const EncodingField &EF : OpInfo) { 1174 o.indent(Indentation); 1175 if (UseInsertBits) 1176 o << "insertBits(tmp, "; 1177 else 1178 o << "tmp = "; 1179 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1180 if (UseInsertBits) 1181 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1182 else if (EF.Offset != 0) 1183 o << " << " << EF.Offset; 1184 o << ";\n"; 1185 } 1186 1187 if (Decoder != "") { 1188 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1189 o.indent(Indentation) << "if (!Check(S, " << Decoder 1190 << "(MI, tmp, Address, Decoder))) { " 1191 << (OpHasCompleteDecoder ? "" 1192 : "DecodeComplete = false; ") 1193 << "return MCDisassembler::Fail; }\n"; 1194 } else { 1195 OpHasCompleteDecoder = true; 1196 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1197 } 1198 } 1199 1200 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1201 unsigned Opc, bool &HasCompleteDecoder) const { 1202 HasCompleteDecoder = true; 1203 1204 for (const auto &Op : Operands.find(Opc)->second) { 1205 // If a custom instruction decoder was specified, use that. 
1206 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1207 HasCompleteDecoder = Op.HasCompleteDecoder; 1208 OS.indent(Indentation) 1209 << "if (!Check(S, " << Op.Decoder 1210 << "(MI, insn, Address, Decoder))) { " 1211 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1212 << "return MCDisassembler::Fail; }\n"; 1213 break; 1214 } 1215 1216 bool OpHasCompleteDecoder; 1217 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1218 if (!OpHasCompleteDecoder) 1219 HasCompleteDecoder = false; 1220 } 1221 } 1222 1223 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1224 bool &HasCompleteDecoder) const { 1225 // Build up the predicate string. 1226 SmallString<256> Decoder; 1227 // FIXME: emitDecoder() function can take a buffer directly rather than 1228 // a stream. 1229 raw_svector_ostream S(Decoder); 1230 unsigned I = 4; 1231 emitDecoder(S, I, Opc, HasCompleteDecoder); 1232 1233 // Using the full decoder string as the key value here is a bit 1234 // heavyweight, but is effective. If the string comparisons become a 1235 // performance concern, we can implement a mangling of the predicate 1236 // data easily enough with a map back to the actual string. That's 1237 // overkill for now, though. 1238 1239 // Make sure the predicate is in the table. 1240 Decoders.insert(CachedHashString(Decoder)); 1241 // Now figure out the index for when we write out the table. 1242 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1243 return (unsigned)(P - Decoders.begin()); 1244 } 1245 1246 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 
1247 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 1248 raw_ostream &OS) const { 1249 if (auto *D = dyn_cast<DefInit>(&Val)) { 1250 if (!D->getDef()->isSubClassOf("SubtargetFeature")) 1251 return true; 1252 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() 1253 << "]"; 1254 return false; 1255 } 1256 if (auto *D = dyn_cast<DagInit>(&Val)) { 1257 std::string Op = D->getOperator()->getAsString(); 1258 if (Op == "not" && D->getNumArgs() == 1) { 1259 OS << '!'; 1260 return emitPredicateMatchAux(*D->getArg(0), true, OS); 1261 } 1262 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { 1263 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); 1264 if (Paren) 1265 OS << '('; 1266 ListSeparator LS(Op == "any_of" ? " || " : " && "); 1267 for (auto *Arg : D->getArgs()) { 1268 OS << LS; 1269 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) 1270 return true; 1271 } 1272 if (Paren) 1273 OS << ')'; 1274 return false; 1275 } 1276 } 1277 return true; 1278 } 1279 1280 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 1281 unsigned Opc) const { 1282 ListInit *Predicates = 1283 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1284 bool IsFirstEmission = true; 1285 for (unsigned i = 0; i < Predicates->size(); ++i) { 1286 Record *Pred = Predicates->getElementAsRecord(i); 1287 if (!Pred->getValue("AssemblerMatcherPredicate")) 1288 continue; 1289 1290 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1291 continue; 1292 1293 if (!IsFirstEmission) 1294 o << " && "; 1295 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), 1296 Predicates->size() > 1, o)) 1297 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1298 IsFirstEmission = false; 1299 } 1300 return !Predicates->empty(); 1301 } 1302 1303 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1304 ListInit *Predicates = 1305 
AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1306 for (unsigned i = 0; i < Predicates->size(); ++i) { 1307 Record *Pred = Predicates->getElementAsRecord(i); 1308 if (!Pred->getValue("AssemblerMatcherPredicate")) 1309 continue; 1310 1311 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1312 return true; 1313 } 1314 return false; 1315 } 1316 1317 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1318 StringRef Predicate) const { 1319 // Using the full predicate string as the key value here is a bit 1320 // heavyweight, but is effective. If the string comparisons become a 1321 // performance concern, we can implement a mangling of the predicate 1322 // data easily enough with a map back to the actual string. That's 1323 // overkill for now, though. 1324 1325 // Make sure the predicate is in the table. 1326 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1327 // Now figure out the index for when we write out the table. 1328 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1329 return (unsigned)(P - TableInfo.Predicates.begin()); 1330 } 1331 1332 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1333 unsigned Opc) const { 1334 if (!doesOpcodeNeedPredicate(Opc)) 1335 return; 1336 1337 // Build up the predicate string. 1338 SmallString<256> Predicate; 1339 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1340 // than a stream. 1341 raw_svector_ostream PS(Predicate); 1342 unsigned I = 0; 1343 emitPredicateMatch(PS, I, Opc); 1344 1345 // Figure out the index into the predicate table for the predicate just 1346 // computed. 
1347 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1348 SmallString<16> PBytes; 1349 raw_svector_ostream S(PBytes); 1350 encodeULEB128(PIdx, S); 1351 1352 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1353 // Predicate index 1354 for (unsigned i = 0, e = PBytes.size(); i != e; ++i) 1355 TableInfo.Table.push_back(PBytes[i]); 1356 // Push location for NumToSkip backpatching. 1357 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1358 TableInfo.Table.push_back(0); 1359 TableInfo.Table.push_back(0); 1360 TableInfo.Table.push_back(0); 1361 } 1362 1363 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1364 unsigned Opc) const { 1365 const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail"); 1366 BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1367 1368 if (!SFBits) 1369 return; 1370 BitsInit *InstBits = 1371 AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); 1372 1373 APInt PositiveMask(BitWidth, 0ULL); 1374 APInt NegativeMask(BitWidth, 0ULL); 1375 for (unsigned i = 0; i < BitWidth; ++i) { 1376 bit_value_t B = bitFromBits(*SFBits, i); 1377 bit_value_t IB = bitFromBits(*InstBits, i); 1378 1379 if (B != BIT_TRUE) 1380 continue; 1381 1382 switch (IB) { 1383 case BIT_FALSE: 1384 // The bit is meant to be false, so emit a check to see if it is true. 1385 PositiveMask.setBit(i); 1386 break; 1387 case BIT_TRUE: 1388 // The bit is meant to be true, so emit a check to see if it is false. 1389 NegativeMask.setBit(i); 1390 break; 1391 default: 1392 // The bit is not set; this must be an error! 
1393 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1394 << AllInstructions[Opc] << " is set but Inst{" << i 1395 << "} is unset!\n" 1396 << " - You can only mark a bit as SoftFail if it is fully defined" 1397 << " (1/0 - not '?') in Inst\n"; 1398 return; 1399 } 1400 } 1401 1402 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1403 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1404 1405 if (!NeedPositiveMask && !NeedNegativeMask) 1406 return; 1407 1408 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1409 1410 SmallString<16> MaskBytes; 1411 raw_svector_ostream S(MaskBytes); 1412 if (NeedPositiveMask) { 1413 encodeULEB128(PositiveMask.getZExtValue(), S); 1414 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1415 TableInfo.Table.push_back(MaskBytes[i]); 1416 } else 1417 TableInfo.Table.push_back(0); 1418 if (NeedNegativeMask) { 1419 MaskBytes.clear(); 1420 encodeULEB128(NegativeMask.getZExtValue(), S); 1421 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1422 TableInfo.Table.push_back(MaskBytes[i]); 1423 } else 1424 TableInfo.Table.push_back(0); 1425 } 1426 1427 // Emits table entries to decode the singleton. 1428 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1429 EncodingIDAndOpcode Opc) const { 1430 std::vector<unsigned> StartBits; 1431 std::vector<unsigned> EndBits; 1432 std::vector<uint64_t> FieldVals; 1433 insn_t Insn; 1434 insnWithID(Insn, Opc.EncodingID); 1435 1436 // Look for islands of undecoded bits of the singleton. 1437 getIslands(StartBits, EndBits, FieldVals, Insn); 1438 1439 unsigned Size = StartBits.size(); 1440 1441 // Emit the predicate table entry if one is needed. 1442 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1443 1444 // Check any additional encoding fields needed. 
1445 for (unsigned I = Size; I != 0; --I) { 1446 unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1; 1447 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 1448 TableInfo.Table.push_back(MCD::OPC_CheckField); 1449 uint8_t Buffer[16], *P; 1450 encodeULEB128(StartBits[I - 1], Buffer); 1451 for (P = Buffer; *P >= 128; ++P) 1452 TableInfo.Table.push_back(*P); 1453 TableInfo.Table.push_back(*P); 1454 TableInfo.Table.push_back(NumBits); 1455 encodeULEB128(FieldVals[I - 1], Buffer); 1456 for (P = Buffer; *P >= 128; ++P) 1457 TableInfo.Table.push_back(*P); 1458 TableInfo.Table.push_back(*P); 1459 // Push location for NumToSkip backpatching. 1460 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1461 // The fixup is always 24-bits, so go ahead and allocate the space 1462 // in the table so all our relative position calculations work OK even 1463 // before we fully resolve the real value here. 1464 TableInfo.Table.push_back(0); 1465 TableInfo.Table.push_back(0); 1466 TableInfo.Table.push_back(0); 1467 } 1468 1469 // Check for soft failure of the match. 1470 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1471 1472 bool HasCompleteDecoder; 1473 unsigned DIdx = 1474 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1475 1476 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1477 // whether the instruction decoder is complete or not. If it is complete 1478 // then it handles all possible values of remaining variable/unfiltered bits 1479 // and for any value can determine if the bitpattern is a valid instruction 1480 // or not. This means OPC_Decode will be the final step in the decoding 1481 // process. If it is not complete, then the Fail return code from the 1482 // decoder method indicates that additional processing should be done to see 1483 // if there is any other instruction that also matches the bitpattern and 1484 // can decode it. 
1485 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode 1486 : MCD::OPC_TryDecode); 1487 NumEncodingsSupported++; 1488 uint8_t Buffer[16], *p; 1489 encodeULEB128(Opc.Opcode, Buffer); 1490 for (p = Buffer; *p >= 128; ++p) 1491 TableInfo.Table.push_back(*p); 1492 TableInfo.Table.push_back(*p); 1493 1494 SmallString<16> Bytes; 1495 raw_svector_ostream S(Bytes); 1496 encodeULEB128(DIdx, S); 1497 1498 // Decoder index 1499 for (unsigned i = 0, e = Bytes.size(); i != e; ++i) 1500 TableInfo.Table.push_back(Bytes[i]); 1501 1502 if (!HasCompleteDecoder) { 1503 // Push location for NumToSkip backpatching. 1504 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1505 // Allocate the space for the fixup. 1506 TableInfo.Table.push_back(0); 1507 TableInfo.Table.push_back(0); 1508 TableInfo.Table.push_back(0); 1509 } 1510 } 1511 1512 // Emits table entries to decode the singleton, and then to decode the rest. 1513 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1514 const Filter &Best) const { 1515 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1516 1517 // complex singletons need predicate checks from the first singleton 1518 // to refer forward to the variable filterchooser that follows. 1519 TableInfo.FixupStack.emplace_back(); 1520 1521 emitSingletonTableEntry(TableInfo, Opc); 1522 1523 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1524 TableInfo.Table.size()); 1525 TableInfo.FixupStack.pop_back(); 1526 1527 Best.getVariableFC().emitTableEntries(TableInfo); 1528 } 1529 1530 // Assign a single filter and run with it. Top level API client can initialize 1531 // with a single filter to start the filtering process. 1532 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1533 bool mixed) { 1534 Filters.clear(); 1535 Filters.emplace_back(*this, startBit, numBit, true); 1536 BestIndex = 0; // Sole Filter instance to choose from. 
1537 bestFilter().recurse(); 1538 } 1539 1540 // reportRegion is a helper function for filterProcessor to mark a region as 1541 // eligible for use as a filter region. 1542 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1543 unsigned BitIndex, bool AllowMixed) { 1544 if (RA == ATTR_MIXED && AllowMixed) 1545 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1546 else if (RA == ATTR_ALL_SET && !AllowMixed) 1547 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1548 } 1549 1550 // FilterProcessor scans the well-known encoding bits of the instructions and 1551 // builds up a list of candidate filters. It chooses the best filter and 1552 // recursively descends down the decoding tree. 1553 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1554 Filters.clear(); 1555 BestIndex = -1; 1556 unsigned numInstructions = Opcodes.size(); 1557 1558 assert(numInstructions && "Filter created with no instructions"); 1559 1560 // No further filtering is necessary. 1561 if (numInstructions == 1) 1562 return true; 1563 1564 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1565 // instructions is 3. 1566 if (AllowMixed && !Greedy) { 1567 assert(numInstructions == 3); 1568 1569 for (auto Opcode : Opcodes) { 1570 std::vector<unsigned> StartBits; 1571 std::vector<unsigned> EndBits; 1572 std::vector<uint64_t> FieldVals; 1573 insn_t Insn; 1574 1575 insnWithID(Insn, Opcode.EncodingID); 1576 1577 // Look for islands of undecoded bits of any instruction. 1578 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1579 // Found an instruction with island(s). Now just assign a filter. 1580 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1581 return true; 1582 } 1583 } 1584 } 1585 1586 unsigned BitIndex; 1587 1588 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1589 // The automaton consumes the corresponding bit from each 1590 // instruction. 
1591 // 1592 // Input symbols: 0, 1, and _ (unset). 1593 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1594 // Initial state: NONE. 1595 // 1596 // (NONE) ------- [01] -> (ALL_SET) 1597 // (NONE) ------- _ ----> (ALL_UNSET) 1598 // (ALL_SET) ---- [01] -> (ALL_SET) 1599 // (ALL_SET) ---- _ ----> (MIXED) 1600 // (ALL_UNSET) -- [01] -> (MIXED) 1601 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1602 // (MIXED) ------ . ----> (MIXED) 1603 // (FILTERED)---- . ----> (FILTERED) 1604 1605 std::vector<bitAttr_t> bitAttrs; 1606 1607 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1608 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1609 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1610 if (FilterBitValues[BitIndex] == BIT_TRUE || 1611 FilterBitValues[BitIndex] == BIT_FALSE) 1612 bitAttrs.push_back(ATTR_FILTERED); 1613 else 1614 bitAttrs.push_back(ATTR_NONE); 1615 1616 for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { 1617 insn_t insn; 1618 1619 insnWithID(insn, Opcodes[InsnIndex].EncodingID); 1620 1621 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1622 switch (bitAttrs[BitIndex]) { 1623 case ATTR_NONE: 1624 if (insn[BitIndex] == BIT_UNSET) 1625 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1626 else 1627 bitAttrs[BitIndex] = ATTR_ALL_SET; 1628 break; 1629 case ATTR_ALL_SET: 1630 if (insn[BitIndex] == BIT_UNSET) 1631 bitAttrs[BitIndex] = ATTR_MIXED; 1632 break; 1633 case ATTR_ALL_UNSET: 1634 if (insn[BitIndex] != BIT_UNSET) 1635 bitAttrs[BitIndex] = ATTR_MIXED; 1636 break; 1637 case ATTR_MIXED: 1638 case ATTR_FILTERED: 1639 break; 1640 } 1641 } 1642 } 1643 1644 // The regionAttr automaton consumes the bitAttrs automatons' state, 1645 // lowest-to-highest. 
1646 // 1647 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1648 // States: NONE, ALL_SET, MIXED 1649 // Initial state: NONE 1650 // 1651 // (NONE) ----- F --> (NONE) 1652 // (NONE) ----- S --> (ALL_SET) ; and set region start 1653 // (NONE) ----- U --> (NONE) 1654 // (NONE) ----- M --> (MIXED) ; and set region start 1655 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1656 // (ALL_SET) -- S --> (ALL_SET) 1657 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1658 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1659 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1660 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1661 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1662 // (MIXED) ---- M --> (MIXED) 1663 1664 bitAttr_t RA = ATTR_NONE; 1665 unsigned StartBit = 0; 1666 1667 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1668 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1669 1670 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1671 1672 switch (RA) { 1673 case ATTR_NONE: 1674 switch (bitAttr) { 1675 case ATTR_FILTERED: 1676 break; 1677 case ATTR_ALL_SET: 1678 StartBit = BitIndex; 1679 RA = ATTR_ALL_SET; 1680 break; 1681 case ATTR_ALL_UNSET: 1682 break; 1683 case ATTR_MIXED: 1684 StartBit = BitIndex; 1685 RA = ATTR_MIXED; 1686 break; 1687 default: 1688 llvm_unreachable("Unexpected bitAttr!"); 1689 } 1690 break; 1691 case ATTR_ALL_SET: 1692 switch (bitAttr) { 1693 case ATTR_FILTERED: 1694 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1695 RA = ATTR_NONE; 1696 break; 1697 case ATTR_ALL_SET: 1698 break; 1699 case ATTR_ALL_UNSET: 1700 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1701 RA = ATTR_NONE; 1702 break; 1703 case ATTR_MIXED: 1704 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1705 StartBit = BitIndex; 1706 RA = ATTR_MIXED; 1707 break; 1708 default: 1709 llvm_unreachable("Unexpected bitAttr!"); 1710 } 1711 break; 1712 case ATTR_MIXED: 1713 switch (bitAttr) { 
1714 case ATTR_FILTERED: 1715 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1716 StartBit = BitIndex; 1717 RA = ATTR_NONE; 1718 break; 1719 case ATTR_ALL_SET: 1720 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1721 StartBit = BitIndex; 1722 RA = ATTR_ALL_SET; 1723 break; 1724 case ATTR_ALL_UNSET: 1725 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1726 RA = ATTR_NONE; 1727 break; 1728 case ATTR_MIXED: 1729 break; 1730 default: 1731 llvm_unreachable("Unexpected bitAttr!"); 1732 } 1733 break; 1734 case ATTR_ALL_UNSET: 1735 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1736 case ATTR_FILTERED: 1737 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1738 } 1739 } 1740 1741 // At the end, if we're still in ALL_SET or MIXED states, report a region 1742 switch (RA) { 1743 case ATTR_NONE: 1744 break; 1745 case ATTR_FILTERED: 1746 break; 1747 case ATTR_ALL_SET: 1748 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1749 break; 1750 case ATTR_ALL_UNSET: 1751 break; 1752 case ATTR_MIXED: 1753 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1754 break; 1755 } 1756 1757 // We have finished with the filter processings. Now it's time to choose 1758 // the best performing filter. 1759 BestIndex = 0; 1760 bool AllUseless = true; 1761 unsigned BestScore = 0; 1762 1763 for (unsigned i = 0, e = Filters.size(); i != e; ++i) { 1764 unsigned Usefulness = Filters[i].usefulness(); 1765 1766 if (Usefulness) 1767 AllUseless = false; 1768 1769 if (Usefulness > BestScore) { 1770 BestIndex = i; 1771 BestScore = Usefulness; 1772 } 1773 } 1774 1775 if (!AllUseless) 1776 bestFilter().recurse(); 1777 1778 return !AllUseless; 1779 } // end of FilterChooser::filterProcessor(bool) 1780 1781 // Decides on the best configuration of filter(s) to use in order to decode 1782 // the instructions. A conflict of instructions may occur, in which case we 1783 // dump the conflict set to the standard error. 
1784 void FilterChooser::doFilter() { 1785 unsigned Num = Opcodes.size(); 1786 assert(Num && "FilterChooser created with no instructions"); 1787 1788 // Try regions of consecutive known bit values first. 1789 if (filterProcessor(false)) 1790 return; 1791 1792 // Then regions of mixed bits (both known and unitialized bit values allowed). 1793 if (filterProcessor(true)) 1794 return; 1795 1796 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1797 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1798 // well-known encoding pattern. In such case, we backtrack and scan for the 1799 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1800 if (Num == 3 && filterProcessor(true, false)) 1801 return; 1802 1803 // If we come to here, the instruction decoding has failed. 1804 // Set the BestIndex to -1 to indicate so. 1805 BestIndex = -1; 1806 } 1807 1808 // emitTableEntries - Emit state machine entries to decode our share of 1809 // instructions. 1810 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { 1811 if (Opcodes.size() == 1) { 1812 // There is only one instruction in the set, which is great! 1813 // Call emitSingletonDecoder() to see whether there are any remaining 1814 // encodings bits. 1815 emitSingletonTableEntry(TableInfo, Opcodes[0]); 1816 return; 1817 } 1818 1819 // Choose the best filter to do the decodings! 1820 if (BestIndex != -1) { 1821 const Filter &Best = Filters[BestIndex]; 1822 if (Best.getNumFiltered() == 1) 1823 emitSingletonTableEntry(TableInfo, Best); 1824 else 1825 Best.emitTableEntry(TableInfo); 1826 return; 1827 } 1828 1829 // We don't know how to decode these instructions! Dump the 1830 // conflict set and bail. 1831 1832 // Print out useful conflict information for postmortem analysis. 
1833 errs() << "Decoding Conflict:\n"; 1834 1835 dumpStack(errs(), "\t\t"); 1836 1837 for (auto Opcode : Opcodes) { 1838 errs() << '\t'; 1839 emitNameWithID(errs(), Opcode.EncodingID); 1840 errs() << " "; 1841 dumpBits( 1842 errs(), 1843 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst")); 1844 errs() << '\n'; 1845 } 1846 } 1847 1848 static std::string findOperandDecoderMethod(Record *Record) { 1849 std::string Decoder; 1850 1851 RecordVal *DecoderString = Record->getValue("DecoderMethod"); 1852 StringInit *String = 1853 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; 1854 if (String) { 1855 Decoder = std::string(String->getValue()); 1856 if (!Decoder.empty()) 1857 return Decoder; 1858 } 1859 1860 if (Record->isSubClassOf("RegisterOperand")) 1861 Record = Record->getValueAsDef("RegClass"); 1862 1863 if (Record->isSubClassOf("RegisterClass")) { 1864 Decoder = "Decode" + Record->getName().str() + "RegisterClass"; 1865 } else if (Record->isSubClassOf("PointerLikeRegClass")) { 1866 Decoder = "DecodePointerLikeRegClass" + 1867 utostr(Record->getValueAsInt("RegClassKind")); 1868 } 1869 1870 return Decoder; 1871 } 1872 1873 OperandInfo getOpInfo(Record *TypeRecord) { 1874 std::string Decoder = findOperandDecoderMethod(TypeRecord); 1875 1876 RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder"); 1877 BitInit *HasCompleteDecoderBit = 1878 HasCompleteDecoderVal 1879 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 1880 : nullptr; 1881 bool HasCompleteDecoder = 1882 HasCompleteDecoderBit ? 
HasCompleteDecoderBit->getValue() : true; 1883 1884 return OperandInfo(Decoder, HasCompleteDecoder); 1885 } 1886 1887 void parseVarLenInstOperand(const Record &Def, 1888 std::vector<OperandInfo> &Operands, 1889 const CodeGenInstruction &CGI) { 1890 1891 const RecordVal *RV = Def.getValue("Inst"); 1892 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV); 1893 SmallVector<int> TiedTo; 1894 1895 for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) { 1896 auto &Op = CGI.Operands[Idx]; 1897 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) 1898 for (auto *Arg : Op.MIOperandInfo->getArgs()) 1899 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef())); 1900 else 1901 Operands.push_back(getOpInfo(Op.Rec)); 1902 1903 int TiedReg = Op.getTiedRegister(); 1904 TiedTo.push_back(-1); 1905 if (TiedReg != -1) { 1906 TiedTo[Idx] = TiedReg; 1907 TiedTo[TiedReg] = Idx; 1908 } 1909 } 1910 1911 unsigned CurrBitPos = 0; 1912 for (auto &EncodingSegment : VLI) { 1913 unsigned Offset = 0; 1914 StringRef OpName; 1915 1916 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) { 1917 OpName = SI->getValue(); 1918 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) { 1919 OpName = cast<StringInit>(DI->getArg(0))->getValue(); 1920 Offset = cast<IntInit>(DI->getArg(2))->getValue(); 1921 } 1922 1923 if (!OpName.empty()) { 1924 auto OpSubOpPair = 1925 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( 1926 OpName); 1927 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair); 1928 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1929 if (!EncodingSegment.CustomDecoder.empty()) 1930 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str(); 1931 1932 int TiedReg = TiedTo[OpSubOpPair.first]; 1933 if (TiedReg != -1) { 1934 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( 1935 std::pair(TiedReg, OpSubOpPair.second)); 1936 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, 
Offset); 1937 } 1938 } 1939 1940 CurrBitPos += EncodingSegment.BitWidth; 1941 } 1942 } 1943 1944 static void debugDumpRecord(const Record &Rec) { 1945 // Dump the record, so we can see what's going on... 1946 std::string E; 1947 raw_string_ostream S(E); 1948 S << "Dumping record for previous error:\n"; 1949 S << Rec; 1950 PrintNote(E); 1951 } 1952 1953 /// For an operand field named OpName: populate OpInfo.InitValue with the 1954 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1955 /// insert from the decoded instruction. 1956 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1957 std::map<std::string, std::string> &TiedNames, 1958 StringRef OpName, OperandInfo &OpInfo) { 1959 // Some bits of the operand may be required to be 1 depending on the 1960 // instruction's encoding. Collect those bits. 1961 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1962 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1963 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1964 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1965 if (OpBit->getValue()) 1966 OpInfo.InitValue |= 1ULL << I; 1967 1968 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1969 VarInit *Var; 1970 unsigned Offset = 0; 1971 for (; J != Bits.getNumBits(); ++J) { 1972 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1973 if (BJ) { 1974 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1975 if (I == J) 1976 Offset = BJ->getBitNum(); 1977 else if (BJ->getBitNum() != Offset + J - I) 1978 break; 1979 } else { 1980 Var = dyn_cast<VarInit>(Bits.getBit(J)); 1981 } 1982 if (!Var || (Var->getName() != OpName && 1983 Var->getName() != TiedNames[std::string(OpName)])) 1984 break; 1985 } 1986 if (I == J) 1987 ++J; 1988 else 1989 OpInfo.addField(I, J - I, Offset); 1990 } 1991 } 1992 1993 static unsigned 1994 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 1995 const 
CodeGenInstruction &CGI, unsigned Opc, 1996 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1997 bool IsVarLenInst) { 1998 const Record &Def = *CGI.TheDef; 1999 // If all the bit positions are not specified; do not decode this instruction. 2000 // We are bound to fail! For proper disassembly, the well-known encoding bits 2001 // of the instruction must be fully specified. 2002 2003 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 2004 if (Bits.allInComplete()) 2005 return 0; 2006 2007 std::vector<OperandInfo> InsnOperands; 2008 2009 // If the instruction has specified a custom decoding hook, use that instead 2010 // of trying to auto-generate the decoder. 2011 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 2012 if (InstDecoder != "") { 2013 bool HasCompleteInstDecoder = 2014 EncodingDef.getValueAsBit("hasCompleteDecoder"); 2015 InsnOperands.push_back( 2016 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 2017 Operands[Opc] = InsnOperands; 2018 return Bits.getNumBits(); 2019 } 2020 2021 // Generate a description of the operand of the instruction that we know 2022 // how to decode automatically. 2023 // FIXME: We'll need to have a way to manually override this as needed. 2024 2025 // Gather the outputs/inputs of the instruction, so we can find their 2026 // positions in the encoding. This assumes for now that they appear in the 2027 // MCInst in the order that they're listed. 
2028 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2029 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2030 DagInit *In = Def.getValueAsDag("InOperandList"); 2031 for (unsigned i = 0; i < Out->getNumArgs(); ++i) 2032 InOutOperands.push_back(std::pair(Out->getArg(i), Out->getArgNameStr(i))); 2033 for (unsigned i = 0; i < In->getNumArgs(); ++i) 2034 InOutOperands.push_back(std::pair(In->getArg(i), In->getArgNameStr(i))); 2035 2036 // Search for tied operands, so that we can correctly instantiate 2037 // operands that are not explicitly represented in the encoding. 2038 std::map<std::string, std::string> TiedNames; 2039 for (unsigned i = 0; i < CGI.Operands.size(); ++i) { 2040 auto &Op = CGI.Operands[i]; 2041 for (unsigned j = 0; j < Op.Constraints.size(); ++j) { 2042 const CGIOperandList::ConstraintInfo &CI = Op.Constraints[j]; 2043 if (CI.isTied()) { 2044 int tiedTo = CI.getTiedOperand(); 2045 std::pair<unsigned, unsigned> SO = 2046 CGI.Operands.getSubOperandNumber(tiedTo); 2047 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2048 if (TiedName.empty()) 2049 TiedName = CGI.Operands[SO.first].Name; 2050 std::string MyName = Op.SubOpNames[j]; 2051 if (MyName.empty()) 2052 MyName = Op.Name; 2053 2054 TiedNames[MyName] = TiedName; 2055 TiedNames[TiedName] = MyName; 2056 } 2057 } 2058 } 2059 2060 if (IsVarLenInst) { 2061 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2062 } else { 2063 // For each operand, see if we can figure out where it is encoded. 2064 for (const auto &Op : InOutOperands) { 2065 Init *OpInit = Op.first; 2066 StringRef OpName = Op.second; 2067 2068 // We're ready to find the instruction encoding locations for this 2069 // operand. 2070 2071 // First, find the operand type ("OpInit"), and sub-op names 2072 // ("SubArgDag") if present. 
2073 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2074 if (SubArgDag) 2075 OpInit = SubArgDag->getOperator(); 2076 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2077 // Lookup the sub-operands from the operand type record (note that only 2078 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2079 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2080 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2081 : nullptr; 2082 2083 // Lookup the decoder method and construct a new OperandInfo to hold our 2084 // result. 2085 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2086 2087 // If we have named sub-operands... 2088 if (SubArgDag) { 2089 // Then there should not be a custom decoder specified on the top-level 2090 // type. 2091 if (!OpInfo.Decoder.empty()) { 2092 PrintError(EncodingDef.getLoc(), 2093 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2094 OpInit->getAsString() + 2095 "\" with a custom DecoderMethod, but also named " 2096 "sub-operands."); 2097 continue; 2098 } 2099 2100 // Decode each of the sub-ops separately. 2101 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2102 for (unsigned i = 0; i < SubOps->getNumArgs(); ++i) { 2103 StringRef SubOpName = SubArgDag->getArgNameStr(i); 2104 OperandInfo SubOpInfo = 2105 getOpInfo(cast<DefInit>(SubOps->getArg(i))->getDef()); 2106 2107 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2108 SubOpInfo); 2109 InsnOperands.push_back(SubOpInfo); 2110 } 2111 continue; 2112 } 2113 2114 // Otherwise, if we have an operand with sub-operands, but they aren't 2115 // named... 2116 if (SubOps && OpInfo.Decoder.empty()) { 2117 // If it's a single sub-operand, and no custom decoder, use the decoder 2118 // from the one sub-operand. 2119 if (SubOps->getNumArgs() == 1) 2120 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2121 2122 // If we have multiple sub-ops, there'd better have a custom 2123 // decoder. 
(Otherwise we don't know how to populate them properly...) 2124 if (SubOps->getNumArgs() > 1) { 2125 PrintError(EncodingDef.getLoc(), 2126 "DecoderEmitter: operand \"" + OpName + 2127 "\" uses MIOperandInfo with multiple ops, but doesn't " 2128 "have a custom decoder!"); 2129 debugDumpRecord(EncodingDef); 2130 continue; 2131 } 2132 } 2133 2134 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2135 // FIXME: it should be an error not to find a definition for a given 2136 // operand, rather than just failing to add it to the resulting 2137 // instruction! (This is a longstanding bug, which will be addressed in an 2138 // upcoming change.) 2139 if (OpInfo.numFields() > 0) 2140 InsnOperands.push_back(OpInfo); 2141 } 2142 } 2143 Operands[Opc] = InsnOperands; 2144 2145 #if 0 2146 LLVM_DEBUG({ 2147 // Dumps the instruction encoding bits. 2148 dumpBits(errs(), Bits); 2149 2150 errs() << '\n'; 2151 2152 // Dumps the list of operand info. 2153 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2154 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2155 const std::string &OperandName = Info.Name; 2156 const Record &OperandDef = *Info.Rec; 2157 2158 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2159 } 2160 }); 2161 #endif 2162 2163 return Bits.getNumBits(); 2164 } 2165 2166 // emitFieldFromInstruction - Emit the templated helper function 2167 // fieldFromInstruction(). 2168 // On Windows we make sure that this function is not inlined when 2169 // using the VS compiler. It has a bug which causes the function 2170 // to be optimized out in some circumstances. 
// See llvm.org/pr38292
//
// The emitted text defines two overloads of fieldFromInstruction(), selected
// with std::enable_if_t on std::is_integral<InsnType>: a mask-and-shift
// version for integral types, and a version that forwards to
// extractBitsAsZExtValue() for all other types. Everything is written to OS
// as a single chain of string literals.
static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
  // Leading comment block of the generated code, then the integral overload.
  OS << "// Helper functions for extracting fields from encoded instructions.\n"
     << "// InsnType must either be integral or an APInt-like object that "
        "must:\n"
     << "// * be default-constructible and copy-constructible\n"
     << "// * be constructible from an APInt (this can be private)\n"
     << "// * Support insertBits(bits, startBit, numBits)\n"
     << "// * Support extractBitsAsZExtValue(numBits, startBit)\n"
     << "// * Support the ~, &, ==, and != operators with other objects of "
        "the same type\n"
     << "// * Support the != and bitwise & with uint64_t\n"
     << "// * Support put (<<) to raw_ostream&\n"
     << "template <typename InsnType>\n"
     // The noinline attribute is emitted only for MSVC (not clang-cl).
     << "#if defined(_MSC_VER) && !defined(__clang__)\n"
     << "__declspec(noinline)\n"
     << "#endif\n"
     << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n"
     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
     << " unsigned numBits) {\n"
     << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit "
        "extractions!\");\n"
     << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
     << " \"Instruction field out of bounds!\");\n"
     << " InsnType fieldMask;\n"
     // The generated code special-cases a full-width field with an all-ones
     // mask instead of shifting by the full bit width.
     << " if (numBits == sizeof(InsnType) * 8)\n"
     << " fieldMask = (InsnType)(-1LL);\n"
     << " else\n"
     << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
     << " return (insn & fieldMask) >> startBit;\n"
     << "}\n"
     << "\n"
     // Non-integral overload.
     << "template <typename InsnType>\n"
     << "static std::enable_if_t<!std::is_integral<InsnType>::value, "
        "uint64_t>\n"
     << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n"
     << " unsigned numBits) {\n"
     << " return insn.extractBitsAsZExtValue(numBits, startBit);\n"
     << "}\n\n";
}

// emitInsertBits - Emit the templated helper function insertBits().
static void emitInsertBits(formatted_raw_ostream &OS) {
  // Two overloads are emitted, selected on std::is_integral<InsnType>: the
  // integral one ORs the shifted bits into the field, the other forwards to
  // the type's own insertBits() member.
  OS << "// Helper function for inserting bits extracted from an encoded "
        "instruction into\n"
     << "// a field.\n"
     << "template <typename InsnType>\n"
     << "static std::enable_if_t<std::is_integral<InsnType>::value>\n"
     << "insertBits(InsnType &field, InsnType bits, unsigned startBit, "
        "unsigned numBits) {\n"
     << " assert(startBit + numBits <= sizeof field * 8);\n"
     << " field |= (InsnType)bits << startBit;\n"
     << "}\n"
     << "\n"
     << "template <typename InsnType>\n"
     << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n"
     << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, "
        "unsigned numBits) {\n"
     << " field.insertBits(bits, startBit, numBits);\n"
     << "}\n\n";
}

// emitDecodeInstruction - Emit the templated helper function
// decodeInstruction().
//
// The emitted function is an interpreter loop over the byte-coded decoder
// table: it switches on the opcode byte at Ptr and handles
// OPC_ExtractField, OPC_FilterValue, OPC_CheckField, OPC_CheckPredicate,
// OPC_Decode, OPC_TryDecode, OPC_SoftFail and OPC_Fail.
//
// When IsVarLenInst is true the emitted function takes an additional
// `makeUp` callback parameter and calls it before each field extraction and
// before decoding (using InstrLenTable[Opc] as the length in the latter case).
static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                  bool IsVarLenInst) {
  // Function signature. The parameter list is left open so that the extra
  // makeUp parameter can be appended for variable-length targets.
  OS << "template <typename InsnType>\n"
     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
        "MCInst &MI,\n"
     << " InsnType insn, uint64_t "
        "Address,\n"
     << " const MCDisassembler *DisAsm,\n"
     << " const MCSubtargetInfo &STI";
  if (IsVarLenInst) {
    OS << ",\n"
       << " llvm::function_ref<void(APInt "
          "&,"
       << " uint64_t)> makeUp";
  }
  // Prologue, start of the interpreter loop, and OPC_ExtractField up to the
  // point where the field bounds (Start, Len) are known.
  OS << ") {\n"
     << " const FeatureBitset &Bits = STI.getFeatureBits();\n"
     << "\n"
     << " const uint8_t *Ptr = DecodeTable;\n"
     << " uint64_t CurFieldValue = 0;\n"
     << " DecodeStatus S = MCDisassembler::Success;\n"
     << " while (true) {\n"
     << " ptrdiff_t Loc = Ptr - DecodeTable;\n"
     << " switch (*Ptr) {\n"
     << " default:\n"
     << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
     << " return MCDisassembler::Fail;\n"
     << " case MCD::OPC_ExtractField: {\n"
     << " // Decode the start value.\n"
     << " unsigned DecodedLen;\n"
     << " unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
     << " Ptr += DecodedLen;\n"
     << " unsigned Len = *Ptr++;\n";
  if (IsVarLenInst)
    OS << " makeUp(insn, Start + Len);\n";
  // Rest of OPC_ExtractField, all of OPC_FilterValue, and OPC_CheckField up
  // to the point where the field bounds are known.
  OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
        "\", \"\n"
     << " << Len << \"): \" << CurFieldValue << \"\\n\");\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_FilterValue: {\n"
     << " // Decode the field value.\n"
     << " unsigned Len;\n"
     << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // Perform the filter operation.\n"
     << " if (Val != CurFieldValue)\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << "
        "\", \" << NumToSkip\n"
     << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" "
        ": \"PASS:\")\n"
     << " << \" continuing at \" << (Ptr - DecodeTable) << "
        "\"\\n\");\n"
     << "\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_CheckField: {\n"
     << " // Decode the start value.\n"
     << " unsigned Len;\n"
     << " unsigned Start = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " Len = *Ptr;\n";
  if (IsVarLenInst)
    OS << " makeUp(insn, Start + Len);\n";
  // Rest of OPC_CheckField, all of OPC_CheckPredicate, and OPC_Decode up to
  // the point where the MCInst has been reset and given its opcode.
  OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
     << " // Decode the field value.\n"
     << " unsigned PtrLen = 0;\n"
     << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
     << " Ptr += PtrLen;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // If the actual and expected values don't match, skip.\n"
     << " if (ExpectedValue != FieldValue)\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << "
        "\", \"\n"
     << " << Len << \", \" << ExpectedValue << \", \" << "
        "NumToSkip\n"
     << " << \"): FieldValue = \" << FieldValue << \", "
        "ExpectedValue = \"\n"
     << " << ExpectedValue << \": \"\n"
     << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : "
        "\"FAIL\\n\"));\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_CheckPredicate: {\n"
     << " unsigned Len;\n"
     << " // Decode the Predicate Index value.\n"
     << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << " // Check the predicate.\n"
     << " bool Pred;\n"
     << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
     << " Ptr += NumToSkip;\n"
     << " (void)Pred;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx "
        "<< \"): \"\n"
     << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
     << "\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_Decode: {\n"
     << " unsigned Len;\n"
     << " // Decode the Opcode value.\n"
     << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << "\n"
     << " MI.clear();\n"
     << " MI.setOpcode(Opc);\n"
     << " bool DecodeComplete;\n";
  if (IsVarLenInst) {
    // Variable-length targets look the instruction length up by opcode.
    OS << " Len = InstrLenTable[Opc];\n"
       << " makeUp(insn, Len);\n";
  }
  // Rest of OPC_Decode, then OPC_TryDecode, OPC_SoftFail, OPC_Fail and the
  // function epilogue.
  OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
        "DecodeComplete);\n"
     << " assert(DecodeComplete);\n"
     << "\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
     << " << \", using decoder \" << DecodeIdx << \": \"\n"
     << " << (S != MCDisassembler::Fail ? \"PASS\" : "
        "\"FAIL\") << \"\\n\");\n"
     << " return S;\n"
     << " }\n"
     << " case MCD::OPC_TryDecode: {\n"
     << " unsigned Len;\n"
     << " // Decode the Opcode value.\n"
     << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // Perform the decode operation.\n"
     << " MCInst TmpMI;\n"
     << " TmpMI.setOpcode(Opc);\n"
     << " bool DecodeComplete;\n"
     << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, "
        "DecodeComplete);\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << "
        "Opc\n"
     << " << \", using decoder \" << DecodeIdx << \": \");\n"
     << "\n"
     << " if (DecodeComplete) {\n"
     << " // Decoding complete.\n"
     << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : "
        "\"FAIL\") << \"\\n\");\n"
     << " MI = TmpMI;\n"
     << " return S;\n"
     << " } else {\n"
     << " assert(S == MCDisassembler::Fail);\n"
     << " // If the decoding was incomplete, skip.\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - "
        "DecodeTable) << \"\\n\");\n"
     << " // Reset decode status. This also drops a SoftFail status "
        "that could be\n"
     << " // set before the decode attempt.\n"
     << " S = MCDisassembler::Success;\n"
     << " }\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_SoftFail: {\n"
     << " // Decode the mask values.\n"
     << " unsigned Len;\n"
     << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " bool Fail = (insn & PositiveMask) != 0 || (~insn & "
        "NegativeMask) != 0;\n"
     << " if (Fail)\n"
     << " S = MCDisassembler::SoftFail;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
        "\"FAIL\\n\" : \"PASS\\n\"));\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_Fail: {\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
     << " return MCDisassembler::Fail;\n"
     << " }\n"
     << " }\n"
     << " }\n"
     << " llvm_unreachable(\"bogosity detected in disassembler state "
        "machine!\");\n"
     << "}\n\n";
}

// Helper to propagate SoftFail status. Returns false if the status is Fail;
// callers are expected to early-exit in that condition. (Note, the '&' operator
// is correct to propagate the values of this enum; see comment on 'enum
// DecodeStatus'.)
//
// This function writes the text of the static helper Check() to OS.
static void emitCheck(formatted_raw_ostream &OS) {
  OS << "static bool Check(DecodeStatus &Out, DecodeStatus In) {\n"
     << " Out = static_cast<DecodeStatus>(Out & In);\n"
     << " return Out != MCDisassembler::Fail;\n"
     << "}\n\n";
}

// Collect all HwModes referenced by the target for encoding purposes,
// returning a vector of corresponding names.
2454 static void 2455 collectHwModesReferencedForEncodings(const CodeGenHwModes &HWM, 2456 std::vector<StringRef> &Names) { 2457 SmallBitVector BV(HWM.getNumModeIds()); 2458 for (const auto &MS : HWM.getHwModeSelects()) { 2459 for (const HwModeSelect::PairType &P : MS.second.Items) { 2460 if (P.second->isSubClassOf("InstructionEncoding")) 2461 BV.set(P.first); 2462 } 2463 } 2464 transform(BV.set_bits(), std::back_inserter(Names), 2465 [&HWM](const int &M) { return HWM.getMode(M).Name; }); 2466 } 2467 2468 // Emits disassembler code for instruction decoding. 2469 void DecoderEmitter::run(raw_ostream &o) { 2470 formatted_raw_ostream OS(o); 2471 OS << "#include \"llvm/MC/MCInst.h\"\n"; 2472 OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n"; 2473 OS << "#include \"llvm/Support/DataTypes.h\"\n"; 2474 OS << "#include \"llvm/Support/Debug.h\"\n"; 2475 OS << "#include \"llvm/Support/LEB128.h\"\n"; 2476 OS << "#include \"llvm/Support/raw_ostream.h\"\n"; 2477 OS << "#include \"llvm/TargetParser/SubtargetFeature.h\"\n"; 2478 OS << "#include <assert.h>\n"; 2479 OS << '\n'; 2480 OS << "namespace llvm {\n\n"; 2481 2482 emitFieldFromInstruction(OS); 2483 emitInsertBits(OS); 2484 emitCheck(OS); 2485 2486 Target.reverseBitsForLittleEndianEncoding(); 2487 2488 // Parameterize the decoders based on namespace and instruction width. 2489 2490 // First, collect all encoding-related HwModes referenced by the target. 2491 // If HwModeNames is empty, add the empty string so we always have one HwMode. 
2492 const CodeGenHwModes &HWM = Target.getHwModes(); 2493 std::vector<StringRef> HwModeNames; 2494 collectHwModesReferencedForEncodings(HWM, HwModeNames); 2495 if (HwModeNames.empty()) 2496 HwModeNames.push_back(""); 2497 2498 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2499 NumberedEncodings.reserve(NumberedInstructions.size()); 2500 for (const auto &NumberedInstruction : NumberedInstructions) { 2501 if (const RecordVal *RV = 2502 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2503 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2504 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2505 for (auto &KV : EBM) 2506 NumberedEncodings.emplace_back(KV.second, NumberedInstruction, 2507 HWM.getMode(KV.first).Name); 2508 continue; 2509 } 2510 } 2511 // This instruction is encoded the same on all HwModes. Emit it for all 2512 // HwModes by default, otherwise leave it in a single common table. 2513 if (DecoderEmitterSuppressDuplicates) { 2514 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2515 NumberedInstruction, "AllModes"); 2516 } else { 2517 for (StringRef HwModeName : HwModeNames) 2518 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2519 NumberedInstruction, HwModeName); 2520 } 2521 } 2522 for (const auto &NumberedAlias : 2523 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2524 NumberedEncodings.emplace_back( 2525 NumberedAlias, 2526 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2527 2528 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2529 OpcMap; 2530 std::map<unsigned, std::vector<OperandInfo>> Operands; 2531 std::vector<unsigned> InstrLen; 2532 2533 bool IsVarLenInst = 2534 any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 2535 RecordVal *RV = CGI->TheDef->getValue("Inst"); 2536 return RV && isa<DagInit>(RV->getValue()); 2537 }); 2538 unsigned MaxInstLen = 0; 2539 2540 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) 
{ 2541 const Record *EncodingDef = NumberedEncodings[i].EncodingDef; 2542 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst; 2543 const Record *Def = Inst->TheDef; 2544 unsigned Size = EncodingDef->getValueAsInt("Size"); 2545 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2546 Def->getValueAsBit("isPseudo") || 2547 Def->getValueAsBit("isAsmParserOnly") || 2548 Def->getValueAsBit("isCodeGenOnly")) { 2549 NumEncodingsLackingDisasm++; 2550 continue; 2551 } 2552 2553 if (i < NumberedInstructions.size()) 2554 NumInstructions++; 2555 NumEncodings++; 2556 2557 if (!Size && !IsVarLenInst) 2558 continue; 2559 2560 if (IsVarLenInst) 2561 InstrLen.resize(NumberedInstructions.size(), 0); 2562 2563 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i, 2564 Operands, IsVarLenInst)) { 2565 if (IsVarLenInst) { 2566 MaxInstLen = std::max(MaxInstLen, Len); 2567 InstrLen[i] = Len; 2568 } 2569 std::string DecoderNamespace = 2570 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2571 if (!NumberedEncodings[i].HwModeName.empty()) 2572 DecoderNamespace += 2573 std::string("_") + NumberedEncodings[i].HwModeName.str(); 2574 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2575 i, Target.getInstrIntValue(Def)); 2576 } else { 2577 NumEncodingsOmitted++; 2578 } 2579 } 2580 2581 DecoderTableInfo TableInfo; 2582 for (const auto &Opc : OpcMap) { 2583 // Emit the decoder for this namespace+width combination. 2584 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2585 NumberedEncodings.size()); 2586 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2587 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2588 2589 // The decode table is cleared for each top level decoder function. The 2590 // predicates and decoders themselves, however, are shared across all 2591 // decoders to give more opportunities for uniqueing. 
2592 TableInfo.Table.clear(); 2593 TableInfo.FixupStack.clear(); 2594 TableInfo.Table.reserve(16384); 2595 TableInfo.FixupStack.emplace_back(); 2596 FC.emitTableEntries(TableInfo); 2597 // Any NumToSkip fixups in the top level scope can resolve to the 2598 // OPC_Fail at the end of the table. 2599 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2600 // Resolve any NumToSkip fixups in the current scope. 2601 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2602 TableInfo.Table.size()); 2603 TableInfo.FixupStack.clear(); 2604 2605 TableInfo.Table.push_back(MCD::OPC_Fail); 2606 2607 // Print the table to the output stream. 2608 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first, 2609 Opc.second); 2610 } 2611 2612 // For variable instruction, we emit a instruction length table 2613 // to let the decoder know how long the instructions are. 2614 // You can see example usage in M68k's disassembler. 2615 if (IsVarLenInst) 2616 emitInstrLenTable(OS, InstrLen); 2617 // Emit the predicate function. 2618 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2619 2620 // Emit the decoder function. 2621 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2622 2623 // Emit the main entry point for the decoder, decodeInstruction(). 2624 emitDecodeInstruction(OS, IsVarLenInst); 2625 2626 OS << "\n} // end namespace llvm\n"; 2627 } 2628 2629 namespace llvm { 2630 2631 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2632 const std::string &PredicateNamespace) { 2633 DecoderEmitter(RK, PredicateNamespace).run(OS); 2634 } 2635 2636 } // end namespace llvm 2637