1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenHwModes.h" 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "InfoByHwMode.h" 18 #include "TableGenBackends.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/CachedHashString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/MC/MCDecoderOps.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/FormattedStream.h" 34 #include "llvm/Support/LEB128.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/TableGen/Error.h" 37 #include "llvm/TableGen/Record.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <map> 43 #include <memory> 44 #include <set> 45 #include <string> 46 #include <utility> 47 #include <vector> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "decoder-emitter" 52 53 namespace { 54 55 STATISTIC(NumEncodings, "Number of encodings considered"); 56 STATISTIC(NumEncodingsLackingDisasm, 57 "Number of encodings without disassembler info"); 58 STATISTIC(NumInstructions, "Number of instructions 
considered"); 59 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 60 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 61 62 struct EncodingField { 63 unsigned Base, Width, Offset; 64 EncodingField(unsigned B, unsigned W, unsigned O) 65 : Base(B), Width(W), Offset(O) {} 66 }; 67 68 struct OperandInfo { 69 std::vector<EncodingField> Fields; 70 std::string Decoder; 71 bool HasCompleteDecoder; 72 uint64_t InitValue; 73 74 OperandInfo(std::string D, bool HCD) 75 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 76 77 void addField(unsigned Base, unsigned Width, unsigned Offset) { 78 Fields.push_back(EncodingField(Base, Width, Offset)); 79 } 80 81 unsigned numFields() const { return Fields.size(); } 82 83 typedef std::vector<EncodingField>::const_iterator const_iterator; 84 85 const_iterator begin() const { return Fields.begin(); } 86 const_iterator end() const { return Fields.end(); } 87 }; 88 89 typedef std::vector<uint8_t> DecoderTable; 90 typedef uint32_t DecoderFixup; 91 typedef std::vector<DecoderFixup> FixupList; 92 typedef std::vector<FixupList> FixupScopeList; 93 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 94 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 95 struct DecoderTableInfo { 96 DecoderTable Table; 97 FixupScopeList FixupStack; 98 PredicateSet Predicates; 99 DecoderSet Decoders; 100 }; 101 102 struct EncodingAndInst { 103 const Record *EncodingDef; 104 const CodeGenInstruction *Inst; 105 StringRef HwModeName; 106 107 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 108 StringRef HwModeName = "") 109 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 110 }; 111 112 struct EncodingIDAndOpcode { 113 unsigned EncodingID; 114 unsigned Opcode; 115 116 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 117 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 118 : EncodingID(EncodingID), Opcode(Opcode) {} 119 }; 120 121 using EncodingIDsVec = 
std::vector<EncodingIDAndOpcode>; 122 123 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 124 if (Value.EncodingDef != Value.Inst->TheDef) 125 OS << Value.EncodingDef->getName() << ":"; 126 OS << Value.Inst->TheDef->getName(); 127 return OS; 128 } 129 130 class DecoderEmitter { 131 RecordKeeper &RK; 132 std::vector<EncodingAndInst> NumberedEncodings; 133 134 public: 135 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) 136 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} 137 138 // Emit the decoder state machine table. 139 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 140 unsigned Indentation, unsigned BitWidth, StringRef Namespace, 141 const EncodingIDsVec &EncodingIDs) const; 142 void emitInstrLenTable(formatted_raw_ostream &OS, 143 std::vector<unsigned> &InstrLen) const; 144 void emitPredicateFunction(formatted_raw_ostream &OS, 145 PredicateSet &Predicates, 146 unsigned Indentation) const; 147 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, 148 unsigned Indentation) const; 149 150 // run - Output the code emitter 151 void run(raw_ostream &o); 152 153 private: 154 CodeGenTarget Target; 155 156 public: 157 std::string PredicateNamespace; 158 }; 159 160 } // end anonymous namespace 161 162 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 163 // for a bit value. 164 // 165 // BIT_UNFILTERED is used as the init value for a filter position. It is used 166 // only for filter processings. 167 typedef enum { 168 BIT_TRUE, // '1' 169 BIT_FALSE, // '0' 170 BIT_UNSET, // '?' 171 BIT_UNFILTERED // unfiltered 172 } bit_value_t; 173 174 static bool ValueSet(bit_value_t V) { 175 return (V == BIT_TRUE || V == BIT_FALSE); 176 } 177 178 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); } 179 180 static int Value(bit_value_t V) { 181 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 
0 : 1); 182 } 183 184 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 185 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 186 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 187 188 // The bit is uninitialized. 189 return BIT_UNSET; 190 } 191 192 // Prints the bit value for each position. 193 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 194 for (unsigned index = bits.getNumBits(); index > 0; --index) { 195 switch (bitFromBits(bits, index - 1)) { 196 case BIT_TRUE: 197 o << "1"; 198 break; 199 case BIT_FALSE: 200 o << "0"; 201 break; 202 case BIT_UNSET: 203 o << "_"; 204 break; 205 default: 206 llvm_unreachable("unexpected return value from bitFromBits"); 207 } 208 } 209 } 210 211 static BitsInit &getBitsField(const Record &def, StringRef str) { 212 const RecordVal *RV = def.getValue(str); 213 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 214 return *Bits; 215 216 // variable length instruction 217 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 218 SmallVector<Init *, 16> Bits; 219 220 for (auto &SI : VLI) { 221 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 222 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 223 Bits.push_back(BI->getBit(Idx)); 224 } 225 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 226 Bits.push_back(const_cast<BitInit *>(BI)); 227 } else { 228 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 229 Bits.push_back(UnsetInit::get(def.getRecords())); 230 } 231 } 232 233 return *BitsInit::get(def.getRecords(), Bits); 234 } 235 236 // Representation of the instruction to work on. 237 typedef std::vector<bit_value_t> insn_t; 238 239 namespace { 240 241 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 242 243 class FilterChooser; 244 245 /// Filter - Filter works with FilterChooser to produce the decoding tree for 246 /// the ISA. 
///
/// It is useful to think of a Filter as governing the switch stmts of the
/// decoding tree in a certain level.  Each case stmt delegates to an inferior
/// FilterChooser to decide what further decoding logic to employ, or in other
/// words, what other remaining bits to look at.  The FilterChooser eventually
/// chooses a best Filter to do its job.
///
/// This recursive scheme ends when the number of Opcodes assigned to the
/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
/// the Filter/FilterChooser combo does not know how to distinguish among the
/// Opcodes assigned.
///
/// An example of a conflict is
///
/// Conflict:
///                     111101000.00........00010000....
///                     111101000.00........0001........
///                     1111010...00........0001........
///                     1111010...00....................
///                     1111010.........................
///                     1111............................
///                     ................................
///     VST4q8a         111101000_00________00010000____
///     VST4q8b         111101000_00________00010000____
///
/// The Debug output shows the path that the decoding tree follows to reach the
/// conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
///
/// The encoding info in the .td files does not specify this meta information,
/// which could have been used by the decoder to resolve the conflict.  The
/// decoder could try to decode the even/odd register numbering and assign to
/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
/// version and returns the Opcode since the two have the same Asm format
/// string.
281 class Filter { 282 protected: 283 const FilterChooser 284 *Owner; // points to the FilterChooser who owns this filter 285 unsigned StartBit; // the starting bit position 286 unsigned NumBits; // number of bits to filter 287 bool Mixed; // a mixed region contains both set and unset bits 288 289 // Map of well-known segment value to the set of uid's with that value. 290 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; 291 292 // Set of uid's with non-constant segment values. 293 std::vector<EncodingIDAndOpcode> VariableInstructions; 294 295 // Map of well-known segment value to its delegate. 296 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 297 298 // Number of instructions which fall under FilteredInstructions category. 299 unsigned NumFiltered; 300 301 // Keeps track of the last opcode in the filtered bucket. 302 EncodingIDAndOpcode LastOpcFiltered; 303 304 public: 305 Filter(Filter &&f); 306 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 307 308 ~Filter() = default; 309 310 unsigned getNumFiltered() const { return NumFiltered; } 311 312 EncodingIDAndOpcode getSingletonOpc() const { 313 assert(NumFiltered == 1); 314 return LastOpcFiltered; 315 } 316 317 // Return the filter chooser for the group of instructions without constant 318 // segment values. 319 const FilterChooser &getVariableFC() const { 320 assert(NumFiltered == 1); 321 assert(FilterChooserMap.size() == 1); 322 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 323 } 324 325 // Divides the decoding task into sub tasks and delegates them to the 326 // inferior FilterChooser's. 327 // 328 // A special case arises when there's only one entry in the filtered 329 // instructions. In order to unambiguously decode the singleton, we need to 330 // match the remaining undecoded encoding bits against the singleton. 
331 void recurse(); 332 333 // Emit table entries to decode instructions given a segment or segments of 334 // bits. 335 void emitTableEntry(DecoderTableInfo &TableInfo) const; 336 337 // Returns the number of fanout produced by the filter. More fanout implies 338 // the filter distinguishes more categories of instructions. 339 unsigned usefulness() const; 340 }; // end class Filter 341 342 } // end anonymous namespace 343 344 // These are states of our finite state machines used in FilterChooser's 345 // filterProcessor() which produces the filter candidates to use. 346 typedef enum { 347 ATTR_NONE, 348 ATTR_FILTERED, 349 ATTR_ALL_SET, 350 ATTR_ALL_UNSET, 351 ATTR_MIXED 352 } bitAttr_t; 353 354 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 355 /// in order to perform the decoding of instructions at the current level. 356 /// 357 /// Decoding proceeds from the top down. Based on the well-known encoding bits 358 /// of instructions available, FilterChooser builds up the possible Filters that 359 /// can further the task of decoding by distinguishing among the remaining 360 /// candidate instructions. 361 /// 362 /// Once a filter has been chosen, it is called upon to divide the decoding task 363 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 364 /// processings. 365 /// 366 /// It is useful to think of a Filter as governing the switch stmts of the 367 /// decoding tree. And each case is delegated to an inferior FilterChooser to 368 /// decide what further remaining bits to look at. 369 namespace { 370 371 class FilterChooser { 372 protected: 373 friend class Filter; 374 375 // Vector of codegen instructions to choose our filter. 376 ArrayRef<EncodingAndInst> AllInstructions; 377 378 // Vector of uid's for this filter chooser to work on. 379 // The first member of the pair is the opcode id being decoded, the second is 380 // the opcode id that should be emitted. 
381 const std::vector<EncodingIDAndOpcode> &Opcodes; 382 383 // Lookup table for the operand decoding of instructions. 384 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 385 386 // Vector of candidate filters. 387 std::vector<Filter> Filters; 388 389 // Array of bit values passed down from our parent. 390 // Set to all BIT_UNFILTERED's for Parent == NULL. 391 std::vector<bit_value_t> FilterBitValues; 392 393 // Links to the FilterChooser above us in the decoding tree. 394 const FilterChooser *Parent; 395 396 // Index of the best filter from Filters. 397 int BestIndex; 398 399 // Width of instructions 400 unsigned BitWidth; 401 402 // Parent emitter 403 const DecoderEmitter *Emitter; 404 405 public: 406 FilterChooser(ArrayRef<EncodingAndInst> Insts, 407 const std::vector<EncodingIDAndOpcode> &IDs, 408 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 409 unsigned BW, const DecoderEmitter *E) 410 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 411 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 412 BitWidth(BW), Emitter(E) { 413 doFilter(); 414 } 415 416 FilterChooser(ArrayRef<EncodingAndInst> Insts, 417 const std::vector<EncodingIDAndOpcode> &IDs, 418 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 419 const std::vector<bit_value_t> &ParentFilterBitValues, 420 const FilterChooser &parent) 421 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 422 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 423 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 424 doFilter(); 425 } 426 427 FilterChooser(const FilterChooser &) = delete; 428 void operator=(const FilterChooser &) = delete; 429 430 unsigned getBitWidth() const { return BitWidth; } 431 432 protected: 433 // Populates the insn given the uid. 434 void insnWithID(insn_t &Insn, unsigned Opcode) const { 435 BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst"); 436 Insn.resize(BitWidth > Bits.getNumBits() ? 
BitWidth : Bits.getNumBits(), 437 BIT_UNSET); 438 // We may have a SoftFail bitmask, which specifies a mask where an encoding 439 // may differ from the value in "Inst" and yet still be valid, but the 440 // disassembler should return SoftFail instead of Success. 441 // 442 // This is used for marking UNPREDICTABLE instructions in the ARM world. 443 const RecordVal *RV = 444 AllInstructions[Opcode].EncodingDef->getValue("SoftFail"); 445 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 446 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 447 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 448 Insn[i] = BIT_UNSET; 449 else 450 Insn[i] = bitFromBits(Bits, i); 451 } 452 } 453 454 // Emit the name of the encoding/instruction pair. 455 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 456 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 457 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 458 if (EncodingDef != InstDef) 459 OS << EncodingDef->getName() << ":"; 460 OS << InstDef->getName(); 461 } 462 463 // Populates the field of the insn given the start position and the number of 464 // consecutive bits to scan for. 465 // 466 // Returns false if there exists any uninitialized bit value in the range. 467 // Returns true, otherwise. 468 bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, 469 unsigned NumBits) const; 470 471 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 472 /// filter array as a series of chars. 473 void dumpFilterArray(raw_ostream &o, 474 const std::vector<bit_value_t> &filter) const; 475 476 /// dumpStack - dumpStack traverses the filter chooser chain and calls 477 /// dumpFilterArray on each filter chooser up to the top level one. 
478 void dumpStack(raw_ostream &o, const char *prefix) const; 479 480 Filter &bestFilter() { 481 assert(BestIndex != -1 && "BestIndex not set"); 482 return Filters[BestIndex]; 483 } 484 485 bool PositionFiltered(unsigned i) const { 486 return ValueSet(FilterBitValues[i]); 487 } 488 489 // Calculates the island(s) needed to decode the instruction. 490 // This returns a lit of undecoded bits of an instructions, for example, 491 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 492 // decoded bits in order to verify that the instruction matches the Opcode. 493 unsigned getIslands(std::vector<unsigned> &StartBits, 494 std::vector<unsigned> &EndBits, 495 std::vector<uint64_t> &FieldVals, 496 const insn_t &Insn) const; 497 498 // Emits code to check the Predicates member of an instruction are true. 499 // Returns true if predicate matches were emitted, false otherwise. 500 bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 501 unsigned Opc) const; 502 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 503 raw_ostream &OS) const; 504 505 bool doesOpcodeNeedPredicate(unsigned Opc) const; 506 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 507 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 508 509 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 510 511 // Emits table entries to decode the singleton. 512 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 513 EncodingIDAndOpcode Opc) const; 514 515 // Emits code to decode the singleton, and then to decode the rest. 
516 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 517 const Filter &Best) const; 518 519 void emitBinaryParser(raw_ostream &o, unsigned &Indentation, 520 const OperandInfo &OpInfo, 521 bool &OpHasCompleteDecoder) const; 522 523 void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, 524 bool &HasCompleteDecoder) const; 525 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 526 bool &HasCompleteDecoder) const; 527 528 // Assign a single filter and run with it. 529 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 530 531 // reportRegion is a helper function for filterProcessor to mark a region as 532 // eligible for use as a filter region. 533 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 534 bool AllowMixed); 535 536 // FilterProcessor scans the well-known encoding bits of the instructions and 537 // builds up a list of candidate filters. It chooses the best filter and 538 // recursively descends down the decoding tree. 539 bool filterProcessor(bool AllowMixed, bool Greedy = true); 540 541 // Decides on the best configuration of filter(s) to use in order to decode 542 // the instructions. A conflict of instructions may occur, in which case we 543 // dump the conflict set to the standard error. 544 void doFilter(); 545 546 public: 547 // emitTableEntries - Emit state machine entries to decode our share of 548 // instructions. 
549 void emitTableEntries(DecoderTableInfo &TableInfo) const; 550 }; 551 552 } // end anonymous namespace 553 554 /////////////////////////// 555 // // 556 // Filter Implementation // 557 // // 558 /////////////////////////// 559 560 Filter::Filter(Filter &&f) 561 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 562 FilteredInstructions(std::move(f.FilteredInstructions)), 563 VariableInstructions(std::move(f.VariableInstructions)), 564 FilterChooserMap(std::move(f.FilterChooserMap)), 565 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} 566 567 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 568 bool mixed) 569 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 570 assert(StartBit + NumBits - 1 < Owner->BitWidth); 571 572 NumFiltered = 0; 573 LastOpcFiltered = {0, 0}; 574 575 for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { 576 insn_t Insn; 577 578 // Populates the insn given the uid. 579 Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID); 580 581 uint64_t Field; 582 // Scans the segment for possibly well-specified encoding bits. 583 bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); 584 585 if (ok) { 586 // The encoding bits are well-known. Lets add the uid of the 587 // instruction into the bucket keyed off the constant field value. 588 LastOpcFiltered = Owner->Opcodes[i]; 589 FilteredInstructions[Field].push_back(LastOpcFiltered); 590 ++NumFiltered; 591 } else { 592 // Some of the encoding bit(s) are unspecified. This contributes to 593 // one additional member of "Variable" instructions. 594 VariableInstructions.push_back(Owner->Opcodes[i]); 595 } 596 } 597 598 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && 599 "Filter returns no instruction categories"); 600 } 601 602 // Divides the decoding task into sub tasks and delegates them to the 603 // inferior FilterChooser's. 
604 // 605 // A special case arises when there's only one entry in the filtered 606 // instructions. In order to unambiguously decode the singleton, we need to 607 // match the remaining undecoded encoding bits against the singleton. 608 void Filter::recurse() { 609 // Starts by inheriting our parent filter chooser's filter bit values. 610 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 611 612 if (!VariableInstructions.empty()) { 613 // Conservatively marks each segment position as BIT_UNSET. 614 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 615 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 616 617 // Delegates to an inferior filter chooser for further processing on this 618 // group of instructions whose segment values are variable. 619 FilterChooserMap.insert(std::pair( 620 NO_FIXED_SEGMENTS_SENTINEL, 621 std::make_unique<FilterChooser>(Owner->AllInstructions, 622 VariableInstructions, Owner->Operands, 623 BitValueArray, *Owner))); 624 } 625 626 // No need to recurse for a singleton filtered instruction. 627 // See also Filter::emit*(). 628 if (getNumFiltered() == 1) { 629 assert(FilterChooserMap.size() == 1); 630 return; 631 } 632 633 // Otherwise, create sub choosers. 634 for (const auto &Inst : FilteredInstructions) { 635 636 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 637 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 638 if (Inst.first & (1ULL << bitIndex)) 639 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 640 else 641 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 642 } 643 644 // Delegates to an inferior filter chooser for further processing on this 645 // category of instructions. 
646 FilterChooserMap.insert( 647 std::pair(Inst.first, std::make_unique<FilterChooser>( 648 Owner->AllInstructions, Inst.second, 649 Owner->Operands, BitValueArray, *Owner))); 650 } 651 } 652 653 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 654 uint32_t DestIdx) { 655 // Any NumToSkip fixups in the current scope can resolve to the 656 // current location. 657 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 658 I != E; ++I) { 659 // Calculate the distance from the byte following the fixup entry byte 660 // to the destination. The Target is calculated from after the 16-bit 661 // NumToSkip entry itself, so subtract two from the displacement here 662 // to account for that. 663 uint32_t FixupIdx = *I; 664 uint32_t Delta = DestIdx - FixupIdx - 3; 665 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 666 // big. 667 assert(Delta < (1u << 24)); 668 Table[FixupIdx] = (uint8_t)Delta; 669 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 670 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 671 } 672 } 673 674 // Emit table entries to decode instructions given a segment or segments 675 // of bits. 676 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 677 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 678 TableInfo.Table.push_back(MCD::OPC_ExtractField); 679 680 SmallString<16> SBytes; 681 raw_svector_ostream S(SBytes); 682 encodeULEB128(StartBit, S); 683 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 684 TableInfo.Table.push_back(NumBits); 685 686 // A new filter entry begins a new scope for fixup resolution. 687 TableInfo.FixupStack.emplace_back(); 688 689 DecoderTable &Table = TableInfo.Table; 690 691 size_t PrevFilter = 0; 692 bool HasFallthrough = false; 693 for (auto &Filter : FilterChooserMap) { 694 // Field value -1 implies a non-empty set of variable instructions. 695 // See also recurse(). 
696 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 697 HasFallthrough = true; 698 699 // Each scope should always have at least one filter value to check 700 // for. 701 assert(PrevFilter != 0 && "empty filter set!"); 702 FixupList &CurScope = TableInfo.FixupStack.back(); 703 // Resolve any NumToSkip fixups in the current scope. 704 resolveTableFixups(Table, CurScope, Table.size()); 705 CurScope.clear(); 706 PrevFilter = 0; // Don't re-process the filter's fallthrough. 707 } else { 708 Table.push_back(MCD::OPC_FilterValue); 709 // Encode and emit the value to filter against. 710 uint8_t Buffer[16]; 711 unsigned Len = encodeULEB128(Filter.first, Buffer); 712 Table.insert(Table.end(), Buffer, Buffer + Len); 713 // Reserve space for the NumToSkip entry. We'll backpatch the value 714 // later. 715 PrevFilter = Table.size(); 716 Table.push_back(0); 717 Table.push_back(0); 718 Table.push_back(0); 719 } 720 721 // We arrive at a category of instructions with the same segment value. 722 // Now delegate to the sub filter chooser for further decodings. 723 // The case may fallthrough, which happens if the remaining well-known 724 // encoding bits do not match exactly. 725 Filter.second->emitTableEntries(TableInfo); 726 727 // Now that we've emitted the body of the handler, update the NumToSkip 728 // of the filter itself to be able to skip forward when false. Subtract 729 // two as to account for the width of the NumToSkip field itself. 730 if (PrevFilter) { 731 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 732 assert(NumToSkip < (1u << 24) && 733 "disassembler decoding table too large!"); 734 Table[PrevFilter] = (uint8_t)NumToSkip; 735 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 736 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 737 } 738 } 739 740 // Any remaining unresolved fixups bubble up to the parent fixup scope. 
741 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 742 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 743 FixupScopeList::iterator Dest = Source - 1; 744 llvm::append_range(*Dest, *Source); 745 TableInfo.FixupStack.pop_back(); 746 747 // If there is no fallthrough, then the final filter should get fixed 748 // up according to the enclosing scope rather than the current position. 749 if (!HasFallthrough) 750 TableInfo.FixupStack.back().push_back(PrevFilter); 751 } 752 753 // Returns the number of fanout produced by the filter. More fanout implies 754 // the filter distinguishes more categories of instructions. 755 unsigned Filter::usefulness() const { 756 if (!VariableInstructions.empty()) 757 return FilteredInstructions.size(); 758 else 759 return FilteredInstructions.size() + 1; 760 } 761 762 ////////////////////////////////// 763 // // 764 // Filterchooser Implementation // 765 // // 766 ////////////////////////////////// 767 768 // Emit the decoder state machine table. 769 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 770 unsigned Indentation, unsigned BitWidth, 771 StringRef Namespace, 772 const EncodingIDsVec &EncodingIDs) const { 773 // We'll need to be able to map from a decoded opcode into the corresponding 774 // EncodingID for this specific combination of BitWidth and Namespace. This 775 // is used below to index into NumberedEncodings. 776 DenseMap<unsigned, unsigned> OpcodeToEncodingID; 777 OpcodeToEncodingID.reserve(EncodingIDs.size()); 778 for (auto &EI : EncodingIDs) 779 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; 780 781 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 782 << BitWidth << "[] = {\n"; 783 784 Indentation += 2; 785 786 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 
787 auto emitULEB128 = [](DecoderTable::const_iterator I, 788 formatted_raw_ostream &OS) { 789 unsigned Len = 0; 790 while (*I >= 128) { 791 OS << (unsigned)*I++ << ", "; 792 Len++; 793 } 794 OS << (unsigned)*I++ << ", "; 795 return Len + 1; 796 }; 797 798 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 799 auto emitNumToSkip = [](DecoderTable::const_iterator I, 800 formatted_raw_ostream &OS) { 801 uint8_t Byte = *I++; 802 uint32_t NumToSkip = Byte; 803 OS << (unsigned)Byte << ", "; 804 Byte = *I++; 805 OS << (unsigned)Byte << ", "; 806 NumToSkip |= Byte << 8; 807 Byte = *I++; 808 OS << utostr(Byte) << ", "; 809 NumToSkip |= Byte << 16; 810 return NumToSkip; 811 }; 812 813 // FIXME: We may be able to use the NumToSkip values to recover 814 // appropriate indentation levels. 815 DecoderTable::const_iterator I = Table.begin(); 816 DecoderTable::const_iterator E = Table.end(); 817 while (I != E) { 818 assert(I < E && "incomplete decode table entry!"); 819 820 uint64_t Pos = I - Table.begin(); 821 OS << "/* " << Pos << " */"; 822 OS.PadToColumn(12); 823 824 switch (*I) { 825 default: 826 PrintFatalError("invalid decode table opcode"); 827 case MCD::OPC_ExtractField: { 828 ++I; 829 OS.indent(Indentation) << "MCD::OPC_ExtractField, "; 830 831 // ULEB128 encoded start value. 832 const char *ErrMsg = nullptr; 833 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 834 Table.data() + Table.size(), &ErrMsg); 835 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 836 I += emitULEB128(I, OS); 837 838 unsigned Len = *I++; 839 OS << Len << ", // Inst{"; 840 if (Len > 1) 841 OS << (Start + Len - 1) << "-"; 842 OS << Start << "} ...\n"; 843 break; 844 } 845 case MCD::OPC_FilterValue: { 846 ++I; 847 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 848 // The filter value is ULEB128 encoded. 849 I += emitULEB128(I, OS); 850 851 // 24-bit numtoskip value. 
852 uint32_t NumToSkip = emitNumToSkip(I, OS); 853 I += 3; 854 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 855 break; 856 } 857 case MCD::OPC_CheckField: { 858 ++I; 859 OS.indent(Indentation) << "MCD::OPC_CheckField, "; 860 // ULEB128 encoded start value. 861 I += emitULEB128(I, OS); 862 // 8-bit length. 863 unsigned Len = *I++; 864 OS << Len << ", "; 865 // ULEB128 encoded field value. 866 I += emitULEB128(I, OS); 867 868 // 24-bit numtoskip value. 869 uint32_t NumToSkip = emitNumToSkip(I, OS); 870 I += 3; 871 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 872 break; 873 } 874 case MCD::OPC_CheckPredicate: { 875 ++I; 876 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 877 I += emitULEB128(I, OS); 878 879 // 24-bit numtoskip value. 880 uint32_t NumToSkip = emitNumToSkip(I, OS); 881 I += 3; 882 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 883 break; 884 } 885 case MCD::OPC_Decode: 886 case MCD::OPC_TryDecode: { 887 bool IsTry = *I == MCD::OPC_TryDecode; 888 ++I; 889 // Decode the Opcode value. 890 const char *ErrMsg = nullptr; 891 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 892 Table.data() + Table.size(), &ErrMsg); 893 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 894 895 OS.indent(Indentation) 896 << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 897 I += emitULEB128(I, OS); 898 899 // Decoder index. 900 I += emitULEB128(I, OS); 901 902 auto EncI = OpcodeToEncodingID.find(Opc); 903 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry"); 904 auto EncodingID = EncI->second; 905 906 if (!IsTry) { 907 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n"; 908 break; 909 } 910 911 // Fallthrough for OPC_TryDecode. 912 913 // 24-bit numtoskip value. 
914 uint32_t NumToSkip = emitNumToSkip(I, OS); 915 I += 3; 916 917 OS << "// Opcode: " << NumberedEncodings[EncodingID] 918 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 919 break; 920 } 921 case MCD::OPC_SoftFail: { 922 ++I; 923 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 924 // Positive mask 925 uint64_t Value = 0; 926 unsigned Shift = 0; 927 do { 928 OS << ", " << (unsigned)*I; 929 Value += (*I & 0x7f) << Shift; 930 Shift += 7; 931 } while (*I++ >= 128); 932 if (Value > 127) { 933 OS << " /* 0x"; 934 OS.write_hex(Value); 935 OS << " */"; 936 } 937 // Negative mask 938 Value = 0; 939 Shift = 0; 940 do { 941 OS << ", " << (unsigned)*I; 942 Value += (*I & 0x7f) << Shift; 943 Shift += 7; 944 } while (*I++ >= 128); 945 if (Value > 127) { 946 OS << " /* 0x"; 947 OS.write_hex(Value); 948 OS << " */"; 949 } 950 OS << ",\n"; 951 break; 952 } 953 case MCD::OPC_Fail: { 954 ++I; 955 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 956 break; 957 } 958 } 959 } 960 OS.indent(Indentation) << "0\n"; 961 962 Indentation -= 2; 963 964 OS.indent(Indentation) << "};\n\n"; 965 } 966 967 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 968 std::vector<unsigned> &InstrLen) const { 969 OS << "static const uint8_t InstrLenTable[] = {\n"; 970 for (unsigned &Len : InstrLen) { 971 OS << Len << ",\n"; 972 } 973 OS << "};\n\n"; 974 } 975 976 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 977 PredicateSet &Predicates, 978 unsigned Indentation) const { 979 // The predicate function is just a big switch statement based on the 980 // input predicate index. 
981 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 982 << "const FeatureBitset &Bits) {\n"; 983 Indentation += 2; 984 if (!Predicates.empty()) { 985 OS.indent(Indentation) << "switch (Idx) {\n"; 986 OS.indent(Indentation) 987 << "default: llvm_unreachable(\"Invalid index!\");\n"; 988 unsigned Index = 0; 989 for (const auto &Predicate : Predicates) { 990 OS.indent(Indentation) << "case " << Index++ << ":\n"; 991 OS.indent(Indentation + 2) << "return (" << Predicate << ");\n"; 992 } 993 OS.indent(Indentation) << "}\n"; 994 } else { 995 // No case statement to emit 996 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 997 } 998 Indentation -= 2; 999 OS.indent(Indentation) << "}\n\n"; 1000 } 1001 1002 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 1003 DecoderSet &Decoders, 1004 unsigned Indentation) const { 1005 // The decoder function is just a big switch statement based on the 1006 // input decoder index. 1007 OS.indent(Indentation) << "template <typename InsnType>\n"; 1008 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1009 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1010 OS.indent(Indentation) 1011 << " uint64_t " 1012 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1013 Indentation += 2; 1014 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1015 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1016 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1017 // possible but fall back to an InsnType-sized tmp for truly large fields. 
1018 OS.indent(Indentation) << "using TmpType = " 1019 "std::conditional_t<std::is_integral<InsnType>::" 1020 "value, InsnType, uint64_t>;\n"; 1021 OS.indent(Indentation) << "TmpType tmp;\n"; 1022 OS.indent(Indentation) << "switch (Idx) {\n"; 1023 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1024 unsigned Index = 0; 1025 for (const auto &Decoder : Decoders) { 1026 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1027 OS << Decoder; 1028 OS.indent(Indentation + 2) << "return S;\n"; 1029 } 1030 OS.indent(Indentation) << "}\n"; 1031 Indentation -= 2; 1032 OS.indent(Indentation) << "}\n\n"; 1033 } 1034 1035 // Populates the field of the insn given the start position and the number of 1036 // consecutive bits to scan for. 1037 // 1038 // Returns false if and on the first uninitialized bit value encountered. 1039 // Returns true, otherwise. 1040 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 1041 unsigned StartBit, unsigned NumBits) const { 1042 Field = 0; 1043 1044 for (unsigned i = 0; i < NumBits; ++i) { 1045 if (Insn[StartBit + i] == BIT_UNSET) 1046 return false; 1047 1048 if (Insn[StartBit + i] == BIT_TRUE) 1049 Field = Field | (1ULL << i); 1050 } 1051 1052 return true; 1053 } 1054 1055 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1056 /// filter array as a series of chars. 1057 void FilterChooser::dumpFilterArray( 1058 raw_ostream &o, const std::vector<bit_value_t> &filter) const { 1059 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1060 switch (filter[bitIndex - 1]) { 1061 case BIT_UNFILTERED: 1062 o << "."; 1063 break; 1064 case BIT_UNSET: 1065 o << "_"; 1066 break; 1067 case BIT_TRUE: 1068 o << "1"; 1069 break; 1070 case BIT_FALSE: 1071 o << "0"; 1072 break; 1073 } 1074 } 1075 } 1076 1077 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1078 /// dumpFilterArray on each filter chooser up to the top level one. 
1079 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1080 const FilterChooser *current = this; 1081 1082 while (current) { 1083 o << prefix; 1084 dumpFilterArray(o, current->FilterBitValues); 1085 o << '\n'; 1086 current = current->Parent; 1087 } 1088 } 1089 1090 // Calculates the island(s) needed to decode the instruction. 1091 // This returns a list of undecoded bits of an instructions, for example, 1092 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1093 // decoded bits in order to verify that the instruction matches the Opcode. 1094 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1095 std::vector<unsigned> &EndBits, 1096 std::vector<uint64_t> &FieldVals, 1097 const insn_t &Insn) const { 1098 unsigned Num, BitNo; 1099 Num = BitNo = 0; 1100 1101 uint64_t FieldVal = 0; 1102 1103 // 0: Init 1104 // 1: Water (the bit value does not affect decoding) 1105 // 2: Island (well-known bit value needed for decoding) 1106 int State = 0; 1107 1108 for (unsigned i = 0; i < BitWidth; ++i) { 1109 int64_t Val = Value(Insn[i]); 1110 bool Filtered = PositionFiltered(i); 1111 switch (State) { 1112 default: 1113 llvm_unreachable("Unreachable code!"); 1114 case 0: 1115 case 1: 1116 if (Filtered || Val == -1) 1117 State = 1; // Still in Water 1118 else { 1119 State = 2; // Into the Island 1120 BitNo = 0; 1121 StartBits.push_back(i); 1122 FieldVal = Val; 1123 } 1124 break; 1125 case 2: 1126 if (Filtered || Val == -1) { 1127 State = 1; // Into the Water 1128 EndBits.push_back(i - 1); 1129 FieldVals.push_back(FieldVal); 1130 ++Num; 1131 } else { 1132 State = 2; // Still in Island 1133 ++BitNo; 1134 FieldVal = FieldVal | Val << BitNo; 1135 } 1136 break; 1137 } 1138 } 1139 // If we are still in Island after the loop, do some housekeeping. 
1140 if (State == 2) { 1141 EndBits.push_back(BitWidth - 1); 1142 FieldVals.push_back(FieldVal); 1143 ++Num; 1144 } 1145 1146 assert(StartBits.size() == Num && EndBits.size() == Num && 1147 FieldVals.size() == Num); 1148 return Num; 1149 } 1150 1151 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1152 const OperandInfo &OpInfo, 1153 bool &OpHasCompleteDecoder) const { 1154 const std::string &Decoder = OpInfo.Decoder; 1155 1156 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1157 1158 if (UseInsertBits) { 1159 o.indent(Indentation) << "tmp = 0x"; 1160 o.write_hex(OpInfo.InitValue); 1161 o << ";\n"; 1162 } 1163 1164 for (const EncodingField &EF : OpInfo) { 1165 o.indent(Indentation); 1166 if (UseInsertBits) 1167 o << "insertBits(tmp, "; 1168 else 1169 o << "tmp = "; 1170 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1171 if (UseInsertBits) 1172 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1173 else if (EF.Offset != 0) 1174 o << " << " << EF.Offset; 1175 o << ";\n"; 1176 } 1177 1178 if (Decoder != "") { 1179 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1180 o.indent(Indentation) << "if (!Check(S, " << Decoder 1181 << "(MI, tmp, Address, Decoder))) { " 1182 << (OpHasCompleteDecoder ? "" 1183 : "DecodeComplete = false; ") 1184 << "return MCDisassembler::Fail; }\n"; 1185 } else { 1186 OpHasCompleteDecoder = true; 1187 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1188 } 1189 } 1190 1191 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1192 unsigned Opc, bool &HasCompleteDecoder) const { 1193 HasCompleteDecoder = true; 1194 1195 for (const auto &Op : Operands.find(Opc)->second) { 1196 // If a custom instruction decoder was specified, use that. 
1197 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1198 HasCompleteDecoder = Op.HasCompleteDecoder; 1199 OS.indent(Indentation) 1200 << "if (!Check(S, " << Op.Decoder 1201 << "(MI, insn, Address, Decoder))) { " 1202 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1203 << "return MCDisassembler::Fail; }\n"; 1204 break; 1205 } 1206 1207 bool OpHasCompleteDecoder; 1208 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1209 if (!OpHasCompleteDecoder) 1210 HasCompleteDecoder = false; 1211 } 1212 } 1213 1214 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1215 bool &HasCompleteDecoder) const { 1216 // Build up the predicate string. 1217 SmallString<256> Decoder; 1218 // FIXME: emitDecoder() function can take a buffer directly rather than 1219 // a stream. 1220 raw_svector_ostream S(Decoder); 1221 unsigned I = 4; 1222 emitDecoder(S, I, Opc, HasCompleteDecoder); 1223 1224 // Using the full decoder string as the key value here is a bit 1225 // heavyweight, but is effective. If the string comparisons become a 1226 // performance concern, we can implement a mangling of the predicate 1227 // data easily enough with a map back to the actual string. That's 1228 // overkill for now, though. 1229 1230 // Make sure the predicate is in the table. 1231 Decoders.insert(CachedHashString(Decoder)); 1232 // Now figure out the index for when we write out the table. 1233 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1234 return (unsigned)(P - Decoders.begin()); 1235 } 1236 1237 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 
// Returns true when Val cannot be rendered (a def that is not a
// SubtargetFeature, an unsupported dag operator, or any other Init kind) and
// false once the expression has been written to OS.
bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
                                          raw_ostream &OS) const {
  if (auto *D = dyn_cast<DefInit>(&Val)) {
    if (!D->getDef()->isSubClassOf("SubtargetFeature"))
      return true;
    // A feature is tested by indexing Bits with its namespaced name.
    OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
       << "]";
    return false;
  }
  if (auto *D = dyn_cast<DagInit>(&Val)) {
    std::string Op = D->getOperator()->getAsString();
    if (Op == "not" && D->getNumArgs() == 1) {
      OS << '!';
      // The operand of '!' must be parenthesized if it is a binary expression.
      return emitPredicateMatchAux(*D->getArg(0), true, OS);
    }
    if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
      // Parenthesize only when the caller asked for it and there really is
      // more than one operand. std::exchange reads the caller's request and
      // then forces ParenIfBinOp to true for the nested operands; with a
      // single operand the && short-circuits and ParenIfBinOp is passed down
      // unchanged.
      bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
      if (Paren)
        OS << '(';
      ListSeparator LS(Op == "any_of" ? " || " : " && ");
      for (auto *Arg : D->getArgs()) {
        OS << LS;
        if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
          return true;
      }
      if (Paren)
        OS << ')';
      return false;
    }
  }
  return true;
}

// Writes to o the conjunction (" && ") of the AssemblerCondDag expressions of
// the predicates attached to the encoding with ID Opc. Predicates that lack an
// AssemblerMatcherPredicate value or whose AssemblerCondDag is not a dag are
// skipped. Reports a fatal error if a condition dag cannot be rendered.
// Returns true if the Predicates list is non-empty, regardless of how many
// entries were actually emitted. Indentation is not used here.
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                                       unsigned Opc) const {
  ListInit *Predicates =
      AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
  bool IsFirstEmission = true;
  for (unsigned i = 0; i < Predicates->size(); ++i) {
    Record *Pred = Predicates->getElementAsRecord(i);
    if (!Pred->getValue("AssemblerMatcherPredicate"))
      continue;

    if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
      continue;

    if (!IsFirstEmission)
      o << " && ";
    // Ask for parentheses whenever the list has more than one predicate,
    // since the pieces are joined with " && ".
    if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
                              Predicates->size() > 1, o))
      PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
    IsFirstEmission = false;
  }
  return !Predicates->empty();
}

// Returns true if at least one predicate of the encoding with ID Opc has an
// AssemblerMatcherPredicate value and a dag-valued AssemblerCondDag, i.e. if
// emitPredicateMatch() would emit something for it.
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
  ListInit *Predicates =
      AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
  for (unsigned i = 0; i < Predicates->size(); ++i) {
    Record *Pred = Predicates->getElementAsRecord(i);
    if (!Pred->getValue("AssemblerMatcherPredicate"))
      continue;

    if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
      return true;
  }
  return false;
}

// Ensures Predicate is present in TableInfo.Predicates and returns its
// position in that set.
unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
                                          StringRef Predicate) const {
  // Using the full predicate string as the key value here is a bit
  // heavyweight, but is effective. If the string comparisons become a
  // performance concern, we can implement a mangling of the predicate
  // data easily enough with a map back to the actual string. That's
  // overkill for now, though.

  // Make sure the predicate is in the table.
  TableInfo.Predicates.insert(CachedHashString(Predicate));
  // Now figure out the index for when we write out the table.
  PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
  return (unsigned)(P - TableInfo.Predicates.begin());
}

// Appends an OPC_CheckPredicate entry for the encoding with ID Opc, if it
// needs one. Layout: opcode byte, ULEB128 predicate index, three zero bytes
// reserved for the NumToSkip value. The position of those three bytes is
// recorded in the innermost fixup scope for later backpatching.
void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
                                            unsigned Opc) const {
  if (!doesOpcodeNeedPredicate(Opc))
    return;

  // Build up the predicate string.
  SmallString<256> Predicate;
  // FIXME: emitPredicateMatch() functions can take a buffer directly rather
  // than a stream.
  raw_svector_ostream PS(Predicate);
  unsigned I = 0;
  emitPredicateMatch(PS, I, Opc);

  // Figure out the index into the predicate table for the predicate just
  // computed.
  unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
  SmallString<16> PBytes;
  raw_svector_ostream S(PBytes);
  encodeULEB128(PIdx, S);

  TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
  // Predicate index
  for (unsigned i = 0, e = PBytes.size(); i != e; ++i)
    TableInfo.Table.push_back(PBytes[i]);
  // Push location for NumToSkip backpatching.
  TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
  TableInfo.Table.push_back(0);
  TableInfo.Table.push_back(0);
  TableInfo.Table.push_back(0);
}

// Appends an OPC_SoftFail entry for the encoding with ID Opc when its
// "SoftFail" bits field marks at least one fully defined Inst bit.
// Layout: opcode byte, ULEB128 positive mask, ULEB128 negative mask; a mask
// with no bits set is written as a single 0 byte. Nothing is emitted when the
// record has no BitsInit SoftFail value, when both masks are empty, or when a
// SoftFail bit refers to an Inst bit that is neither 0 nor 1 (a diagnostic is
// printed to errs() in that case).
void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
                                           unsigned Opc) const {
  const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail");
  BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;

  if (!SFBits)
    return;
  BitsInit *InstBits =
      AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst");

  APInt PositiveMask(BitWidth, 0ULL);
  APInt NegativeMask(BitWidth, 0ULL);
  for (unsigned i = 0; i < BitWidth; ++i) {
    bit_value_t B = bitFromBits(*SFBits, i);
    bit_value_t IB = bitFromBits(*InstBits, i);

    // Only bits explicitly flagged in SoftFail are of interest.
    if (B != BIT_TRUE)
      continue;

    switch (IB) {
    case BIT_FALSE:
      // The bit is meant to be false, so emit a check to see if it is true.
      PositiveMask.setBit(i);
      break;
    case BIT_TRUE:
      // The bit is meant to be true, so emit a check to see if it is false.
      NegativeMask.setBit(i);
      break;
    default:
      // The bit is not set; this must be an error!
      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
             << AllInstructions[Opc] << " is set but Inst{" << i
             << "} is unset!\n"
             << " - You can only mark a bit as SoftFail if it is fully defined"
             << " (1/0 - not '?') in Inst\n";
      return;
    }
  }

  bool NeedPositiveMask = PositiveMask.getBoolValue();
  bool NeedNegativeMask = NegativeMask.getBoolValue();

  if (!NeedPositiveMask && !NeedNegativeMask)
    return;

  TableInfo.Table.push_back(MCD::OPC_SoftFail);

  // NOTE(review): getZExtValue() yields a uint64_t, so this presumably relies
  // on the masks fitting in 64 bits - confirm for encodings wider than that.
  SmallString<16> MaskBytes;
  raw_svector_ostream S(MaskBytes);
  if (NeedPositiveMask) {
    encodeULEB128(PositiveMask.getZExtValue(), S);
    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
      TableInfo.Table.push_back(MaskBytes[i]);
  } else
    TableInfo.Table.push_back(0);
  if (NeedNegativeMask) {
    // Reuse the buffer; drop the positive mask bytes first.
    MaskBytes.clear();
    encodeULEB128(NegativeMask.getZExtValue(), S);
    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
      TableInfo.Table.push_back(MaskBytes[i]);
  } else
    TableInfo.Table.push_back(0);
}

// Emits table entries to decode the singleton.
// Entries are appended in this order: optional OPC_CheckPredicate, one
// OPC_CheckField per island (highest island first), optional OPC_SoftFail,
// then OPC_Decode or OPC_TryDecode with the ULEB128 opcode and decoder index.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                            EncodingIDAndOpcode Opc) const {
  std::vector<unsigned> StartBits;
  std::vector<unsigned> EndBits;
  std::vector<uint64_t> FieldVals;
  insn_t Insn;
  insnWithID(Insn, Opc.EncodingID);

  // Look for islands of undecoded bits of the singleton.
  getIslands(StartBits, EndBits, FieldVals, Insn);

  unsigned Size = StartBits.size();

  // Emit the predicate table entry if one is needed.
  emitPredicateTableEntry(TableInfo, Opc.EncodingID);

  // Check any additional encoding fields needed.
  // Walks the islands from the last one found down to the first.
  for (unsigned I = Size; I != 0; --I) {
    unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
    assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
    TableInfo.Table.push_back(MCD::OPC_CheckField);
    // ULEB128 start bit: copy continuation bytes, then the final byte.
    uint8_t Buffer[16], *P;
    encodeULEB128(StartBits[I - 1], Buffer);
    for (P = Buffer; *P >= 128; ++P)
      TableInfo.Table.push_back(*P);
    TableInfo.Table.push_back(*P);
    // 8-bit field width.
    TableInfo.Table.push_back(NumBits);
    // ULEB128 expected field value.
    encodeULEB128(FieldVals[I - 1], Buffer);
    for (P = Buffer; *P >= 128; ++P)
      TableInfo.Table.push_back(*P);
    TableInfo.Table.push_back(*P);
    // Push location for NumToSkip backpatching.
    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
    // The fixup is always 24-bits, so go ahead and allocate the space
    // in the table so all our relative position calculations work OK even
    // before we fully resolve the real value here.
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
  }

  // Check for soft failure of the match.
  emitSoftFailTableEntry(TableInfo, Opc.EncodingID);

  bool HasCompleteDecoder;
  unsigned DIdx =
      getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);

  // Produce OPC_Decode or OPC_TryDecode opcode based on the information
  // whether the instruction decoder is complete or not. If it is complete
  // then it handles all possible values of remaining variable/unfiltered bits
  // and for any value can determine if the bitpattern is a valid instruction
  // or not. This means OPC_Decode will be the final step in the decoding
  // process. If it is not complete, then the Fail return code from the
  // decoder method indicates that additional processing should be done to see
  // if there is any other instruction that also matches the bitpattern and
  // can decode it.
  TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode
                                               : MCD::OPC_TryDecode);
  NumEncodingsSupported++;
  // ULEB128 opcode.
  uint8_t Buffer[16], *p;
  encodeULEB128(Opc.Opcode, Buffer);
  for (p = Buffer; *p >= 128; ++p)
    TableInfo.Table.push_back(*p);
  TableInfo.Table.push_back(*p);

  SmallString<16> Bytes;
  raw_svector_ostream S(Bytes);
  encodeULEB128(DIdx, S);

  // Decoder index
  for (unsigned i = 0, e = Bytes.size(); i != e; ++i)
    TableInfo.Table.push_back(Bytes[i]);

  // Only OPC_TryDecode carries a NumToSkip field.
  if (!HasCompleteDecoder) {
    // Push location for NumToSkip backpatching.
    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
    // Allocate the space for the fixup.
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
    TableInfo.Table.push_back(0);
  }
}

// Emits table entries to decode the singleton, and then to decode the rest.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                            const Filter &Best) const {
  EncodingIDAndOpcode Opc = Best.getSingletonOpc();

  // complex singletons need predicate checks from the first singleton
  // to refer forward to the variable filterchooser that follows.
  TableInfo.FixupStack.emplace_back();

  emitSingletonTableEntry(TableInfo, Opc);

  // Resolve the fixups recorded by the singleton against the current end of
  // the table, which is where the variable chooser's entries begin.
  resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
                     TableInfo.Table.size());
  TableInfo.FixupStack.pop_back();

  Best.getVariableFC().emitTableEntries(TableInfo);
}

// Assign a single filter and run with it.  Top level API client can initialize
// with a single filter to start the filtering process.
// NOTE(review): the `mixed` argument is not read; the filter is always
// constructed with `true` as its last argument - confirm this is intended.
void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
                                    bool mixed) {
  Filters.clear();
  Filters.emplace_back(*this, startBit, numBit, true);
  BestIndex = 0; // Sole Filter instance to choose from.
  bestFilter().recurse();
}

// reportRegion is a helper function for filterProcessor to mark a region as
// eligible for use as a filter region.
// The region covers bits [StartBit, BitIndex). MIXED regions are recorded only
// when AllowMixed is set and ALL_SET regions only when it is not; every other
// combination is ignored.
void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
                                 unsigned BitIndex, bool AllowMixed) {
  if (RA == ATTR_MIXED && AllowMixed)
    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
  else if (RA == ATTR_ALL_SET && !AllowMixed)
    Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
}

// FilterProcessor scans the well-known encoding bits of the instructions and
// builds up a list of candidate filters.  It chooses the best filter and
// recursively descends down the decoding tree.
// Returns true if no further filtering is needed (a single instruction) or a
// useful filter was chosen and recursed into; returns false if every
// candidate filter reported zero usefulness (or there were no candidates).
bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
  Filters.clear();
  BestIndex = -1;
  unsigned numInstructions = Opcodes.size();

  assert(numInstructions && "Filter created with no instructions");

  // No further filtering is necessary.
  if (numInstructions == 1)
    return true;

  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
  // instructions is 3.
  if (AllowMixed && !Greedy) {
    assert(numInstructions == 3);

    for (auto Opcode : Opcodes) {
      std::vector<unsigned> StartBits;
      std::vector<unsigned> EndBits;
      std::vector<uint64_t> FieldVals;
      insn_t Insn;

      insnWithID(Insn, Opcode.EncodingID);

      // Look for islands of undecoded bits of any instruction.
      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
        // Found an instruction with island(s).  Now just assign a filter.
        runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
        return true;
      }
    }
  }

  unsigned BitIndex;

  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
  // The automaton consumes the corresponding bit from each
  // instruction.
  //
  //   Input symbols: 0, 1, and _ (unset).
  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
  //   Initial state: NONE.
  //
  // (NONE) ------- [01] -> (ALL_SET)
  // (NONE) ------- _ ----> (ALL_UNSET)
  // (ALL_SET) ---- [01] -> (ALL_SET)
  // (ALL_SET) ---- _ ----> (MIXED)
  // (ALL_UNSET) -- [01] -> (MIXED)
  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
  // (MIXED) ------ . ----> (MIXED)
  // (FILTERED)---- . ----> (FILTERED)

  std::vector<bitAttr_t> bitAttrs;

  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
  // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
    if (FilterBitValues[BitIndex] == BIT_TRUE ||
        FilterBitValues[BitIndex] == BIT_FALSE)
      bitAttrs.push_back(ATTR_FILTERED);
    else
      bitAttrs.push_back(ATTR_NONE);

  // Feed every instruction's bits through the per-position automatons.
  for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
    insn_t insn;

    insnWithID(insn, Opcodes[InsnIndex].EncodingID);

    for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
      switch (bitAttrs[BitIndex]) {
      case ATTR_NONE:
        if (insn[BitIndex] == BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_ALL_UNSET;
        else
          bitAttrs[BitIndex] = ATTR_ALL_SET;
        break;
      case ATTR_ALL_SET:
        if (insn[BitIndex] == BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_MIXED;
        break;
      case ATTR_ALL_UNSET:
        if (insn[BitIndex] != BIT_UNSET)
          bitAttrs[BitIndex] = ATTR_MIXED;
        break;
      case ATTR_MIXED:
      case ATTR_FILTERED:
        break;
      }
    }
  }

  // The regionAttr automaton consumes the bitAttrs automatons' state,
  // lowest-to-highest.
  //
  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
  //   States:        NONE, ALL_SET, MIXED
  //   Initial state: NONE
  //
  // (NONE) ----- F --> (NONE)
  // (NONE) ----- S --> (ALL_SET)     ; and set region start
  // (NONE) ----- U --> (NONE)
  // (NONE) ----- M --> (MIXED)       ; and set region start
  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
  // (ALL_SET) -- S --> (ALL_SET)
  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
  // (MIXED) ---- M --> (MIXED)

  bitAttr_t RA = ATTR_NONE;
  unsigned StartBit = 0;

  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
    bitAttr_t bitAttr = bitAttrs[BitIndex];

    assert(bitAttr != ATTR_NONE && "Bit without attributes");

    switch (RA) {
    case ATTR_NONE:
      switch (bitAttr) {
      case ATTR_FILTERED:
        break;
      case ATTR_ALL_SET:
        StartBit = BitIndex;
        RA = ATTR_ALL_SET;
        break;
      case ATTR_ALL_UNSET:
        break;
      case ATTR_MIXED:
        StartBit = BitIndex;
        RA = ATTR_MIXED;
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_ALL_SET:
      switch (bitAttr) {
      case ATTR_FILTERED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_ALL_SET:
        break;
      case ATTR_ALL_UNSET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_MIXED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_MIXED;
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_MIXED:
      switch (bitAttr) {
      case ATTR_FILTERED:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_NONE;
        break;
      case ATTR_ALL_SET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        StartBit = BitIndex;
        RA = ATTR_ALL_SET;
        break;
      case ATTR_ALL_UNSET:
        reportRegion(RA, StartBit, BitIndex, AllowMixed);
        RA = ATTR_NONE;
        break;
      case ATTR_MIXED:
        break;
      default:
        llvm_unreachable("Unexpected bitAttr!");
      }
      break;
    case ATTR_ALL_UNSET:
      llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
    case ATTR_FILTERED:
      llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
    }
  }

  // At the end, if we're still in ALL_SET or MIXED states, report a region
  // (BitIndex equals BitWidth here, so the region runs to the top bit).
  switch (RA) {
  case ATTR_NONE:
    break;
  case ATTR_FILTERED:
    break;
  case ATTR_ALL_SET:
    reportRegion(RA, StartBit, BitIndex, AllowMixed);
    break;
  case ATTR_ALL_UNSET:
    break;
  case ATTR_MIXED:
    reportRegion(RA, StartBit, BitIndex, AllowMixed);
    break;
  }

  // We have finished with the filter processings.  Now it's time to choose
  // the best performing filter.
  // Ties keep the earliest candidate because the comparison below is strict.
  BestIndex = 0;
  bool AllUseless = true;
  unsigned BestScore = 0;

  for (unsigned i = 0, e = Filters.size(); i != e; ++i) {
    unsigned Usefulness = Filters[i].usefulness();

    if (Usefulness)
      AllUseless = false;

    if (Usefulness > BestScore) {
      BestIndex = i;
      BestScore = Usefulness;
    }
  }

  if (!AllUseless)
    bestFilter().recurse();

  return !AllUseless;
} // end of FilterChooser::filterProcessor(bool)

// Decides on the best configuration of filter(s) to use in order to decode
// the instructions.  A conflict of instructions may occur, in which case we
// dump the conflict set to the standard error.
void FilterChooser::doFilter() {
  unsigned Num = Opcodes.size();
  assert(Num && "FilterChooser created with no instructions");

  // Try regions of consecutive known bit values first.
  if (filterProcessor(false))
    return;

  // Then regions of mixed bits (both known and uninitialized bit values
  // allowed).
  if (filterProcessor(true))
    return;

  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
  // well-known encoding pattern.  In such case, we backtrack and scan for the
  // very first consecutive ATTR_ALL_SET region and assign a filter to it.
  if (Num == 3 && filterProcessor(true, false))
    return;

  // If we come to here, the instruction decoding has failed.
  // Set the BestIndex to -1 to indicate so.
  BestIndex = -1;
}

// emitTableEntries - Emit state machine entries to decode our share of
// instructions.
void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
  if (Opcodes.size() == 1) {
    // There is only one instruction in the set, which is great!
    // Call emitSingletonTableEntry() to see whether there are any remaining
    // encodings bits.
    emitSingletonTableEntry(TableInfo, Opcodes[0]);
    return;
  }

  // Choose the best filter to do the decodings!
  if (BestIndex != -1) {
    const Filter &Best = Filters[BestIndex];
    // A filter that isolates exactly one value is emitted as a singleton
    // followed by the remaining (variable) instructions.
    if (Best.getNumFiltered() == 1)
      emitSingletonTableEntry(TableInfo, Best);
    else
      Best.emitTableEntry(TableInfo);
    return;
  }

  // We don't know how to decode these instructions!  Dump the
  // conflict set and bail.

  // Print out useful conflict information for postmortem analysis.
  errs() << "Decoding Conflict:\n";

  dumpStack(errs(), "\t\t");

  // One line per conflicting encoding: its name/ID and its Inst bits.
  for (auto Opcode : Opcodes) {
    errs() << '\t';
    emitNameWithID(errs(), Opcode.EncodingID);
    errs() << " ";
    dumpBits(
        errs(),
        getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
    errs() << '\n';
  }
}

// Returns the name of the decoder method for an operand record: a non-empty
// "DecoderMethod" string if the record has one, otherwise a name derived from
// the register class ("Decode<Name>RegisterClass" or
// "DecodePointerLikeRegClass<Kind>"), otherwise an empty string.
static std::string findOperandDecoderMethod(Record *Record) {
  std::string Decoder;

  RecordVal *DecoderString = Record->getValue("DecoderMethod");
  StringInit *String =
      DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
  if (String) {
    Decoder = std::string(String->getValue());
    if (!Decoder.empty())
      return Decoder;
  }

  // A RegisterOperand is resolved through its RegClass.
  if (Record->isSubClassOf("RegisterOperand"))
    Record = Record->getValueAsDef("RegClass");

  if (Record->isSubClassOf("RegisterClass")) {
    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
    Decoder = "DecodePointerLikeRegClass" +
              utostr(Record->getValueAsInt("RegClassKind"));
  }

  return Decoder;
}

// Builds the OperandInfo for an operand type record: its decoder method name
// plus the "hasCompleteDecoder" bit, which defaults to true when the record
// has no such BitInit value.
OperandInfo getOpInfo(Record *TypeRecord) {
  std::string Decoder = findOperandDecoderMethod(TypeRecord);

  RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
  BitInit *HasCompleteDecoderBit =
      HasCompleteDecoderVal
          ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
          : nullptr;
  bool HasCompleteDecoder =
      HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;

  return OperandInfo(Decoder, HasCompleteDecoder);
}

// Fills Operands for a variable-length encoding. First one OperandInfo is
// appended per flattened operand of CGI; then every named segment of the
// VarLenInst built from Def's "Inst" dag is recorded as a field of the operand
// it names (and of the operand tied to it, if any).
void parseVarLenInstOperand(const Record &Def,
                            std::vector<OperandInfo> &Operands,
                            const CodeGenInstruction &CGI) {

  const RecordVal *RV = Def.getValue("Inst");
  VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
  // TiedTo[i] is the index of the operand tied to operand i, or -1.
  SmallVector<int> TiedTo;

  for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) {
    auto &Op = CGI.Operands[Idx];
    // Operands with sub-operands contribute one entry per sub-operand.
    if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
      for (auto *Arg : Op.MIOperandInfo->getArgs())
        Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
    else
      Operands.push_back(getOpInfo(Op.Rec));

    int TiedReg = Op.getTiedRegister();
    TiedTo.push_back(-1);
    if (TiedReg != -1) {
      // Record the tie in both directions.
      // NOTE(review): TiedTo[TiedReg] presumably relies on TiedReg <= Idx,
      // since TiedTo only has Idx + 1 entries at this point - confirm.
      TiedTo[Idx] = TiedReg;
      TiedTo[TiedReg] = Idx;
    }
  }

  // Running bit position of the current segment within the encoding.
  unsigned CurrBitPos = 0;
  for (auto &EncodingSegment : VLI) {
    unsigned Offset = 0;
    StringRef OpName;

    // A segment names an operand either directly (string) or through a dag
    // whose first argument is the name and third argument is the offset.
    if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
      OpName = SI->getValue();
    } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
      OpName = cast<StringInit>(DI->getArg(0))->getValue();
      Offset = cast<IntInit>(DI->getArg(2))->getValue();
    }

    if (!OpName.empty()) {
      auto OpSubOpPair =
          const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
              OpName);
      unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
      Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      // A decoder given on the segment replaces the operand's own decoder.
      if (!EncodingSegment.CustomDecoder.empty())
        Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();

      // Mirror the field onto the tied operand, using the same sub-operand
      // number.
      int TiedReg = TiedTo[OpSubOpPair.first];
      if (TiedReg != -1) {
        unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
            std::pair(TiedReg, OpSubOpPair.second));
        Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
      }
    }

    CurrBitPos += EncodingSegment.BitWidth;
  }
}

// Prints the full record as a note, prefixed with a line tying it to the
// previously reported error.
static void debugDumpRecord(const Record &Rec) {
  // Dump the record, so we can see what's going on...
  std::string E;
  raw_string_ostream S(E);
  S << "Dumping record for previous error:\n";
  S << Rec;
  PrintNote(E);
}

/// For an operand field named OpName: populate OpInfo.InitValue with the
/// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
/// insert from the decoded instruction.
static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
                                std::map<std::string, std::string> &TiedNames,
                                StringRef OpName, OperandInfo &OpInfo) {
  // Some bits of the operand may be required to be 1 depending on the
  // instruction's encoding. Collect those bits.
  if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName))
    if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
      for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
        if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
          if (OpBit->getValue())
            OpInfo.InitValue |= 1ULL << I;

  // Scan Bits for maximal runs [I, J) that reference consecutive bits of the
  // operand (by its own name or its tied name); each run becomes one field.
  for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
    VarInit *Var;
    unsigned Offset = 0;
    for (; J != Bits.getNumBits(); ++J) {
      VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J));
      if (BJ) {
        Var = dyn_cast<VarInit>(BJ->getBitVar());
        // The first bit of a run fixes the operand bit offset; later bits
        // must continue consecutively from it.
        if (I == J)
          Offset = BJ->getBitNum();
        else if (BJ->getBitNum() != Offset + J - I)
          break;
      } else {
        Var = dyn_cast<VarInit>(Bits.getBit(J));
      }
      // Note: operator[] inserts an empty entry into TiedNames when OpName
      // has no tied name yet.
      if (!Var || (Var->getName() != OpName &&
                   Var->getName() != TiedNames[std::string(OpName)]))
        break;
    }
    // An empty run means bit I does not belong to this operand; step over it.
    if (I == J)
      ++J;
    else
      OpInfo.addField(I, J - I, Offset);
  }
}

static unsigned
populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
                    const
CodeGenInstruction &CGI, unsigned Opc, 1987 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1988 bool IsVarLenInst) { 1989 const Record &Def = *CGI.TheDef; 1990 // If all the bit positions are not specified; do not decode this instruction. 1991 // We are bound to fail! For proper disassembly, the well-known encoding bits 1992 // of the instruction must be fully specified. 1993 1994 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 1995 if (Bits.allInComplete()) 1996 return 0; 1997 1998 std::vector<OperandInfo> InsnOperands; 1999 2000 // If the instruction has specified a custom decoding hook, use that instead 2001 // of trying to auto-generate the decoder. 2002 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 2003 if (InstDecoder != "") { 2004 bool HasCompleteInstDecoder = 2005 EncodingDef.getValueAsBit("hasCompleteDecoder"); 2006 InsnOperands.push_back( 2007 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 2008 Operands[Opc] = InsnOperands; 2009 return Bits.getNumBits(); 2010 } 2011 2012 // Generate a description of the operand of the instruction that we know 2013 // how to decode automatically. 2014 // FIXME: We'll need to have a way to manually override this as needed. 2015 2016 // Gather the outputs/inputs of the instruction, so we can find their 2017 // positions in the encoding. This assumes for now that they appear in the 2018 // MCInst in the order that they're listed. 
2019 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2020 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2021 DagInit *In = Def.getValueAsDag("InOperandList"); 2022 for (unsigned i = 0; i < Out->getNumArgs(); ++i) 2023 InOutOperands.push_back(std::pair(Out->getArg(i), Out->getArgNameStr(i))); 2024 for (unsigned i = 0; i < In->getNumArgs(); ++i) 2025 InOutOperands.push_back(std::pair(In->getArg(i), In->getArgNameStr(i))); 2026 2027 // Search for tied operands, so that we can correctly instantiate 2028 // operands that are not explicitly represented in the encoding. 2029 std::map<std::string, std::string> TiedNames; 2030 for (unsigned i = 0; i < CGI.Operands.size(); ++i) { 2031 auto &Op = CGI.Operands[i]; 2032 for (unsigned j = 0; j < Op.Constraints.size(); ++j) { 2033 const CGIOperandList::ConstraintInfo &CI = Op.Constraints[j]; 2034 if (CI.isTied()) { 2035 int tiedTo = CI.getTiedOperand(); 2036 std::pair<unsigned, unsigned> SO = 2037 CGI.Operands.getSubOperandNumber(tiedTo); 2038 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2039 if (TiedName.empty()) 2040 TiedName = CGI.Operands[SO.first].Name; 2041 std::string MyName = Op.SubOpNames[j]; 2042 if (MyName.empty()) 2043 MyName = Op.Name; 2044 2045 TiedNames[MyName] = TiedName; 2046 TiedNames[TiedName] = MyName; 2047 } 2048 } 2049 } 2050 2051 if (IsVarLenInst) { 2052 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2053 } else { 2054 // For each operand, see if we can figure out where it is encoded. 2055 for (const auto &Op : InOutOperands) { 2056 Init *OpInit = Op.first; 2057 StringRef OpName = Op.second; 2058 2059 // We're ready to find the instruction encoding locations for this 2060 // operand. 2061 2062 // First, find the operand type ("OpInit"), and sub-op names 2063 // ("SubArgDag") if present. 
2064 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2065 if (SubArgDag) 2066 OpInit = SubArgDag->getOperator(); 2067 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2068 // Lookup the sub-operands from the operand type record (note that only 2069 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2070 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2071 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2072 : nullptr; 2073 2074 // Lookup the decoder method and construct a new OperandInfo to hold our 2075 // result. 2076 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2077 2078 // If we have named sub-operands... 2079 if (SubArgDag) { 2080 // Then there should not be a custom decoder specified on the top-level 2081 // type. 2082 if (!OpInfo.Decoder.empty()) { 2083 PrintError(EncodingDef.getLoc(), 2084 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2085 OpInit->getAsString() + 2086 "\" with a custom DecoderMethod, but also named " 2087 "sub-operands."); 2088 continue; 2089 } 2090 2091 // Decode each of the sub-ops separately. 2092 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2093 for (unsigned i = 0; i < SubOps->getNumArgs(); ++i) { 2094 StringRef SubOpName = SubArgDag->getArgNameStr(i); 2095 OperandInfo SubOpInfo = 2096 getOpInfo(cast<DefInit>(SubOps->getArg(i))->getDef()); 2097 2098 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2099 SubOpInfo); 2100 InsnOperands.push_back(SubOpInfo); 2101 } 2102 continue; 2103 } 2104 2105 // Otherwise, if we have an operand with sub-operands, but they aren't 2106 // named... 2107 if (SubOps && OpInfo.Decoder.empty()) { 2108 // If it's a single sub-operand, and no custom decoder, use the decoder 2109 // from the one sub-operand. 2110 if (SubOps->getNumArgs() == 1) 2111 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2112 2113 // If we have multiple sub-ops, there'd better have a custom 2114 // decoder. 
(Otherwise we don't know how to populate them properly...) 2115 if (SubOps->getNumArgs() > 1) { 2116 PrintError(EncodingDef.getLoc(), 2117 "DecoderEmitter: operand \"" + OpName + 2118 "\" uses MIOperandInfo with multiple ops, but doesn't " 2119 "have a custom decoder!"); 2120 debugDumpRecord(EncodingDef); 2121 continue; 2122 } 2123 } 2124 2125 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2126 // FIXME: it should be an error not to find a definition for a given 2127 // operand, rather than just failing to add it to the resulting 2128 // instruction! (This is a longstanding bug, which will be addressed in an 2129 // upcoming change.) 2130 if (OpInfo.numFields() > 0) 2131 InsnOperands.push_back(OpInfo); 2132 } 2133 } 2134 Operands[Opc] = InsnOperands; 2135 2136 #if 0 2137 LLVM_DEBUG({ 2138 // Dumps the instruction encoding bits. 2139 dumpBits(errs(), Bits); 2140 2141 errs() << '\n'; 2142 2143 // Dumps the list of operand info. 2144 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2145 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2146 const std::string &OperandName = Info.Name; 2147 const Record &OperandDef = *Info.Rec; 2148 2149 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2150 } 2151 }); 2152 #endif 2153 2154 return Bits.getNumBits(); 2155 } 2156 2157 // emitFieldFromInstruction - Emit the templated helper function 2158 // fieldFromInstruction(). 2159 // On Windows we make sure that this function is not inlined when 2160 // using the VS compiler. It has a bug which causes the function 2161 // to be optimized out in some circumstances. 
See llvm.org/pr38292 2162 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2163 OS << "// Helper functions for extracting fields from encoded instructions.\n" 2164 << "// InsnType must either be integral or an APInt-like object that " 2165 "must:\n" 2166 << "// * be default-constructible and copy-constructible\n" 2167 << "// * be constructible from an APInt (this can be private)\n" 2168 << "// * Support insertBits(bits, startBit, numBits)\n" 2169 << "// * Support extractBitsAsZExtValue(numBits, startBit)\n" 2170 << "// * Support the ~, &, ==, and != operators with other objects of " 2171 "the same type\n" 2172 << "// * Support the != and bitwise & with uint64_t\n" 2173 << "// * Support put (<<) to raw_ostream&\n" 2174 << "template <typename InsnType>\n" 2175 << "#if defined(_MSC_VER) && !defined(__clang__)\n" 2176 << "__declspec(noinline)\n" 2177 << "#endif\n" 2178 << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n" 2179 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2180 << " unsigned numBits) {\n" 2181 << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit " 2182 "extractions!\");\n" 2183 << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n" 2184 << " \"Instruction field out of bounds!\");\n" 2185 << " InsnType fieldMask;\n" 2186 << " if (numBits == sizeof(InsnType) * 8)\n" 2187 << " fieldMask = (InsnType)(-1LL);\n" 2188 << " else\n" 2189 << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n" 2190 << " return (insn & fieldMask) >> startBit;\n" 2191 << "}\n" 2192 << "\n" 2193 << "template <typename InsnType>\n" 2194 << "static std::enable_if_t<!std::is_integral<InsnType>::value, " 2195 "uint64_t>\n" 2196 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2197 << " unsigned numBits) {\n" 2198 << " return insn.extractBitsAsZExtValue(numBits, startBit);\n" 2199 << "}\n\n"; 2200 } 2201 2202 // emitInsertBits - Emit the templated helper function insertBits(). 
// Two overloads are written to OS: one enabled for integral InsnType, which
// ORs the shifted bits into the field, and one enabled for every other
// InsnType, which forwards to the type's own insertBits() member.
static void emitInsertBits(formatted_raw_ostream &OS) {
  OS << "// Helper function for inserting bits extracted from an encoded "
        "instruction into\n"
     << "// a field.\n"
     << "template <typename InsnType>\n"
     << "static std::enable_if_t<std::is_integral<InsnType>::value>\n"
     << "insertBits(InsnType &field, InsnType bits, unsigned startBit, "
        "unsigned numBits) {\n"
     << " assert(startBit + numBits <= sizeof field * 8);\n"
     << " field |= (InsnType)bits << startBit;\n"
     << "}\n"
     << "\n"
     << "template <typename InsnType>\n"
     << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n"
     << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, "
        "unsigned numBits) {\n"
     << " field.insertBits(bits, startBit, numBits);\n"
     << "}\n\n";
}

// emitDecodeInstruction - Emit the templated helper function
// decodeInstruction().
//
// The generated function walks DecodeTable in a loop and switches on the
// opcode byte at the current position. The cases written below are
// OPC_ExtractField, OPC_FilterValue, OPC_CheckField, OPC_CheckPredicate,
// OPC_Decode, OPC_TryDecode, OPC_SoftFail and OPC_Fail.
//
// When IsVarLenInst is true the generated function takes an extra `makeUp`
// callback parameter, and calls to it are emitted before each field
// extraction and before the final decode (using InstrLenTable[Opc] as the
// length).
static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                  bool IsVarLenInst) {
  // Signature, up to (but not including) the closing parenthesis.
  OS << "template <typename InsnType>\n"
     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
        "MCInst &MI,\n"
     << " InsnType insn, uint64_t "
        "Address,\n"
     << " const MCDisassembler *DisAsm,\n"
     << " const MCSubtargetInfo &STI";
  // Variable-length decoders get the extra callback parameter.
  if (IsVarLenInst) {
    OS << ",\n"
       << " llvm::function_ref<void(APInt "
          "&,"
       << " uint64_t)> makeUp";
  }
  // Function prologue, the default case, and the start of OPC_ExtractField.
  OS << ") {\n"
     << " const FeatureBitset &Bits = STI.getFeatureBits();\n"
     << "\n"
     << " const uint8_t *Ptr = DecodeTable;\n"
     << " uint64_t CurFieldValue = 0;\n"
     << " DecodeStatus S = MCDisassembler::Success;\n"
     << " while (true) {\n"
     << " ptrdiff_t Loc = Ptr - DecodeTable;\n"
     << " switch (*Ptr) {\n"
     << " default:\n"
     << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
     << " return MCDisassembler::Fail;\n"
     << " case MCD::OPC_ExtractField: {\n"
     << " // Decode the start value.\n"
     << " unsigned DecodedLen;\n"
     << " unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
     << " Ptr += DecodedLen;\n"
     << " unsigned Len = *Ptr++;\n";
  if (IsVarLenInst)
    OS << " makeUp(insn, Start + Len);\n";
  // Rest of OPC_ExtractField, all of OPC_FilterValue, start of OPC_CheckField.
  OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
        "\", \"\n"
     << " << Len << \"): \" << CurFieldValue << \"\\n\");\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_FilterValue: {\n"
     << " // Decode the field value.\n"
     << " unsigned Len;\n"
     << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // Perform the filter operation.\n"
     << " if (Val != CurFieldValue)\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << "
        "\", \" << NumToSkip\n"
     << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" "
        ": \"PASS:\")\n"
     << " << \" continuing at \" << (Ptr - DecodeTable) << "
        "\"\\n\");\n"
     << "\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_CheckField: {\n"
     << " // Decode the start value.\n"
     << " unsigned Len;\n"
     << " unsigned Start = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " Len = *Ptr;\n";
  if (IsVarLenInst)
    OS << " makeUp(insn, Start + Len);\n";
  // Rest of OPC_CheckField, all of OPC_CheckPredicate, start of OPC_Decode.
  OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
     << " // Decode the field value.\n"
     << " unsigned PtrLen = 0;\n"
     << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
     << " Ptr += PtrLen;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // If the actual and expected values don't match, skip.\n"
     << " if (ExpectedValue != FieldValue)\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << "
        "\", \"\n"
     << " << Len << \", \" << ExpectedValue << \", \" << "
        "NumToSkip\n"
     << " << \"): FieldValue = \" << FieldValue << \", "
        "ExpectedValue = \"\n"
     << " << ExpectedValue << \": \"\n"
     << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : "
        "\"FAIL\\n\"));\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_CheckPredicate: {\n"
     << " unsigned Len;\n"
     << " // Decode the Predicate Index value.\n"
     << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << " // Check the predicate.\n"
     << " bool Pred;\n"
     << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
     << " Ptr += NumToSkip;\n"
     << " (void)Pred;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx "
        "<< \"): \"\n"
     << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
     << "\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_Decode: {\n"
     << " unsigned Len;\n"
     << " // Decode the Opcode value.\n"
     << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << "\n"
     << " MI.clear();\n"
     << " MI.setOpcode(Opc);\n"
     << " bool DecodeComplete;\n";
  // Variable-length decoders look up the instruction length by opcode first.
  if (IsVarLenInst) {
    OS << " Len = InstrLenTable[Opc];\n"
       << " makeUp(insn, Len);\n";
  }
  // Rest of OPC_Decode, then OPC_TryDecode, OPC_SoftFail, OPC_Fail and the
  // function epilogue.
  OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
        "DecodeComplete);\n"
     << " assert(DecodeComplete);\n"
     << "\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
     << " << \", using decoder \" << DecodeIdx << \": \"\n"
     << " << (S != MCDisassembler::Fail ? \"PASS\" : "
        "\"FAIL\") << \"\\n\");\n"
     << " return S;\n"
     << " }\n"
     << " case MCD::OPC_TryDecode: {\n"
     << " unsigned Len;\n"
     << " // Decode the Opcode value.\n"
     << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " // NumToSkip is a plain 24-bit integer.\n"
     << " unsigned NumToSkip = *Ptr++;\n"
     << " NumToSkip |= (*Ptr++) << 8;\n"
     << " NumToSkip |= (*Ptr++) << 16;\n"
     << "\n"
     << " // Perform the decode operation.\n"
     << " MCInst TmpMI;\n"
     << " TmpMI.setOpcode(Opc);\n"
     << " bool DecodeComplete;\n"
     << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, "
        "DecodeComplete);\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << "
        "Opc\n"
     << " << \", using decoder \" << DecodeIdx << \": \");\n"
     << "\n"
     << " if (DecodeComplete) {\n"
     << " // Decoding complete.\n"
     << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : "
        "\"FAIL\") << \"\\n\");\n"
     << " MI = TmpMI;\n"
     << " return S;\n"
     << " } else {\n"
     << " assert(S == MCDisassembler::Fail);\n"
     << " // If the decoding was incomplete, skip.\n"
     << " Ptr += NumToSkip;\n"
     << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - "
        "DecodeTable) << \"\\n\");\n"
     << " // Reset decode status. This also drops a SoftFail status "
        "that could be\n"
     << " // set before the decode attempt.\n"
     << " S = MCDisassembler::Success;\n"
     << " }\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_SoftFail: {\n"
     << " // Decode the mask values.\n"
     << " unsigned Len;\n"
     << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
     << " Ptr += Len;\n"
     << " bool Fail = (insn & PositiveMask) != 0 || (~insn & "
        "NegativeMask) != 0;\n"
     << " if (Fail)\n"
     << " S = MCDisassembler::SoftFail;\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
        "\"FAIL\\n\" : \"PASS\\n\"));\n"
     << " break;\n"
     << " }\n"
     << " case MCD::OPC_Fail: {\n"
     << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
     << " return MCDisassembler::Fail;\n"
     << " }\n"
     << " }\n"
     << " }\n"
     << " llvm_unreachable(\"bogosity detected in disassembler state "
        "machine!\");\n"
     << "}\n\n";
}

// Helper to propagate SoftFail status. Returns false if the status is Fail;
// callers are expected to early-exit in that condition. (Note, the '&' operator
// is correct to propagate the values of this enum; see comment on 'enum
// DecodeStatus'.)
//
// This function only writes the text of the generated Check() helper to OS.
static void emitCheck(formatted_raw_ostream &OS) {
  OS << "static bool Check(DecodeStatus &Out, DecodeStatus In) {\n"
     << " Out = static_cast<DecodeStatus>(Out & In);\n"
     << " return Out != MCDisassembler::Fail;\n"
     << "}\n\n";
}

// Emits disassembler code for instruction decoding.
2444 void DecoderEmitter::run(raw_ostream &o) { 2445 formatted_raw_ostream OS(o); 2446 OS << "#include \"llvm/MC/MCInst.h\"\n"; 2447 OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n"; 2448 OS << "#include \"llvm/Support/DataTypes.h\"\n"; 2449 OS << "#include \"llvm/Support/Debug.h\"\n"; 2450 OS << "#include \"llvm/Support/LEB128.h\"\n"; 2451 OS << "#include \"llvm/Support/raw_ostream.h\"\n"; 2452 OS << "#include \"llvm/TargetParser/SubtargetFeature.h\"\n"; 2453 OS << "#include <assert.h>\n"; 2454 OS << '\n'; 2455 OS << "namespace llvm {\n\n"; 2456 2457 emitFieldFromInstruction(OS); 2458 emitInsertBits(OS); 2459 emitCheck(OS); 2460 2461 Target.reverseBitsForLittleEndianEncoding(); 2462 2463 // Parameterize the decoders based on namespace and instruction width. 2464 std::set<StringRef> HwModeNames; 2465 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2466 NumberedEncodings.reserve(NumberedInstructions.size()); 2467 // First, collect all HwModes referenced by the target. 2468 for (const auto &NumberedInstruction : NumberedInstructions) { 2469 if (const RecordVal *RV = 2470 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2471 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2472 const CodeGenHwModes &HWM = Target.getHwModes(); 2473 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2474 for (auto &KV : EBM) 2475 HwModeNames.insert(HWM.getMode(KV.first).Name); 2476 } 2477 } 2478 } 2479 2480 // If HwModeNames is empty, add the empty string so we always have one HwMode. 
2481 if (HwModeNames.empty()) 2482 HwModeNames.insert(""); 2483 2484 for (const auto &NumberedInstruction : NumberedInstructions) { 2485 if (const RecordVal *RV = 2486 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2487 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2488 const CodeGenHwModes &HWM = Target.getHwModes(); 2489 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2490 for (auto &KV : EBM) { 2491 NumberedEncodings.emplace_back(KV.second, NumberedInstruction, 2492 HWM.getMode(KV.first).Name); 2493 HwModeNames.insert(HWM.getMode(KV.first).Name); 2494 } 2495 continue; 2496 } 2497 } 2498 // This instruction is encoded the same on all HwModes. Emit it for all 2499 // HwModes. 2500 for (StringRef HwModeName : HwModeNames) 2501 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2502 NumberedInstruction, HwModeName); 2503 } 2504 for (const auto &NumberedAlias : 2505 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2506 NumberedEncodings.emplace_back( 2507 NumberedAlias, 2508 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2509 2510 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2511 OpcMap; 2512 std::map<unsigned, std::vector<OperandInfo>> Operands; 2513 std::vector<unsigned> InstrLen; 2514 2515 bool IsVarLenInst = 2516 any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 2517 RecordVal *RV = CGI->TheDef->getValue("Inst"); 2518 return RV && isa<DagInit>(RV->getValue()); 2519 }); 2520 unsigned MaxInstLen = 0; 2521 2522 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) { 2523 const Record *EncodingDef = NumberedEncodings[i].EncodingDef; 2524 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst; 2525 const Record *Def = Inst->TheDef; 2526 unsigned Size = EncodingDef->getValueAsInt("Size"); 2527 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2528 Def->getValueAsBit("isPseudo") || 2529 Def->getValueAsBit("isAsmParserOnly") || 2530 
Def->getValueAsBit("isCodeGenOnly")) { 2531 NumEncodingsLackingDisasm++; 2532 continue; 2533 } 2534 2535 if (i < NumberedInstructions.size()) 2536 NumInstructions++; 2537 NumEncodings++; 2538 2539 if (!Size && !IsVarLenInst) 2540 continue; 2541 2542 if (IsVarLenInst) 2543 InstrLen.resize(NumberedInstructions.size(), 0); 2544 2545 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i, 2546 Operands, IsVarLenInst)) { 2547 if (IsVarLenInst) { 2548 MaxInstLen = std::max(MaxInstLen, Len); 2549 InstrLen[i] = Len; 2550 } 2551 std::string DecoderNamespace = 2552 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2553 if (!NumberedEncodings[i].HwModeName.empty()) 2554 DecoderNamespace += 2555 std::string("_") + NumberedEncodings[i].HwModeName.str(); 2556 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2557 i, Target.getInstrIntValue(Def)); 2558 } else { 2559 NumEncodingsOmitted++; 2560 } 2561 } 2562 2563 DecoderTableInfo TableInfo; 2564 for (const auto &Opc : OpcMap) { 2565 // Emit the decoder for this namespace+width combination. 2566 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2567 NumberedEncodings.size()); 2568 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2569 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2570 2571 // The decode table is cleared for each top level decoder function. The 2572 // predicates and decoders themselves, however, are shared across all 2573 // decoders to give more opportunities for uniqueing. 2574 TableInfo.Table.clear(); 2575 TableInfo.FixupStack.clear(); 2576 TableInfo.Table.reserve(16384); 2577 TableInfo.FixupStack.emplace_back(); 2578 FC.emitTableEntries(TableInfo); 2579 // Any NumToSkip fixups in the top level scope can resolve to the 2580 // OPC_Fail at the end of the table. 2581 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2582 // Resolve any NumToSkip fixups in the current scope. 
2583 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2584 TableInfo.Table.size()); 2585 TableInfo.FixupStack.clear(); 2586 2587 TableInfo.Table.push_back(MCD::OPC_Fail); 2588 2589 // Print the table to the output stream. 2590 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first, 2591 Opc.second); 2592 } 2593 2594 // For variable instruction, we emit a instruction length table 2595 // to let the decoder know how long the instructions are. 2596 // You can see example usage in M68k's disassembler. 2597 if (IsVarLenInst) 2598 emitInstrLenTable(OS, InstrLen); 2599 // Emit the predicate function. 2600 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2601 2602 // Emit the decoder function. 2603 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2604 2605 // Emit the main entry point for the decoder, decodeInstruction(). 2606 emitDecodeInstruction(OS, IsVarLenInst); 2607 2608 OS << "\n} // end namespace llvm\n"; 2609 } 2610 2611 namespace llvm { 2612 2613 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2614 const std::string &PredicateNamespace) { 2615 DecoderEmitter(RK, PredicateNamespace).run(OS); 2616 } 2617 2618 } // end namespace llvm 2619