1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenHwModes.h" 15 #include "CodeGenInstruction.h" 16 #include "CodeGenTarget.h" 17 #include "InfoByHwMode.h" 18 #include "TableGenBackends.h" 19 #include "VarLenCodeEmitterGen.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/CachedHashString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/MC/MCDecoderOps.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/ErrorHandling.h" 33 #include "llvm/Support/FormattedStream.h" 34 #include "llvm/Support/LEB128.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/TableGen/Error.h" 37 #include "llvm/TableGen/Record.h" 38 #include <algorithm> 39 #include <cassert> 40 #include <cstddef> 41 #include <cstdint> 42 #include <map> 43 #include <memory> 44 #include <set> 45 #include <string> 46 #include <utility> 47 #include <vector> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "decoder-emitter" 52 53 namespace { 54 55 STATISTIC(NumEncodings, "Number of encodings considered"); 56 STATISTIC(NumEncodingsLackingDisasm, 57 "Number of encodings without disassembler info"); 58 STATISTIC(NumInstructions, "Number of instructions considered"); 59 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 60 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 61 62 struct EncodingField { 63 unsigned Base, Width, Offset; 64 EncodingField(unsigned B, unsigned W, unsigned O) 65 : Base(B), Width(W), Offset(O) {} 66 }; 67 68 struct OperandInfo { 69 std::vector<EncodingField> Fields; 70 std::string Decoder; 71 bool HasCompleteDecoder; 72 uint64_t InitValue; 73 74 OperandInfo(std::string D, bool HCD) 75 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 76 77 void addField(unsigned Base, unsigned Width, unsigned Offset) { 78 Fields.push_back(EncodingField(Base, Width, Offset)); 79 } 80 81 unsigned numFields() const { return Fields.size(); } 82 83 typedef std::vector<EncodingField>::const_iterator const_iterator; 84 85 const_iterator begin() const { return Fields.begin(); } 86 const_iterator end() const { return Fields.end(); } 87 }; 88 89 typedef std::vector<uint8_t> DecoderTable; 90 typedef uint32_t DecoderFixup; 91 typedef std::vector<DecoderFixup> FixupList; 92 typedef std::vector<FixupList> FixupScopeList; 93 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 94 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 95 struct DecoderTableInfo { 96 DecoderTable Table; 97 FixupScopeList FixupStack; 98 PredicateSet Predicates; 99 DecoderSet Decoders; 100 }; 101 102 struct EncodingAndInst { 103 const Record *EncodingDef; 104 const CodeGenInstruction *Inst; 105 StringRef HwModeName; 106 107 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 108 StringRef HwModeName = "") 109 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 110 }; 111 112 struct EncodingIDAndOpcode { 113 unsigned EncodingID; 114 unsigned Opcode; 115 116 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 117 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 118 : EncodingID(EncodingID), Opcode(Opcode) {} 119 }; 120 121 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 122 if (Value.EncodingDef != Value.Inst->TheDef) 123 OS << Value.EncodingDef->getName() << ":"; 124 OS << Value.Inst->TheDef->getName(); 125 return OS; 126 } 127 128 class DecoderEmitter { 129 RecordKeeper &RK; 130 std::vector<EncodingAndInst> NumberedEncodings; 131 132 public: 133 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) 134 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} 135 136 // Emit the decoder state machine table. 137 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 138 unsigned Indentation, unsigned BitWidth, 139 StringRef Namespace) const; 140 void emitInstrLenTable(formatted_raw_ostream &OS, 141 std::vector<unsigned> &InstrLen) const; 142 void emitPredicateFunction(formatted_raw_ostream &OS, 143 PredicateSet &Predicates, 144 unsigned Indentation) const; 145 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, 146 unsigned Indentation) const; 147 148 // run - Output the code emitter 149 void run(raw_ostream &o); 150 151 private: 152 CodeGenTarget Target; 153 154 public: 155 std::string PredicateNamespace; 156 }; 157 158 } // end anonymous namespace 159 160 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 161 // for a bit value. 162 // 163 // BIT_UNFILTERED is used as the init value for a filter position. It is used 164 // only for filter processings. 165 typedef enum { 166 BIT_TRUE, // '1' 167 BIT_FALSE, // '0' 168 BIT_UNSET, // '?' 169 BIT_UNFILTERED // unfiltered 170 } bit_value_t; 171 172 static bool ValueSet(bit_value_t V) { 173 return (V == BIT_TRUE || V == BIT_FALSE); 174 } 175 176 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); } 177 178 static int Value(bit_value_t V) { 179 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 180 } 181 182 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 183 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 184 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 185 186 // The bit is uninitialized. 187 return BIT_UNSET; 188 } 189 190 // Prints the bit value for each position. 191 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 192 for (unsigned index = bits.getNumBits(); index > 0; --index) { 193 switch (bitFromBits(bits, index - 1)) { 194 case BIT_TRUE: 195 o << "1"; 196 break; 197 case BIT_FALSE: 198 o << "0"; 199 break; 200 case BIT_UNSET: 201 o << "_"; 202 break; 203 default: 204 llvm_unreachable("unexpected return value from bitFromBits"); 205 } 206 } 207 } 208 209 static BitsInit &getBitsField(const Record &def, StringRef str) { 210 const RecordVal *RV = def.getValue(str); 211 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 212 return *Bits; 213 214 // variable length instruction 215 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 216 SmallVector<Init *, 16> Bits; 217 218 for (auto &SI : VLI) { 219 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 220 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 221 Bits.push_back(BI->getBit(Idx)); 222 } 223 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 224 Bits.push_back(const_cast<BitInit *>(BI)); 225 } else { 226 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 227 Bits.push_back(UnsetInit::get(def.getRecords())); 228 } 229 } 230 231 return *BitsInit::get(def.getRecords(), Bits); 232 } 233 234 // Representation of the instruction to work on. 235 typedef std::vector<bit_value_t> insn_t; 236 237 namespace { 238 239 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 240 241 class FilterChooser; 242 243 /// Filter - Filter works with FilterChooser to produce the decoding tree for 244 /// the ISA. 245 /// 246 /// It is useful to think of a Filter as governing the switch stmts of the 247 /// decoding tree in a certain level. Each case stmt delegates to an inferior 248 /// FilterChooser to decide what further decoding logic to employ, or in another 249 /// words, what other remaining bits to look at. The FilterChooser eventually 250 /// chooses a best Filter to do its job. 251 /// 252 /// This recursive scheme ends when the number of Opcodes assigned to the 253 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 254 /// the Filter/FilterChooser combo does not know how to distinguish among the 255 /// Opcodes assigned. 256 /// 257 /// An example of a conflict is 258 /// 259 /// Conflict: 260 /// 111101000.00........00010000.... 261 /// 111101000.00........0001........ 262 /// 1111010...00........0001........ 263 /// 1111010...00.................... 264 /// 1111010......................... 265 /// 1111............................ 266 /// ................................ 267 /// VST4q8a 111101000_00________00010000____ 268 /// VST4q8b 111101000_00________00010000____ 269 /// 270 /// The Debug output shows the path that the decoding tree follows to reach the 271 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 272 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers. 273 /// 274 /// The encoding info in the .td files does not specify this meta information, 275 /// which could have been used by the decoder to resolve the conflict. The 276 /// decoder could try to decode the even/odd register numbering and assign to 277 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 278 /// version and return the Opcode since the two have the same Asm format string. 279 class Filter { 280 protected: 281 const FilterChooser 282 *Owner; // points to the FilterChooser who owns this filter 283 unsigned StartBit; // the starting bit position 284 unsigned NumBits; // number of bits to filter 285 bool Mixed; // a mixed region contains both set and unset bits 286 287 // Map of well-known segment value to the set of uid's with that value. 288 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; 289 290 // Set of uid's with non-constant segment values. 291 std::vector<EncodingIDAndOpcode> VariableInstructions; 292 293 // Map of well-known segment value to its delegate. 294 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 295 296 // Number of instructions which fall under FilteredInstructions category. 297 unsigned NumFiltered; 298 299 // Keeps track of the last opcode in the filtered bucket. 300 EncodingIDAndOpcode LastOpcFiltered; 301 302 public: 303 Filter(Filter &&f); 304 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 305 306 ~Filter() = default; 307 308 unsigned getNumFiltered() const { return NumFiltered; } 309 310 EncodingIDAndOpcode getSingletonOpc() const { 311 assert(NumFiltered == 1); 312 return LastOpcFiltered; 313 } 314 315 // Return the filter chooser for the group of instructions without constant 316 // segment values. 317 const FilterChooser &getVariableFC() const { 318 assert(NumFiltered == 1); 319 assert(FilterChooserMap.size() == 1); 320 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 321 } 322 323 // Divides the decoding task into sub tasks and delegates them to the 324 // inferior FilterChooser's. 325 // 326 // A special case arises when there's only one entry in the filtered 327 // instructions. In order to unambiguously decode the singleton, we need to 328 // match the remaining undecoded encoding bits against the singleton. 329 void recurse(); 330 331 // Emit table entries to decode instructions given a segment or segments of 332 // bits. 333 void emitTableEntry(DecoderTableInfo &TableInfo) const; 334 335 // Returns the number of fanout produced by the filter. More fanout implies 336 // the filter distinguishes more categories of instructions. 337 unsigned usefulness() const; 338 }; // end class Filter 339 340 } // end anonymous namespace 341 342 // These are states of our finite state machines used in FilterChooser's 343 // filterProcessor() which produces the filter candidates to use. 344 typedef enum { 345 ATTR_NONE, 346 ATTR_FILTERED, 347 ATTR_ALL_SET, 348 ATTR_ALL_UNSET, 349 ATTR_MIXED 350 } bitAttr_t; 351 352 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 353 /// in order to perform the decoding of instructions at the current level. 354 /// 355 /// Decoding proceeds from the top down. Based on the well-known encoding bits 356 /// of instructions available, FilterChooser builds up the possible Filters that 357 /// can further the task of decoding by distinguishing among the remaining 358 /// candidate instructions. 359 /// 360 /// Once a filter has been chosen, it is called upon to divide the decoding task 361 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 362 /// processings. 363 /// 364 /// It is useful to think of a Filter as governing the switch stmts of the 365 /// decoding tree. And each case is delegated to an inferior FilterChooser to 366 /// decide what further remaining bits to look at. 367 namespace { 368 369 class FilterChooser { 370 protected: 371 friend class Filter; 372 373 // Vector of codegen instructions to choose our filter. 374 ArrayRef<EncodingAndInst> AllInstructions; 375 376 // Vector of uid's for this filter chooser to work on. 377 // The first member of the pair is the opcode id being decoded, the second is 378 // the opcode id that should be emitted. 379 const std::vector<EncodingIDAndOpcode> &Opcodes; 380 381 // Lookup table for the operand decoding of instructions. 382 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 383 384 // Vector of candidate filters. 385 std::vector<Filter> Filters; 386 387 // Array of bit values passed down from our parent. 388 // Set to all BIT_UNFILTERED's for Parent == NULL. 389 std::vector<bit_value_t> FilterBitValues; 390 391 // Links to the FilterChooser above us in the decoding tree. 392 const FilterChooser *Parent; 393 394 // Index of the best filter from Filters. 395 int BestIndex; 396 397 // Width of instructions 398 unsigned BitWidth; 399 400 // Parent emitter 401 const DecoderEmitter *Emitter; 402 403 public: 404 FilterChooser(ArrayRef<EncodingAndInst> Insts, 405 const std::vector<EncodingIDAndOpcode> &IDs, 406 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 407 unsigned BW, const DecoderEmitter *E) 408 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 409 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 410 BitWidth(BW), Emitter(E) { 411 doFilter(); 412 } 413 414 FilterChooser(ArrayRef<EncodingAndInst> Insts, 415 const std::vector<EncodingIDAndOpcode> &IDs, 416 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 417 const std::vector<bit_value_t> &ParentFilterBitValues, 418 const FilterChooser &parent) 419 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 420 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 421 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 422 doFilter(); 423 } 424 425 FilterChooser(const FilterChooser &) = delete; 426 void operator=(const FilterChooser &) = delete; 427 428 unsigned getBitWidth() const { return BitWidth; } 429 430 protected: 431 // Populates the insn given the uid. 432 void insnWithID(insn_t &Insn, unsigned Opcode) const { 433 BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst"); 434 Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(), 435 BIT_UNSET); 436 // We may have a SoftFail bitmask, which specifies a mask where an encoding 437 // may differ from the value in "Inst" and yet still be valid, but the 438 // disassembler should return SoftFail instead of Success. 439 // 440 // This is used for marking UNPREDICTABLE instructions in the ARM world. 441 const RecordVal *RV = 442 AllInstructions[Opcode].EncodingDef->getValue("SoftFail"); 443 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 444 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 445 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 446 Insn[i] = BIT_UNSET; 447 else 448 Insn[i] = bitFromBits(Bits, i); 449 } 450 } 451 452 // Emit the name of the encoding/instruction pair. 453 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 454 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 455 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 456 if (EncodingDef != InstDef) 457 OS << EncodingDef->getName() << ":"; 458 OS << InstDef->getName(); 459 } 460 461 // Populates the field of the insn given the start position and the number of 462 // consecutive bits to scan for. 463 // 464 // Returns false if there exists any uninitialized bit value in the range. 465 // Returns true, otherwise. 466 bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, 467 unsigned NumBits) const; 468 469 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 470 /// filter array as a series of chars. 471 void dumpFilterArray(raw_ostream &o, 472 const std::vector<bit_value_t> &filter) const; 473 474 /// dumpStack - dumpStack traverses the filter chooser chain and calls 475 /// dumpFilterArray on each filter chooser up to the top level one. 476 void dumpStack(raw_ostream &o, const char *prefix) const; 477 478 Filter &bestFilter() { 479 assert(BestIndex != -1 && "BestIndex not set"); 480 return Filters[BestIndex]; 481 } 482 483 bool PositionFiltered(unsigned i) const { 484 return ValueSet(FilterBitValues[i]); 485 } 486 487 // Calculates the island(s) needed to decode the instruction. 488 // This returns a lit of undecoded bits of an instructions, for example, 489 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 490 // decoded bits in order to verify that the instruction matches the Opcode. 491 unsigned getIslands(std::vector<unsigned> &StartBits, 492 std::vector<unsigned> &EndBits, 493 std::vector<uint64_t> &FieldVals, 494 const insn_t &Insn) const; 495 496 // Emits code to check the Predicates member of an instruction are true. 497 // Returns true if predicate matches were emitted, false otherwise. 498 bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 499 unsigned Opc) const; 500 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 501 raw_ostream &OS) const; 502 503 bool doesOpcodeNeedPredicate(unsigned Opc) const; 504 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 505 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 506 507 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 508 509 // Emits table entries to decode the singleton. 510 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 511 EncodingIDAndOpcode Opc) const; 512 513 // Emits code to decode the singleton, and then to decode the rest. 514 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 515 const Filter &Best) const; 516 517 void emitBinaryParser(raw_ostream &o, unsigned &Indentation, 518 const OperandInfo &OpInfo, 519 bool &OpHasCompleteDecoder) const; 520 521 void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, 522 bool &HasCompleteDecoder) const; 523 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 524 bool &HasCompleteDecoder) const; 525 526 // Assign a single filter and run with it. 527 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 528 529 // reportRegion is a helper function for filterProcessor to mark a region as 530 // eligible for use as a filter region. 531 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 532 bool AllowMixed); 533 534 // FilterProcessor scans the well-known encoding bits of the instructions and 535 // builds up a list of candidate filters. It chooses the best filter and 536 // recursively descends down the decoding tree. 537 bool filterProcessor(bool AllowMixed, bool Greedy = true); 538 539 // Decides on the best configuration of filter(s) to use in order to decode 540 // the instructions. A conflict of instructions may occur, in which case we 541 // dump the conflict set to the standard error. 542 void doFilter(); 543 544 public: 545 // emitTableEntries - Emit state machine entries to decode our share of 546 // instructions. 547 void emitTableEntries(DecoderTableInfo &TableInfo) const; 548 }; 549 550 } // end anonymous namespace 551 552 /////////////////////////// 553 // // 554 // Filter Implementation // 555 // // 556 /////////////////////////// 557 558 Filter::Filter(Filter &&f) 559 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 560 FilteredInstructions(std::move(f.FilteredInstructions)), 561 VariableInstructions(std::move(f.VariableInstructions)), 562 FilterChooserMap(std::move(f.FilterChooserMap)), 563 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} 564 565 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 566 bool mixed) 567 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 568 assert(StartBit + NumBits - 1 < Owner->BitWidth); 569 570 NumFiltered = 0; 571 LastOpcFiltered = {0, 0}; 572 573 for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { 574 insn_t Insn; 575 576 // Populates the insn given the uid. 577 Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID); 578 579 uint64_t Field; 580 // Scans the segment for possibly well-specified encoding bits. 581 bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); 582 583 if (ok) { 584 // The encoding bits are well-known. Lets add the uid of the 585 // instruction into the bucket keyed off the constant field value. 586 LastOpcFiltered = Owner->Opcodes[i]; 587 FilteredInstructions[Field].push_back(LastOpcFiltered); 588 ++NumFiltered; 589 } else { 590 // Some of the encoding bit(s) are unspecified. This contributes to 591 // one additional member of "Variable" instructions. 592 VariableInstructions.push_back(Owner->Opcodes[i]); 593 } 594 } 595 596 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && 597 "Filter returns no instruction categories"); 598 } 599 600 // Divides the decoding task into sub tasks and delegates them to the 601 // inferior FilterChooser's. 602 // 603 // A special case arises when there's only one entry in the filtered 604 // instructions. In order to unambiguously decode the singleton, we need to 605 // match the remaining undecoded encoding bits against the singleton. 606 void Filter::recurse() { 607 // Starts by inheriting our parent filter chooser's filter bit values. 608 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 609 610 if (!VariableInstructions.empty()) { 611 // Conservatively marks each segment position as BIT_UNSET. 612 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 613 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 614 615 // Delegates to an inferior filter chooser for further processing on this 616 // group of instructions whose segment values are variable. 617 FilterChooserMap.insert(std::pair( 618 NO_FIXED_SEGMENTS_SENTINEL, 619 std::make_unique<FilterChooser>(Owner->AllInstructions, 620 VariableInstructions, Owner->Operands, 621 BitValueArray, *Owner))); 622 } 623 624 // No need to recurse for a singleton filtered instruction. 625 // See also Filter::emit*(). 626 if (getNumFiltered() == 1) { 627 assert(FilterChooserMap.size() == 1); 628 return; 629 } 630 631 // Otherwise, create sub choosers. 632 for (const auto &Inst : FilteredInstructions) { 633 634 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 635 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 636 if (Inst.first & (1ULL << bitIndex)) 637 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 638 else 639 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 640 } 641 642 // Delegates to an inferior filter chooser for further processing on this 643 // category of instructions. 644 FilterChooserMap.insert( 645 std::pair(Inst.first, std::make_unique<FilterChooser>( 646 Owner->AllInstructions, Inst.second, 647 Owner->Operands, BitValueArray, *Owner))); 648 } 649 } 650 651 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 652 uint32_t DestIdx) { 653 // Any NumToSkip fixups in the current scope can resolve to the 654 // current location. 655 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 656 I != E; ++I) { 657 // Calculate the distance from the byte following the fixup entry byte 658 // to the destination. The Target is calculated from after the 16-bit 659 // NumToSkip entry itself, so subtract two from the displacement here 660 // to account for that. 661 uint32_t FixupIdx = *I; 662 uint32_t Delta = DestIdx - FixupIdx - 3; 663 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 664 // big. 665 assert(Delta < (1u << 24)); 666 Table[FixupIdx] = (uint8_t)Delta; 667 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 668 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 669 } 670 } 671 672 // Emit table entries to decode instructions given a segment or segments 673 // of bits. 674 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 675 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 676 TableInfo.Table.push_back(MCD::OPC_ExtractField); 677 678 SmallString<16> SBytes; 679 raw_svector_ostream S(SBytes); 680 encodeULEB128(StartBit, S); 681 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 682 TableInfo.Table.push_back(NumBits); 683 684 // A new filter entry begins a new scope for fixup resolution. 685 TableInfo.FixupStack.emplace_back(); 686 687 DecoderTable &Table = TableInfo.Table; 688 689 size_t PrevFilter = 0; 690 bool HasFallthrough = false; 691 for (auto &Filter : FilterChooserMap) { 692 // Field value -1 implies a non-empty set of variable instructions. 693 // See also recurse(). 694 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 695 HasFallthrough = true; 696 697 // Each scope should always have at least one filter value to check 698 // for. 699 assert(PrevFilter != 0 && "empty filter set!"); 700 FixupList &CurScope = TableInfo.FixupStack.back(); 701 // Resolve any NumToSkip fixups in the current scope. 702 resolveTableFixups(Table, CurScope, Table.size()); 703 CurScope.clear(); 704 PrevFilter = 0; // Don't re-process the filter's fallthrough. 705 } else { 706 Table.push_back(MCD::OPC_FilterValue); 707 // Encode and emit the value to filter against. 708 uint8_t Buffer[16]; 709 unsigned Len = encodeULEB128(Filter.first, Buffer); 710 Table.insert(Table.end(), Buffer, Buffer + Len); 711 // Reserve space for the NumToSkip entry. We'll backpatch the value 712 // later. 713 PrevFilter = Table.size(); 714 Table.push_back(0); 715 Table.push_back(0); 716 Table.push_back(0); 717 } 718 719 // We arrive at a category of instructions with the same segment value. 720 // Now delegate to the sub filter chooser for further decodings. 721 // The case may fallthrough, which happens if the remaining well-known 722 // encoding bits do not match exactly. 723 Filter.second->emitTableEntries(TableInfo); 724 725 // Now that we've emitted the body of the handler, update the NumToSkip 726 // of the filter itself to be able to skip forward when false. Subtract 727 // two as to account for the width of the NumToSkip field itself. 728 if (PrevFilter) { 729 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 730 assert(NumToSkip < (1u << 24) && 731 "disassembler decoding table too large!"); 732 Table[PrevFilter] = (uint8_t)NumToSkip; 733 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 734 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 735 } 736 } 737 738 // Any remaining unresolved fixups bubble up to the parent fixup scope. 739 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 740 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 741 FixupScopeList::iterator Dest = Source - 1; 742 llvm::append_range(*Dest, *Source); 743 TableInfo.FixupStack.pop_back(); 744 745 // If there is no fallthrough, then the final filter should get fixed 746 // up according to the enclosing scope rather than the current position. 747 if (!HasFallthrough) 748 TableInfo.FixupStack.back().push_back(PrevFilter); 749 } 750 751 // Returns the number of fanout produced by the filter. More fanout implies 752 // the filter distinguishes more categories of instructions. 753 unsigned Filter::usefulness() const { 754 if (!VariableInstructions.empty()) 755 return FilteredInstructions.size(); 756 else 757 return FilteredInstructions.size() + 1; 758 } 759 760 ////////////////////////////////// 761 // // 762 // Filterchooser Implementation // 763 // // 764 ////////////////////////////////// 765 766 // Emit the decoder state machine table. 767 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 768 unsigned Indentation, unsigned BitWidth, 769 StringRef Namespace) const { 770 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 771 << BitWidth << "[] = {\n"; 772 773 Indentation += 2; 774 775 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 776 auto emitULEB128 = [](DecoderTable::const_iterator I, 777 formatted_raw_ostream &OS) { 778 unsigned Len = 0; 779 while (*I >= 128) { 780 OS << (unsigned)*I++ << ", "; 781 Len++; 782 } 783 OS << (unsigned)*I++ << ", "; 784 return Len + 1; 785 }; 786 787 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 788 auto emitNumToSkip = [](DecoderTable::const_iterator I, 789 formatted_raw_ostream &OS) { 790 uint8_t Byte = *I++; 791 uint32_t NumToSkip = Byte; 792 OS << (unsigned)Byte << ", "; 793 Byte = *I++; 794 OS << (unsigned)Byte << ", "; 795 NumToSkip |= Byte << 8; 796 Byte = *I++; 797 OS << utostr(Byte) << ", "; 798 NumToSkip |= Byte << 16; 799 return NumToSkip; 800 }; 801 802 // FIXME: We may be able to use the NumToSkip values to recover 803 // appropriate indentation levels. 804 DecoderTable::const_iterator I = Table.begin(); 805 DecoderTable::const_iterator E = Table.end(); 806 while (I != E) { 807 assert(I < E && "incomplete decode table entry!"); 808 809 uint64_t Pos = I - Table.begin(); 810 OS << "/* " << Pos << " */"; 811 OS.PadToColumn(12); 812 813 switch (*I) { 814 default: 815 PrintFatalError("invalid decode table opcode"); 816 case MCD::OPC_ExtractField: { 817 ++I; 818 OS.indent(Indentation) << "MCD::OPC_ExtractField, "; 819 820 // ULEB128 encoded start value. 821 const char *ErrMsg = nullptr; 822 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 823 Table.data() + Table.size(), &ErrMsg); 824 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 825 I += emitULEB128(I, OS); 826 827 unsigned Len = *I++; 828 OS << Len << ", // Inst{"; 829 if (Len > 1) 830 OS << (Start + Len - 1) << "-"; 831 OS << Start << "} ...\n"; 832 break; 833 } 834 case MCD::OPC_FilterValue: { 835 ++I; 836 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 837 // The filter value is ULEB128 encoded. 838 I += emitULEB128(I, OS); 839 840 // 24-bit numtoskip value. 841 uint32_t NumToSkip = emitNumToSkip(I, OS); 842 I += 3; 843 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 844 break; 845 } 846 case MCD::OPC_CheckField: { 847 ++I; 848 OS.indent(Indentation) << "MCD::OPC_CheckField, "; 849 // ULEB128 encoded start value. 850 I += emitULEB128(I, OS); 851 // 8-bit length. 852 unsigned Len = *I++; 853 OS << Len << ", "; 854 // ULEB128 encoded field value. 855 I += emitULEB128(I, OS); 856 857 // 24-bit numtoskip value. 858 uint32_t NumToSkip = emitNumToSkip(I, OS); 859 I += 3; 860 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 861 break; 862 } 863 case MCD::OPC_CheckPredicate: { 864 ++I; 865 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 866 I += emitULEB128(I, OS); 867 868 // 24-bit numtoskip value. 869 uint32_t NumToSkip = emitNumToSkip(I, OS); 870 I += 3; 871 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 872 break; 873 } 874 case MCD::OPC_Decode: 875 case MCD::OPC_TryDecode: { 876 bool IsTry = *I == MCD::OPC_TryDecode; 877 ++I; 878 // Decode the Opcode value. 879 const char *ErrMsg = nullptr; 880 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 881 Table.data() + Table.size(), &ErrMsg); 882 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 883 884 OS.indent(Indentation) 885 << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 886 I += emitULEB128(I, OS); 887 888 // Decoder index. 889 I += emitULEB128(I, OS); 890 891 if (!IsTry) { 892 OS << "// Opcode: " << NumberedEncodings[Opc] << "\n"; 893 break; 894 } 895 896 // Fallthrough for OPC_TryDecode. 897 898 // 24-bit numtoskip value. 899 uint32_t NumToSkip = emitNumToSkip(I, OS); 900 I += 3; 901 902 OS << "// Opcode: " << NumberedEncodings[Opc] 903 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 904 break; 905 } 906 case MCD::OPC_SoftFail: { 907 ++I; 908 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 909 // Positive mask 910 uint64_t Value = 0; 911 unsigned Shift = 0; 912 do { 913 OS << ", " << (unsigned)*I; 914 Value += (*I & 0x7f) << Shift; 915 Shift += 7; 916 } while (*I++ >= 128); 917 if (Value > 127) { 918 OS << " /* 0x"; 919 OS.write_hex(Value); 920 OS << " */"; 921 } 922 // Negative mask 923 Value = 0; 924 Shift = 0; 925 do { 926 OS << ", " << (unsigned)*I; 927 Value += (*I & 0x7f) << Shift; 928 Shift += 7; 929 } while (*I++ >= 128); 930 if (Value > 127) { 931 OS << " /* 0x"; 932 OS.write_hex(Value); 933 OS << " */"; 934 } 935 OS << ",\n"; 936 break; 937 } 938 case MCD::OPC_Fail: { 939 ++I; 940 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 941 break; 942 } 943 } 944 } 945 OS.indent(Indentation) << "0\n"; 946 947 Indentation -= 2; 948 949 OS.indent(Indentation) << "};\n\n"; 950 } 951 952 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 953 std::vector<unsigned> &InstrLen) const { 954 OS << "static const uint8_t InstrLenTable[] = {\n"; 955 for (unsigned &Len : InstrLen) { 956 OS << Len << ",\n"; 957 } 958 OS << "};\n\n"; 959 } 960 961 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 962 PredicateSet &Predicates, 963 unsigned Indentation) const { 964 // The predicate function is just a big switch statement based on the 965 // input predicate index. 966 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 967 << "const FeatureBitset &Bits) {\n"; 968 Indentation += 2; 969 if (!Predicates.empty()) { 970 OS.indent(Indentation) << "switch (Idx) {\n"; 971 OS.indent(Indentation) 972 << "default: llvm_unreachable(\"Invalid index!\");\n"; 973 unsigned Index = 0; 974 for (const auto &Predicate : Predicates) { 975 OS.indent(Indentation) << "case " << Index++ << ":\n"; 976 OS.indent(Indentation + 2) << "return (" << Predicate << ");\n"; 977 } 978 OS.indent(Indentation) << "}\n"; 979 } else { 980 // No case statement to emit 981 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 982 } 983 Indentation -= 2; 984 OS.indent(Indentation) << "}\n\n"; 985 } 986 987 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 988 DecoderSet &Decoders, 989 unsigned Indentation) const { 990 // The decoder function is just a big switch statement based on the 991 // input decoder index. 992 OS.indent(Indentation) << "template <typename InsnType>\n"; 993 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 994 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 995 OS.indent(Indentation) 996 << " uint64_t " 997 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 998 Indentation += 2; 999 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1000 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1001 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1002 // possible but fall back to an InsnType-sized tmp for truly large fields. 1003 OS.indent(Indentation) << "using TmpType = " 1004 "std::conditional_t<std::is_integral<InsnType>::" 1005 "value, InsnType, uint64_t>;\n"; 1006 OS.indent(Indentation) << "TmpType tmp;\n"; 1007 OS.indent(Indentation) << "switch (Idx) {\n"; 1008 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1009 unsigned Index = 0; 1010 for (const auto &Decoder : Decoders) { 1011 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1012 OS << Decoder; 1013 OS.indent(Indentation + 2) << "return S;\n"; 1014 } 1015 OS.indent(Indentation) << "}\n"; 1016 Indentation -= 2; 1017 OS.indent(Indentation) << "}\n\n"; 1018 } 1019 1020 // Populates the field of the insn given the start position and the number of 1021 // consecutive bits to scan for. 1022 // 1023 // Returns false if and on the first uninitialized bit value encountered. 1024 // Returns true, otherwise. 1025 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 1026 unsigned StartBit, unsigned NumBits) const { 1027 Field = 0; 1028 1029 for (unsigned i = 0; i < NumBits; ++i) { 1030 if (Insn[StartBit + i] == BIT_UNSET) 1031 return false; 1032 1033 if (Insn[StartBit + i] == BIT_TRUE) 1034 Field = Field | (1ULL << i); 1035 } 1036 1037 return true; 1038 } 1039 1040 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1041 /// filter array as a series of chars. 1042 void FilterChooser::dumpFilterArray( 1043 raw_ostream &o, const std::vector<bit_value_t> &filter) const { 1044 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1045 switch (filter[bitIndex - 1]) { 1046 case BIT_UNFILTERED: 1047 o << "."; 1048 break; 1049 case BIT_UNSET: 1050 o << "_"; 1051 break; 1052 case BIT_TRUE: 1053 o << "1"; 1054 break; 1055 case BIT_FALSE: 1056 o << "0"; 1057 break; 1058 } 1059 } 1060 } 1061 1062 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1063 /// dumpFilterArray on each filter chooser up to the top level one. 1064 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1065 const FilterChooser *current = this; 1066 1067 while (current) { 1068 o << prefix; 1069 dumpFilterArray(o, current->FilterBitValues); 1070 o << '\n'; 1071 current = current->Parent; 1072 } 1073 } 1074 1075 // Calculates the island(s) needed to decode the instruction. 1076 // This returns a list of undecoded bits of an instructions, for example, 1077 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1078 // decoded bits in order to verify that the instruction matches the Opcode. 1079 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1080 std::vector<unsigned> &EndBits, 1081 std::vector<uint64_t> &FieldVals, 1082 const insn_t &Insn) const { 1083 unsigned Num, BitNo; 1084 Num = BitNo = 0; 1085 1086 uint64_t FieldVal = 0; 1087 1088 // 0: Init 1089 // 1: Water (the bit value does not affect decoding) 1090 // 2: Island (well-known bit value needed for decoding) 1091 int State = 0; 1092 1093 for (unsigned i = 0; i < BitWidth; ++i) { 1094 int64_t Val = Value(Insn[i]); 1095 bool Filtered = PositionFiltered(i); 1096 switch (State) { 1097 default: 1098 llvm_unreachable("Unreachable code!"); 1099 case 0: 1100 case 1: 1101 if (Filtered || Val == -1) 1102 State = 1; // Still in Water 1103 else { 1104 State = 2; // Into the Island 1105 BitNo = 0; 1106 StartBits.push_back(i); 1107 FieldVal = Val; 1108 } 1109 break; 1110 case 2: 1111 if (Filtered || Val == -1) { 1112 State = 1; // Into the Water 1113 EndBits.push_back(i - 1); 1114 FieldVals.push_back(FieldVal); 1115 ++Num; 1116 } else { 1117 State = 2; // Still in Island 1118 ++BitNo; 1119 FieldVal = FieldVal | Val << BitNo; 1120 } 1121 break; 1122 } 1123 } 1124 // If we are still in Island after the loop, do some housekeeping. 1125 if (State == 2) { 1126 EndBits.push_back(BitWidth - 1); 1127 FieldVals.push_back(FieldVal); 1128 ++Num; 1129 } 1130 1131 assert(StartBits.size() == Num && EndBits.size() == Num && 1132 FieldVals.size() == Num); 1133 return Num; 1134 } 1135 1136 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1137 const OperandInfo &OpInfo, 1138 bool &OpHasCompleteDecoder) const { 1139 const std::string &Decoder = OpInfo.Decoder; 1140 1141 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1142 1143 if (UseInsertBits) { 1144 o.indent(Indentation) << "tmp = 0x"; 1145 o.write_hex(OpInfo.InitValue); 1146 o << ";\n"; 1147 } 1148 1149 for (const EncodingField &EF : OpInfo) { 1150 o.indent(Indentation); 1151 if (UseInsertBits) 1152 o << "insertBits(tmp, "; 1153 else 1154 o << "tmp = "; 1155 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1156 if (UseInsertBits) 1157 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1158 else if (EF.Offset != 0) 1159 o << " << " << EF.Offset; 1160 o << ";\n"; 1161 } 1162 1163 if (Decoder != "") { 1164 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1165 o.indent(Indentation) << "if (!Check(S, " << Decoder 1166 << "(MI, tmp, Address, Decoder))) { " 1167 << (OpHasCompleteDecoder ? "" 1168 : "DecodeComplete = false; ") 1169 << "return MCDisassembler::Fail; }\n"; 1170 } else { 1171 OpHasCompleteDecoder = true; 1172 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1173 } 1174 } 1175 1176 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1177 unsigned Opc, bool &HasCompleteDecoder) const { 1178 HasCompleteDecoder = true; 1179 1180 for (const auto &Op : Operands.find(Opc)->second) { 1181 // If a custom instruction decoder was specified, use that. 1182 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1183 HasCompleteDecoder = Op.HasCompleteDecoder; 1184 OS.indent(Indentation) 1185 << "if (!Check(S, " << Op.Decoder 1186 << "(MI, insn, Address, Decoder))) { " 1187 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1188 << "return MCDisassembler::Fail; }\n"; 1189 break; 1190 } 1191 1192 bool OpHasCompleteDecoder; 1193 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1194 if (!OpHasCompleteDecoder) 1195 HasCompleteDecoder = false; 1196 } 1197 } 1198 1199 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1200 bool &HasCompleteDecoder) const { 1201 // Build up the predicate string. 1202 SmallString<256> Decoder; 1203 // FIXME: emitDecoder() function can take a buffer directly rather than 1204 // a stream. 1205 raw_svector_ostream S(Decoder); 1206 unsigned I = 4; 1207 emitDecoder(S, I, Opc, HasCompleteDecoder); 1208 1209 // Using the full decoder string as the key value here is a bit 1210 // heavyweight, but is effective. If the string comparisons become a 1211 // performance concern, we can implement a mangling of the predicate 1212 // data easily enough with a map back to the actual string. That's 1213 // overkill for now, though. 1214 1215 // Make sure the predicate is in the table. 1216 Decoders.insert(CachedHashString(Decoder)); 1217 // Now figure out the index for when we write out the table. 1218 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1219 return (unsigned)(P - Decoders.begin()); 1220 } 1221 1222 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 1223 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 1224 raw_ostream &OS) const { 1225 if (auto *D = dyn_cast<DefInit>(&Val)) { 1226 if (!D->getDef()->isSubClassOf("SubtargetFeature")) 1227 return true; 1228 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() 1229 << "]"; 1230 return false; 1231 } 1232 if (auto *D = dyn_cast<DagInit>(&Val)) { 1233 std::string Op = D->getOperator()->getAsString(); 1234 if (Op == "not" && D->getNumArgs() == 1) { 1235 OS << '!'; 1236 return emitPredicateMatchAux(*D->getArg(0), true, OS); 1237 } 1238 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { 1239 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); 1240 if (Paren) 1241 OS << '('; 1242 ListSeparator LS(Op == "any_of" ? " || " : " && "); 1243 for (auto *Arg : D->getArgs()) { 1244 OS << LS; 1245 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) 1246 return true; 1247 } 1248 if (Paren) 1249 OS << ')'; 1250 return false; 1251 } 1252 } 1253 return true; 1254 } 1255 1256 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 1257 unsigned Opc) const { 1258 ListInit *Predicates = 1259 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1260 bool IsFirstEmission = true; 1261 for (unsigned i = 0; i < Predicates->size(); ++i) { 1262 Record *Pred = Predicates->getElementAsRecord(i); 1263 if (!Pred->getValue("AssemblerMatcherPredicate")) 1264 continue; 1265 1266 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1267 continue; 1268 1269 if (!IsFirstEmission) 1270 o << " && "; 1271 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), 1272 Predicates->size() > 1, o)) 1273 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1274 IsFirstEmission = false; 1275 } 1276 return !Predicates->empty(); 1277 } 1278 1279 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1280 ListInit *Predicates = 1281 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1282 for (unsigned i = 0; i < Predicates->size(); ++i) { 1283 Record *Pred = Predicates->getElementAsRecord(i); 1284 if (!Pred->getValue("AssemblerMatcherPredicate")) 1285 continue; 1286 1287 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1288 return true; 1289 } 1290 return false; 1291 } 1292 1293 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1294 StringRef Predicate) const { 1295 // Using the full predicate string as the key value here is a bit 1296 // heavyweight, but is effective. If the string comparisons become a 1297 // performance concern, we can implement a mangling of the predicate 1298 // data easily enough with a map back to the actual string. That's 1299 // overkill for now, though. 1300 1301 // Make sure the predicate is in the table. 1302 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1303 // Now figure out the index for when we write out the table. 1304 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1305 return (unsigned)(P - TableInfo.Predicates.begin()); 1306 } 1307 1308 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1309 unsigned Opc) const { 1310 if (!doesOpcodeNeedPredicate(Opc)) 1311 return; 1312 1313 // Build up the predicate string. 1314 SmallString<256> Predicate; 1315 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1316 // than a stream. 1317 raw_svector_ostream PS(Predicate); 1318 unsigned I = 0; 1319 emitPredicateMatch(PS, I, Opc); 1320 1321 // Figure out the index into the predicate table for the predicate just 1322 // computed. 1323 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1324 SmallString<16> PBytes; 1325 raw_svector_ostream S(PBytes); 1326 encodeULEB128(PIdx, S); 1327 1328 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1329 // Predicate index 1330 for (unsigned i = 0, e = PBytes.size(); i != e; ++i) 1331 TableInfo.Table.push_back(PBytes[i]); 1332 // Push location for NumToSkip backpatching. 1333 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1334 TableInfo.Table.push_back(0); 1335 TableInfo.Table.push_back(0); 1336 TableInfo.Table.push_back(0); 1337 } 1338 1339 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1340 unsigned Opc) const { 1341 const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail"); 1342 BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1343 1344 if (!SFBits) 1345 return; 1346 BitsInit *InstBits = 1347 AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); 1348 1349 APInt PositiveMask(BitWidth, 0ULL); 1350 APInt NegativeMask(BitWidth, 0ULL); 1351 for (unsigned i = 0; i < BitWidth; ++i) { 1352 bit_value_t B = bitFromBits(*SFBits, i); 1353 bit_value_t IB = bitFromBits(*InstBits, i); 1354 1355 if (B != BIT_TRUE) 1356 continue; 1357 1358 switch (IB) { 1359 case BIT_FALSE: 1360 // The bit is meant to be false, so emit a check to see if it is true. 1361 PositiveMask.setBit(i); 1362 break; 1363 case BIT_TRUE: 1364 // The bit is meant to be true, so emit a check to see if it is false. 1365 NegativeMask.setBit(i); 1366 break; 1367 default: 1368 // The bit is not set; this must be an error! 1369 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1370 << AllInstructions[Opc] << " is set but Inst{" << i 1371 << "} is unset!\n" 1372 << " - You can only mark a bit as SoftFail if it is fully defined" 1373 << " (1/0 - not '?') in Inst\n"; 1374 return; 1375 } 1376 } 1377 1378 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1379 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1380 1381 if (!NeedPositiveMask && !NeedNegativeMask) 1382 return; 1383 1384 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1385 1386 SmallString<16> MaskBytes; 1387 raw_svector_ostream S(MaskBytes); 1388 if (NeedPositiveMask) { 1389 encodeULEB128(PositiveMask.getZExtValue(), S); 1390 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1391 TableInfo.Table.push_back(MaskBytes[i]); 1392 } else 1393 TableInfo.Table.push_back(0); 1394 if (NeedNegativeMask) { 1395 MaskBytes.clear(); 1396 encodeULEB128(NegativeMask.getZExtValue(), S); 1397 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1398 TableInfo.Table.push_back(MaskBytes[i]); 1399 } else 1400 TableInfo.Table.push_back(0); 1401 } 1402 1403 // Emits table entries to decode the singleton. 1404 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1405 EncodingIDAndOpcode Opc) const { 1406 std::vector<unsigned> StartBits; 1407 std::vector<unsigned> EndBits; 1408 std::vector<uint64_t> FieldVals; 1409 insn_t Insn; 1410 insnWithID(Insn, Opc.EncodingID); 1411 1412 // Look for islands of undecoded bits of the singleton. 1413 getIslands(StartBits, EndBits, FieldVals, Insn); 1414 1415 unsigned Size = StartBits.size(); 1416 1417 // Emit the predicate table entry if one is needed. 1418 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1419 1420 // Check any additional encoding fields needed. 1421 for (unsigned I = Size; I != 0; --I) { 1422 unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1; 1423 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 1424 TableInfo.Table.push_back(MCD::OPC_CheckField); 1425 uint8_t Buffer[16], *P; 1426 encodeULEB128(StartBits[I - 1], Buffer); 1427 for (P = Buffer; *P >= 128; ++P) 1428 TableInfo.Table.push_back(*P); 1429 TableInfo.Table.push_back(*P); 1430 TableInfo.Table.push_back(NumBits); 1431 encodeULEB128(FieldVals[I - 1], Buffer); 1432 for (P = Buffer; *P >= 128; ++P) 1433 TableInfo.Table.push_back(*P); 1434 TableInfo.Table.push_back(*P); 1435 // Push location for NumToSkip backpatching. 1436 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1437 // The fixup is always 24-bits, so go ahead and allocate the space 1438 // in the table so all our relative position calculations work OK even 1439 // before we fully resolve the real value here. 1440 TableInfo.Table.push_back(0); 1441 TableInfo.Table.push_back(0); 1442 TableInfo.Table.push_back(0); 1443 } 1444 1445 // Check for soft failure of the match. 1446 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1447 1448 bool HasCompleteDecoder; 1449 unsigned DIdx = 1450 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1451 1452 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1453 // whether the instruction decoder is complete or not. If it is complete 1454 // then it handles all possible values of remaining variable/unfiltered bits 1455 // and for any value can determine if the bitpattern is a valid instruction 1456 // or not. This means OPC_Decode will be the final step in the decoding 1457 // process. If it is not complete, then the Fail return code from the 1458 // decoder method indicates that additional processing should be done to see 1459 // if there is any other instruction that also matches the bitpattern and 1460 // can decode it. 1461 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode 1462 : MCD::OPC_TryDecode); 1463 NumEncodingsSupported++; 1464 uint8_t Buffer[16], *p; 1465 encodeULEB128(Opc.Opcode, Buffer); 1466 for (p = Buffer; *p >= 128; ++p) 1467 TableInfo.Table.push_back(*p); 1468 TableInfo.Table.push_back(*p); 1469 1470 SmallString<16> Bytes; 1471 raw_svector_ostream S(Bytes); 1472 encodeULEB128(DIdx, S); 1473 1474 // Decoder index 1475 for (unsigned i = 0, e = Bytes.size(); i != e; ++i) 1476 TableInfo.Table.push_back(Bytes[i]); 1477 1478 if (!HasCompleteDecoder) { 1479 // Push location for NumToSkip backpatching. 1480 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1481 // Allocate the space for the fixup. 1482 TableInfo.Table.push_back(0); 1483 TableInfo.Table.push_back(0); 1484 TableInfo.Table.push_back(0); 1485 } 1486 } 1487 1488 // Emits table entries to decode the singleton, and then to decode the rest. 1489 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1490 const Filter &Best) const { 1491 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1492 1493 // complex singletons need predicate checks from the first singleton 1494 // to refer forward to the variable filterchooser that follows. 1495 TableInfo.FixupStack.emplace_back(); 1496 1497 emitSingletonTableEntry(TableInfo, Opc); 1498 1499 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1500 TableInfo.Table.size()); 1501 TableInfo.FixupStack.pop_back(); 1502 1503 Best.getVariableFC().emitTableEntries(TableInfo); 1504 } 1505 1506 // Assign a single filter and run with it. Top level API client can initialize 1507 // with a single filter to start the filtering process. 1508 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1509 bool mixed) { 1510 Filters.clear(); 1511 Filters.emplace_back(*this, startBit, numBit, true); 1512 BestIndex = 0; // Sole Filter instance to choose from. 1513 bestFilter().recurse(); 1514 } 1515 1516 // reportRegion is a helper function for filterProcessor to mark a region as 1517 // eligible for use as a filter region. 1518 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1519 unsigned BitIndex, bool AllowMixed) { 1520 if (RA == ATTR_MIXED && AllowMixed) 1521 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1522 else if (RA == ATTR_ALL_SET && !AllowMixed) 1523 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1524 } 1525 1526 // FilterProcessor scans the well-known encoding bits of the instructions and 1527 // builds up a list of candidate filters. It chooses the best filter and 1528 // recursively descends down the decoding tree. 1529 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1530 Filters.clear(); 1531 BestIndex = -1; 1532 unsigned numInstructions = Opcodes.size(); 1533 1534 assert(numInstructions && "Filter created with no instructions"); 1535 1536 // No further filtering is necessary. 1537 if (numInstructions == 1) 1538 return true; 1539 1540 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1541 // instructions is 3. 1542 if (AllowMixed && !Greedy) { 1543 assert(numInstructions == 3); 1544 1545 for (auto Opcode : Opcodes) { 1546 std::vector<unsigned> StartBits; 1547 std::vector<unsigned> EndBits; 1548 std::vector<uint64_t> FieldVals; 1549 insn_t Insn; 1550 1551 insnWithID(Insn, Opcode.EncodingID); 1552 1553 // Look for islands of undecoded bits of any instruction. 1554 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1555 // Found an instruction with island(s). Now just assign a filter. 1556 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1557 return true; 1558 } 1559 } 1560 } 1561 1562 unsigned BitIndex; 1563 1564 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1565 // The automaton consumes the corresponding bit from each 1566 // instruction. 1567 // 1568 // Input symbols: 0, 1, and _ (unset). 1569 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1570 // Initial state: NONE. 1571 // 1572 // (NONE) ------- [01] -> (ALL_SET) 1573 // (NONE) ------- _ ----> (ALL_UNSET) 1574 // (ALL_SET) ---- [01] -> (ALL_SET) 1575 // (ALL_SET) ---- _ ----> (MIXED) 1576 // (ALL_UNSET) -- [01] -> (MIXED) 1577 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1578 // (MIXED) ------ . ----> (MIXED) 1579 // (FILTERED)---- . ----> (FILTERED) 1580 1581 std::vector<bitAttr_t> bitAttrs; 1582 1583 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1584 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1585 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1586 if (FilterBitValues[BitIndex] == BIT_TRUE || 1587 FilterBitValues[BitIndex] == BIT_FALSE) 1588 bitAttrs.push_back(ATTR_FILTERED); 1589 else 1590 bitAttrs.push_back(ATTR_NONE); 1591 1592 for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { 1593 insn_t insn; 1594 1595 insnWithID(insn, Opcodes[InsnIndex].EncodingID); 1596 1597 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1598 switch (bitAttrs[BitIndex]) { 1599 case ATTR_NONE: 1600 if (insn[BitIndex] == BIT_UNSET) 1601 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1602 else 1603 bitAttrs[BitIndex] = ATTR_ALL_SET; 1604 break; 1605 case ATTR_ALL_SET: 1606 if (insn[BitIndex] == BIT_UNSET) 1607 bitAttrs[BitIndex] = ATTR_MIXED; 1608 break; 1609 case ATTR_ALL_UNSET: 1610 if (insn[BitIndex] != BIT_UNSET) 1611 bitAttrs[BitIndex] = ATTR_MIXED; 1612 break; 1613 case ATTR_MIXED: 1614 case ATTR_FILTERED: 1615 break; 1616 } 1617 } 1618 } 1619 1620 // The regionAttr automaton consumes the bitAttrs automatons' state, 1621 // lowest-to-highest. 1622 // 1623 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1624 // States: NONE, ALL_SET, MIXED 1625 // Initial state: NONE 1626 // 1627 // (NONE) ----- F --> (NONE) 1628 // (NONE) ----- S --> (ALL_SET) ; and set region start 1629 // (NONE) ----- U --> (NONE) 1630 // (NONE) ----- M --> (MIXED) ; and set region start 1631 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1632 // (ALL_SET) -- S --> (ALL_SET) 1633 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1634 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1635 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1636 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1637 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1638 // (MIXED) ---- M --> (MIXED) 1639 1640 bitAttr_t RA = ATTR_NONE; 1641 unsigned StartBit = 0; 1642 1643 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1644 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1645 1646 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1647 1648 switch (RA) { 1649 case ATTR_NONE: 1650 switch (bitAttr) { 1651 case ATTR_FILTERED: 1652 break; 1653 case ATTR_ALL_SET: 1654 StartBit = BitIndex; 1655 RA = ATTR_ALL_SET; 1656 break; 1657 case ATTR_ALL_UNSET: 1658 break; 1659 case ATTR_MIXED: 1660 StartBit = BitIndex; 1661 RA = ATTR_MIXED; 1662 break; 1663 default: 1664 llvm_unreachable("Unexpected bitAttr!"); 1665 } 1666 break; 1667 case ATTR_ALL_SET: 1668 switch (bitAttr) { 1669 case ATTR_FILTERED: 1670 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1671 RA = ATTR_NONE; 1672 break; 1673 case ATTR_ALL_SET: 1674 break; 1675 case ATTR_ALL_UNSET: 1676 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1677 RA = ATTR_NONE; 1678 break; 1679 case ATTR_MIXED: 1680 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1681 StartBit = BitIndex; 1682 RA = ATTR_MIXED; 1683 break; 1684 default: 1685 llvm_unreachable("Unexpected bitAttr!"); 1686 } 1687 break; 1688 case ATTR_MIXED: 1689 switch (bitAttr) { 1690 case ATTR_FILTERED: 1691 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1692 StartBit = BitIndex; 1693 RA = ATTR_NONE; 1694 break; 1695 case ATTR_ALL_SET: 1696 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1697 StartBit = BitIndex; 1698 RA = ATTR_ALL_SET; 1699 break; 1700 case ATTR_ALL_UNSET: 1701 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1702 RA = ATTR_NONE; 1703 break; 1704 case ATTR_MIXED: 1705 break; 1706 default: 1707 llvm_unreachable("Unexpected bitAttr!"); 1708 } 1709 break; 1710 case ATTR_ALL_UNSET: 1711 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1712 case ATTR_FILTERED: 1713 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1714 } 1715 } 1716 1717 // At the end, if we're still in ALL_SET or MIXED states, report a region 1718 switch (RA) { 1719 case ATTR_NONE: 1720 break; 1721 case ATTR_FILTERED: 1722 break; 1723 case ATTR_ALL_SET: 1724 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1725 break; 1726 case ATTR_ALL_UNSET: 1727 break; 1728 case ATTR_MIXED: 1729 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1730 break; 1731 } 1732 1733 // We have finished with the filter processings. Now it's time to choose 1734 // the best performing filter. 1735 BestIndex = 0; 1736 bool AllUseless = true; 1737 unsigned BestScore = 0; 1738 1739 for (unsigned i = 0, e = Filters.size(); i != e; ++i) { 1740 unsigned Usefulness = Filters[i].usefulness(); 1741 1742 if (Usefulness) 1743 AllUseless = false; 1744 1745 if (Usefulness > BestScore) { 1746 BestIndex = i; 1747 BestScore = Usefulness; 1748 } 1749 } 1750 1751 if (!AllUseless) 1752 bestFilter().recurse(); 1753 1754 return !AllUseless; 1755 } // end of FilterChooser::filterProcessor(bool) 1756 1757 // Decides on the best configuration of filter(s) to use in order to decode 1758 // the instructions. A conflict of instructions may occur, in which case we 1759 // dump the conflict set to the standard error. 1760 void FilterChooser::doFilter() { 1761 unsigned Num = Opcodes.size(); 1762 assert(Num && "FilterChooser created with no instructions"); 1763 1764 // Try regions of consecutive known bit values first. 1765 if (filterProcessor(false)) 1766 return; 1767 1768 // Then regions of mixed bits (both known and unitialized bit values allowed). 1769 if (filterProcessor(true)) 1770 return; 1771 1772 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1773 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1774 // well-known encoding pattern. In such case, we backtrack and scan for the 1775 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1776 if (Num == 3 && filterProcessor(true, false)) 1777 return; 1778 1779 // If we come to here, the instruction decoding has failed. 1780 // Set the BestIndex to -1 to indicate so. 1781 BestIndex = -1; 1782 } 1783 1784 // emitTableEntries - Emit state machine entries to decode our share of 1785 // instructions. 1786 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { 1787 if (Opcodes.size() == 1) { 1788 // There is only one instruction in the set, which is great! 1789 // Call emitSingletonDecoder() to see whether there are any remaining 1790 // encodings bits. 1791 emitSingletonTableEntry(TableInfo, Opcodes[0]); 1792 return; 1793 } 1794 1795 // Choose the best filter to do the decodings! 1796 if (BestIndex != -1) { 1797 const Filter &Best = Filters[BestIndex]; 1798 if (Best.getNumFiltered() == 1) 1799 emitSingletonTableEntry(TableInfo, Best); 1800 else 1801 Best.emitTableEntry(TableInfo); 1802 return; 1803 } 1804 1805 // We don't know how to decode these instructions! Dump the 1806 // conflict set and bail. 1807 1808 // Print out useful conflict information for postmortem analysis. 1809 errs() << "Decoding Conflict:\n"; 1810 1811 dumpStack(errs(), "\t\t"); 1812 1813 for (auto Opcode : Opcodes) { 1814 errs() << '\t'; 1815 emitNameWithID(errs(), Opcode.EncodingID); 1816 errs() << " "; 1817 dumpBits( 1818 errs(), 1819 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst")); 1820 errs() << '\n'; 1821 } 1822 } 1823 1824 static std::string findOperandDecoderMethod(Record *Record) { 1825 std::string Decoder; 1826 1827 RecordVal *DecoderString = Record->getValue("DecoderMethod"); 1828 StringInit *String = 1829 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; 1830 if (String) { 1831 Decoder = std::string(String->getValue()); 1832 if (!Decoder.empty()) 1833 return Decoder; 1834 } 1835 1836 if (Record->isSubClassOf("RegisterOperand")) 1837 Record = Record->getValueAsDef("RegClass"); 1838 1839 if (Record->isSubClassOf("RegisterClass")) { 1840 Decoder = "Decode" + Record->getName().str() + "RegisterClass"; 1841 } else if (Record->isSubClassOf("PointerLikeRegClass")) { 1842 Decoder = "DecodePointerLikeRegClass" + 1843 utostr(Record->getValueAsInt("RegClassKind")); 1844 } 1845 1846 return Decoder; 1847 } 1848 1849 OperandInfo getOpInfo(Record *TypeRecord) { 1850 std::string Decoder = findOperandDecoderMethod(TypeRecord); 1851 1852 RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder"); 1853 BitInit *HasCompleteDecoderBit = 1854 HasCompleteDecoderVal 1855 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 1856 : nullptr; 1857 bool HasCompleteDecoder = 1858 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; 1859 1860 return OperandInfo(Decoder, HasCompleteDecoder); 1861 } 1862 1863 void parseVarLenInstOperand(const Record &Def, 1864 std::vector<OperandInfo> &Operands, 1865 const CodeGenInstruction &CGI) { 1866 1867 const RecordVal *RV = Def.getValue("Inst"); 1868 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV); 1869 SmallVector<int> TiedTo; 1870 1871 for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) { 1872 auto &Op = CGI.Operands[Idx]; 1873 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) 1874 for (auto *Arg : Op.MIOperandInfo->getArgs()) 1875 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef())); 1876 else 1877 Operands.push_back(getOpInfo(Op.Rec)); 1878 1879 int TiedReg = Op.getTiedRegister(); 1880 TiedTo.push_back(-1); 1881 if (TiedReg != -1) { 1882 TiedTo[Idx] = TiedReg; 1883 TiedTo[TiedReg] = Idx; 1884 } 1885 } 1886 1887 unsigned CurrBitPos = 0; 1888 for (auto &EncodingSegment : VLI) { 1889 unsigned Offset = 0; 1890 StringRef OpName; 1891 1892 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) { 1893 OpName = SI->getValue(); 1894 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) { 1895 OpName = cast<StringInit>(DI->getArg(0))->getValue(); 1896 Offset = cast<IntInit>(DI->getArg(2))->getValue(); 1897 } 1898 1899 if (!OpName.empty()) { 1900 auto OpSubOpPair = 1901 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( 1902 OpName); 1903 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair); 1904 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1905 if (!EncodingSegment.CustomDecoder.empty()) 1906 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str(); 1907 1908 int TiedReg = TiedTo[OpSubOpPair.first]; 1909 if (TiedReg != -1) { 1910 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( 1911 std::pair(TiedReg, OpSubOpPair.second)); 1912 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1913 } 1914 } 1915 1916 CurrBitPos += EncodingSegment.BitWidth; 1917 } 1918 } 1919 1920 static void debugDumpRecord(const Record &Rec) { 1921 // Dump the record, so we can see what's going on... 1922 std::string E; 1923 raw_string_ostream S(E); 1924 S << "Dumping record for previous error:\n"; 1925 S << Rec; 1926 PrintNote(E); 1927 } 1928 1929 /// For an operand field named OpName: populate OpInfo.InitValue with the 1930 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1931 /// insert from the decoded instruction. 1932 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1933 std::map<std::string, std::string> &TiedNames, 1934 StringRef OpName, OperandInfo &OpInfo) { 1935 // Some bits of the operand may be required to be 1 depending on the 1936 // instruction's encoding. Collect those bits. 1937 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1938 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1939 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1940 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1941 if (OpBit->getValue()) 1942 OpInfo.InitValue |= 1ULL << I; 1943 1944 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1945 VarInit *Var; 1946 unsigned Offset = 0; 1947 for (; J != Bits.getNumBits(); ++J) { 1948 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1949 if (BJ) { 1950 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1951 if (I == J) 1952 Offset = BJ->getBitNum(); 1953 else if (BJ->getBitNum() != Offset + J - I) 1954 break; 1955 } else { 1956 Var = dyn_cast<VarInit>(Bits.getBit(J)); 1957 } 1958 if (!Var || (Var->getName() != OpName && 1959 Var->getName() != TiedNames[std::string(OpName)])) 1960 break; 1961 } 1962 if (I == J) 1963 ++J; 1964 else 1965 OpInfo.addField(I, J - I, Offset); 1966 } 1967 } 1968 1969 static unsigned 1970 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 1971 const CodeGenInstruction &CGI, unsigned Opc, 1972 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1973 bool IsVarLenInst) { 1974 const Record &Def = *CGI.TheDef; 1975 // If all the bit positions are not specified; do not decode this instruction. 1976 // We are bound to fail! For proper disassembly, the well-known encoding bits 1977 // of the instruction must be fully specified. 1978 1979 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 1980 if (Bits.allInComplete()) 1981 return 0; 1982 1983 std::vector<OperandInfo> InsnOperands; 1984 1985 // If the instruction has specified a custom decoding hook, use that instead 1986 // of trying to auto-generate the decoder. 1987 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 1988 if (InstDecoder != "") { 1989 bool HasCompleteInstDecoder = 1990 EncodingDef.getValueAsBit("hasCompleteDecoder"); 1991 InsnOperands.push_back( 1992 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 1993 Operands[Opc] = InsnOperands; 1994 return Bits.getNumBits(); 1995 } 1996 1997 // Generate a description of the operand of the instruction that we know 1998 // how to decode automatically. 1999 // FIXME: We'll need to have a way to manually override this as needed. 2000 2001 // Gather the outputs/inputs of the instruction, so we can find their 2002 // positions in the encoding. This assumes for now that they appear in the 2003 // MCInst in the order that they're listed. 2004 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2005 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2006 DagInit *In = Def.getValueAsDag("InOperandList"); 2007 for (unsigned i = 0; i < Out->getNumArgs(); ++i) 2008 InOutOperands.push_back(std::pair(Out->getArg(i), Out->getArgNameStr(i))); 2009 for (unsigned i = 0; i < In->getNumArgs(); ++i) 2010 InOutOperands.push_back(std::pair(In->getArg(i), In->getArgNameStr(i))); 2011 2012 // Search for tied operands, so that we can correctly instantiate 2013 // operands that are not explicitly represented in the encoding. 2014 std::map<std::string, std::string> TiedNames; 2015 for (unsigned i = 0; i < CGI.Operands.size(); ++i) { 2016 auto &Op = CGI.Operands[i]; 2017 for (unsigned j = 0; j < Op.Constraints.size(); ++j) { 2018 const CGIOperandList::ConstraintInfo &CI = Op.Constraints[j]; 2019 if (CI.isTied()) { 2020 int tiedTo = CI.getTiedOperand(); 2021 std::pair<unsigned, unsigned> SO = 2022 CGI.Operands.getSubOperandNumber(tiedTo); 2023 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2024 if (TiedName.empty()) 2025 TiedName = CGI.Operands[SO.first].Name; 2026 std::string MyName = Op.SubOpNames[j]; 2027 if (MyName.empty()) 2028 MyName = Op.Name; 2029 2030 TiedNames[MyName] = TiedName; 2031 TiedNames[TiedName] = MyName; 2032 } 2033 } 2034 } 2035 2036 if (IsVarLenInst) { 2037 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2038 } else { 2039 // For each operand, see if we can figure out where it is encoded. 2040 for (const auto &Op : InOutOperands) { 2041 Init *OpInit = Op.first; 2042 StringRef OpName = Op.second; 2043 2044 // We're ready to find the instruction encoding locations for this 2045 // operand. 2046 2047 // First, find the operand type ("OpInit"), and sub-op names 2048 // ("SubArgDag") if present. 2049 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2050 if (SubArgDag) 2051 OpInit = SubArgDag->getOperator(); 2052 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2053 // Lookup the sub-operands from the operand type record (note that only 2054 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2055 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2056 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2057 : nullptr; 2058 2059 // Lookup the decoder method and construct a new OperandInfo to hold our 2060 // result. 2061 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2062 2063 // If we have named sub-operands... 2064 if (SubArgDag) { 2065 // Then there should not be a custom decoder specified on the top-level 2066 // type. 2067 if (!OpInfo.Decoder.empty()) { 2068 PrintError(EncodingDef.getLoc(), 2069 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2070 OpInit->getAsString() + 2071 "\" with a custom DecoderMethod, but also named " 2072 "sub-operands."); 2073 continue; 2074 } 2075 2076 // Decode each of the sub-ops separately. 2077 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2078 for (unsigned i = 0; i < SubOps->getNumArgs(); ++i) { 2079 StringRef SubOpName = SubArgDag->getArgNameStr(i); 2080 OperandInfo SubOpInfo = 2081 getOpInfo(cast<DefInit>(SubOps->getArg(i))->getDef()); 2082 2083 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2084 SubOpInfo); 2085 InsnOperands.push_back(SubOpInfo); 2086 } 2087 continue; 2088 } 2089 2090 // Otherwise, if we have an operand with sub-operands, but they aren't 2091 // named... 2092 if (SubOps && OpInfo.Decoder.empty()) { 2093 // If it's a single sub-operand, and no custom decoder, use the decoder 2094 // from the one sub-operand. 2095 if (SubOps->getNumArgs() == 1) 2096 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2097 2098 // If we have multiple sub-ops, there'd better have a custom 2099 // decoder. (Otherwise we don't know how to populate them properly...) 2100 if (SubOps->getNumArgs() > 1) { 2101 PrintError(EncodingDef.getLoc(), 2102 "DecoderEmitter: operand \"" + OpName + 2103 "\" uses MIOperandInfo with multiple ops, but doesn't " 2104 "have a custom decoder!"); 2105 debugDumpRecord(EncodingDef); 2106 continue; 2107 } 2108 } 2109 2110 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2111 // FIXME: it should be an error not to find a definition for a given 2112 // operand, rather than just failing to add it to the resulting 2113 // instruction! (This is a longstanding bug, which will be addressed in an 2114 // upcoming change.) 2115 if (OpInfo.numFields() > 0) 2116 InsnOperands.push_back(OpInfo); 2117 } 2118 } 2119 Operands[Opc] = InsnOperands; 2120 2121 #if 0 2122 LLVM_DEBUG({ 2123 // Dumps the instruction encoding bits. 2124 dumpBits(errs(), Bits); 2125 2126 errs() << '\n'; 2127 2128 // Dumps the list of operand info. 2129 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2130 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2131 const std::string &OperandName = Info.Name; 2132 const Record &OperandDef = *Info.Rec; 2133 2134 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2135 } 2136 }); 2137 #endif 2138 2139 return Bits.getNumBits(); 2140 } 2141 2142 // emitFieldFromInstruction - Emit the templated helper function 2143 // fieldFromInstruction(). 2144 // On Windows we make sure that this function is not inlined when 2145 // using the VS compiler. It has a bug which causes the function 2146 // to be optimized out in some circumstances. See llvm.org/pr38292 2147 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2148 OS << "// Helper functions for extracting fields from encoded instructions.\n" 2149 << "// InsnType must either be integral or an APInt-like object that " 2150 "must:\n" 2151 << "// * be default-constructible and copy-constructible\n" 2152 << "// * be constructible from an APInt (this can be private)\n" 2153 << "// * Support insertBits(bits, startBit, numBits)\n" 2154 << "// * Support extractBitsAsZExtValue(numBits, startBit)\n" 2155 << "// * Support the ~, &, ==, and != operators with other objects of " 2156 "the same type\n" 2157 << "// * Support the != and bitwise & with uint64_t\n" 2158 << "// * Support put (<<) to raw_ostream&\n" 2159 << "template <typename InsnType>\n" 2160 << "#if defined(_MSC_VER) && !defined(__clang__)\n" 2161 << "__declspec(noinline)\n" 2162 << "#endif\n" 2163 << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n" 2164 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2165 << " unsigned numBits) {\n" 2166 << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit " 2167 "extractions!\");\n" 2168 << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n" 2169 << " \"Instruction field out of bounds!\");\n" 2170 << " InsnType fieldMask;\n" 2171 << " if (numBits == sizeof(InsnType) * 8)\n" 2172 << " fieldMask = (InsnType)(-1LL);\n" 2173 << " else\n" 2174 << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n" 2175 << " return (insn & fieldMask) >> startBit;\n" 2176 << "}\n" 2177 << "\n" 2178 << "template <typename InsnType>\n" 2179 << "static std::enable_if_t<!std::is_integral<InsnType>::value, " 2180 "uint64_t>\n" 2181 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2182 << " unsigned numBits) {\n" 2183 << " return insn.extractBitsAsZExtValue(numBits, startBit);\n" 2184 << "}\n\n"; 2185 } 2186 2187 // emitInsertBits - Emit the templated helper function insertBits(). 2188 static void emitInsertBits(formatted_raw_ostream &OS) { 2189 OS << "// Helper function for inserting bits extracted from an encoded " 2190 "instruction into\n" 2191 << "// a field.\n" 2192 << "template <typename InsnType>\n" 2193 << "static std::enable_if_t<std::is_integral<InsnType>::value>\n" 2194 << "insertBits(InsnType &field, InsnType bits, unsigned startBit, " 2195 "unsigned numBits) {\n" 2196 << " assert(startBit + numBits <= sizeof field * 8);\n" 2197 << " field |= (InsnType)bits << startBit;\n" 2198 << "}\n" 2199 << "\n" 2200 << "template <typename InsnType>\n" 2201 << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n" 2202 << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, " 2203 "unsigned numBits) {\n" 2204 << " field.insertBits(bits, startBit, numBits);\n" 2205 << "}\n\n"; 2206 } 2207 2208 // emitDecodeInstruction - Emit the templated helper function 2209 // decodeInstruction(). 2210 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2211 bool IsVarLenInst) { 2212 OS << "template <typename InsnType>\n" 2213 << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], " 2214 "MCInst &MI,\n" 2215 << " InsnType insn, uint64_t " 2216 "Address,\n" 2217 << " const MCDisassembler *DisAsm,\n" 2218 << " const MCSubtargetInfo &STI"; 2219 if (IsVarLenInst) { 2220 OS << ",\n" 2221 << " llvm::function_ref<void(APInt " 2222 "&," 2223 << " uint64_t)> makeUp"; 2224 } 2225 OS << ") {\n" 2226 << " const FeatureBitset &Bits = STI.getFeatureBits();\n" 2227 << "\n" 2228 << " const uint8_t *Ptr = DecodeTable;\n" 2229 << " uint64_t CurFieldValue = 0;\n" 2230 << " DecodeStatus S = MCDisassembler::Success;\n" 2231 << " while (true) {\n" 2232 << " ptrdiff_t Loc = Ptr - DecodeTable;\n" 2233 << " switch (*Ptr) {\n" 2234 << " default:\n" 2235 << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" 2236 << " return MCDisassembler::Fail;\n" 2237 << " case MCD::OPC_ExtractField: {\n" 2238 << " // Decode the start value.\n" 2239 << " unsigned DecodedLen;\n" 2240 << " unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n" 2241 << " Ptr += DecodedLen;\n" 2242 << " unsigned Len = *Ptr++;\n"; 2243 if (IsVarLenInst) 2244 OS << " makeUp(insn, Start + Len);\n"; 2245 OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" 2246 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << " 2247 "\", \"\n" 2248 << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" 2249 << " break;\n" 2250 << " }\n" 2251 << " case MCD::OPC_FilterValue: {\n" 2252 << " // Decode the field value.\n" 2253 << " unsigned Len;\n" 2254 << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n" 2255 << " Ptr += Len;\n" 2256 << " // NumToSkip is a plain 24-bit integer.\n" 2257 << " unsigned NumToSkip = *Ptr++;\n" 2258 << " NumToSkip |= (*Ptr++) << 8;\n" 2259 << " NumToSkip |= (*Ptr++) << 16;\n" 2260 << "\n" 2261 << " // Perform the filter operation.\n" 2262 << " if (Val != CurFieldValue)\n" 2263 << " Ptr += NumToSkip;\n" 2264 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << " 2265 "\", \" << NumToSkip\n" 2266 << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" " 2267 ": \"PASS:\")\n" 2268 << " << \" continuing at \" << (Ptr - DecodeTable) << " 2269 "\"\\n\");\n" 2270 << "\n" 2271 << " break;\n" 2272 << " }\n" 2273 << " case MCD::OPC_CheckField: {\n" 2274 << " // Decode the start value.\n" 2275 << " unsigned Len;\n" 2276 << " unsigned Start = decodeULEB128(++Ptr, &Len);\n" 2277 << " Ptr += Len;\n" 2278 << " Len = *Ptr;\n"; 2279 if (IsVarLenInst) 2280 OS << " makeUp(insn, Start + Len);\n"; 2281 OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n" 2282 << " // Decode the field value.\n" 2283 << " unsigned PtrLen = 0;\n" 2284 << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n" 2285 << " Ptr += PtrLen;\n" 2286 << " // NumToSkip is a plain 24-bit integer.\n" 2287 << " unsigned NumToSkip = *Ptr++;\n" 2288 << " NumToSkip |= (*Ptr++) << 8;\n" 2289 << " NumToSkip |= (*Ptr++) << 16;\n" 2290 << "\n" 2291 << " // If the actual and expected values don't match, skip.\n" 2292 << " if (ExpectedValue != FieldValue)\n" 2293 << " Ptr += NumToSkip;\n" 2294 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << " 2295 "\", \"\n" 2296 << " << Len << \", \" << ExpectedValue << \", \" << " 2297 "NumToSkip\n" 2298 << " << \"): FieldValue = \" << FieldValue << \", " 2299 "ExpectedValue = \"\n" 2300 << " << ExpectedValue << \": \"\n" 2301 << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : " 2302 "\"FAIL\\n\"));\n" 2303 << " break;\n" 2304 << " }\n" 2305 << " case MCD::OPC_CheckPredicate: {\n" 2306 << " unsigned Len;\n" 2307 << " // Decode the Predicate Index value.\n" 2308 << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" 2309 << " Ptr += Len;\n" 2310 << " // NumToSkip is a plain 24-bit integer.\n" 2311 << " unsigned NumToSkip = *Ptr++;\n" 2312 << " NumToSkip |= (*Ptr++) << 8;\n" 2313 << " NumToSkip |= (*Ptr++) << 16;\n" 2314 << " // Check the predicate.\n" 2315 << " bool Pred;\n" 2316 << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" 2317 << " Ptr += NumToSkip;\n" 2318 << " (void)Pred;\n" 2319 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx " 2320 "<< \"): \"\n" 2321 << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n" 2322 << "\n" 2323 << " break;\n" 2324 << " }\n" 2325 << " case MCD::OPC_Decode: {\n" 2326 << " unsigned Len;\n" 2327 << " // Decode the Opcode value.\n" 2328 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2329 << " Ptr += Len;\n" 2330 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2331 << " Ptr += Len;\n" 2332 << "\n" 2333 << " MI.clear();\n" 2334 << " MI.setOpcode(Opc);\n" 2335 << " bool DecodeComplete;\n"; 2336 if (IsVarLenInst) { 2337 OS << " Len = InstrLenTable[Opc];\n" 2338 << " makeUp(insn, Len);\n"; 2339 } 2340 OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, " 2341 "DecodeComplete);\n" 2342 << " assert(DecodeComplete);\n" 2343 << "\n" 2344 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" 2345 << " << \", using decoder \" << DecodeIdx << \": \"\n" 2346 << " << (S != MCDisassembler::Fail ? \"PASS\" : " 2347 "\"FAIL\") << \"\\n\");\n" 2348 << " return S;\n" 2349 << " }\n" 2350 << " case MCD::OPC_TryDecode: {\n" 2351 << " unsigned Len;\n" 2352 << " // Decode the Opcode value.\n" 2353 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2354 << " Ptr += Len;\n" 2355 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2356 << " Ptr += Len;\n" 2357 << " // NumToSkip is a plain 24-bit integer.\n" 2358 << " unsigned NumToSkip = *Ptr++;\n" 2359 << " NumToSkip |= (*Ptr++) << 8;\n" 2360 << " NumToSkip |= (*Ptr++) << 16;\n" 2361 << "\n" 2362 << " // Perform the decode operation.\n" 2363 << " MCInst TmpMI;\n" 2364 << " TmpMI.setOpcode(Opc);\n" 2365 << " bool DecodeComplete;\n" 2366 << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, " 2367 "DecodeComplete);\n" 2368 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << " 2369 "Opc\n" 2370 << " << \", using decoder \" << DecodeIdx << \": \");\n" 2371 << "\n" 2372 << " if (DecodeComplete) {\n" 2373 << " // Decoding complete.\n" 2374 << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : " 2375 "\"FAIL\") << \"\\n\");\n" 2376 << " MI = TmpMI;\n" 2377 << " return S;\n" 2378 << " } else {\n" 2379 << " assert(S == MCDisassembler::Fail);\n" 2380 << " // If the decoding was incomplete, skip.\n" 2381 << " Ptr += NumToSkip;\n" 2382 << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - " 2383 "DecodeTable) << \"\\n\");\n" 2384 << " // Reset decode status. This also drops a SoftFail status " 2385 "that could be\n" 2386 << " // set before the decode attempt.\n" 2387 << " S = MCDisassembler::Success;\n" 2388 << " }\n" 2389 << " break;\n" 2390 << " }\n" 2391 << " case MCD::OPC_SoftFail: {\n" 2392 << " // Decode the mask values.\n" 2393 << " unsigned Len;\n" 2394 << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n" 2395 << " Ptr += Len;\n" 2396 << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n" 2397 << " Ptr += Len;\n" 2398 << " bool Fail = (insn & PositiveMask) != 0 || (~insn & " 2399 "NegativeMask) != 0;\n" 2400 << " if (Fail)\n" 2401 << " S = MCDisassembler::SoftFail;\n" 2402 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? " 2403 "\"FAIL\\n\" : \"PASS\\n\"));\n" 2404 << " break;\n" 2405 << " }\n" 2406 << " case MCD::OPC_Fail: {\n" 2407 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" 2408 << " return MCDisassembler::Fail;\n" 2409 << " }\n" 2410 << " }\n" 2411 << " }\n" 2412 << " llvm_unreachable(\"bogosity detected in disassembler state " 2413 "machine!\");\n" 2414 << "}\n\n"; 2415 } 2416 2417 // Helper to propagate SoftFail status. Returns false if the status is Fail; 2418 // callers are expected to early-exit in that condition. (Note, the '&' operator 2419 // is correct to propagate the values of this enum; see comment on 'enum 2420 // DecodeStatus'.) 2421 static void emitCheck(formatted_raw_ostream &OS) { 2422 OS << "static bool Check(DecodeStatus &Out, DecodeStatus In) {\n" 2423 << " Out = static_cast<DecodeStatus>(Out & In);\n" 2424 << " return Out != MCDisassembler::Fail;\n" 2425 << "}\n\n"; 2426 } 2427 2428 // Emits disassembler code for instruction decoding. 2429 void DecoderEmitter::run(raw_ostream &o) { 2430 formatted_raw_ostream OS(o); 2431 OS << "#include \"llvm/MC/MCInst.h\"\n"; 2432 OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n"; 2433 OS << "#include \"llvm/Support/DataTypes.h\"\n"; 2434 OS << "#include \"llvm/Support/Debug.h\"\n"; 2435 OS << "#include \"llvm/Support/LEB128.h\"\n"; 2436 OS << "#include \"llvm/Support/raw_ostream.h\"\n"; 2437 OS << "#include \"llvm/TargetParser/SubtargetFeature.h\"\n"; 2438 OS << "#include <assert.h>\n"; 2439 OS << '\n'; 2440 OS << "namespace llvm {\n\n"; 2441 2442 emitFieldFromInstruction(OS); 2443 emitInsertBits(OS); 2444 emitCheck(OS); 2445 2446 Target.reverseBitsForLittleEndianEncoding(); 2447 2448 // Parameterize the decoders based on namespace and instruction width. 2449 std::set<StringRef> HwModeNames; 2450 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2451 NumberedEncodings.reserve(NumberedInstructions.size()); 2452 DenseMap<Record *, unsigned> IndexOfInstruction; 2453 // First, collect all HwModes referenced by the target. 2454 for (const auto &NumberedInstruction : NumberedInstructions) { 2455 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2456 2457 if (const RecordVal *RV = 2458 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2459 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2460 const CodeGenHwModes &HWM = Target.getHwModes(); 2461 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2462 for (auto &KV : EBM) 2463 HwModeNames.insert(HWM.getMode(KV.first).Name); 2464 } 2465 } 2466 } 2467 2468 // If HwModeNames is empty, add the empty string so we always have one HwMode. 2469 if (HwModeNames.empty()) 2470 HwModeNames.insert(""); 2471 2472 for (const auto &NumberedInstruction : NumberedInstructions) { 2473 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2474 2475 if (const RecordVal *RV = 2476 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2477 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2478 const CodeGenHwModes &HWM = Target.getHwModes(); 2479 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2480 for (auto &KV : EBM) { 2481 NumberedEncodings.emplace_back(KV.second, NumberedInstruction, 2482 HWM.getMode(KV.first).Name); 2483 HwModeNames.insert(HWM.getMode(KV.first).Name); 2484 } 2485 continue; 2486 } 2487 } 2488 // This instruction is encoded the same on all HwModes. Emit it for all 2489 // HwModes. 2490 for (StringRef HwModeName : HwModeNames) 2491 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2492 NumberedInstruction, HwModeName); 2493 } 2494 for (const auto &NumberedAlias : 2495 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2496 NumberedEncodings.emplace_back( 2497 NumberedAlias, 2498 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2499 2500 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2501 OpcMap; 2502 std::map<unsigned, std::vector<OperandInfo>> Operands; 2503 std::vector<unsigned> InstrLen; 2504 2505 bool IsVarLenInst = 2506 any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 2507 RecordVal *RV = CGI->TheDef->getValue("Inst"); 2508 return RV && isa<DagInit>(RV->getValue()); 2509 }); 2510 unsigned MaxInstLen = 0; 2511 2512 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) { 2513 const Record *EncodingDef = NumberedEncodings[i].EncodingDef; 2514 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst; 2515 const Record *Def = Inst->TheDef; 2516 unsigned Size = EncodingDef->getValueAsInt("Size"); 2517 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2518 Def->getValueAsBit("isPseudo") || 2519 Def->getValueAsBit("isAsmParserOnly") || 2520 Def->getValueAsBit("isCodeGenOnly")) { 2521 NumEncodingsLackingDisasm++; 2522 continue; 2523 } 2524 2525 if (i < NumberedInstructions.size()) 2526 NumInstructions++; 2527 NumEncodings++; 2528 2529 if (!Size && !IsVarLenInst) 2530 continue; 2531 2532 if (IsVarLenInst) 2533 InstrLen.resize(NumberedInstructions.size(), 0); 2534 2535 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i, 2536 Operands, IsVarLenInst)) { 2537 if (IsVarLenInst) { 2538 MaxInstLen = std::max(MaxInstLen, Len); 2539 InstrLen[i] = Len; 2540 } 2541 std::string DecoderNamespace = 2542 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2543 if (!NumberedEncodings[i].HwModeName.empty()) 2544 DecoderNamespace += 2545 std::string("_") + NumberedEncodings[i].HwModeName.str(); 2546 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2547 i, IndexOfInstruction.find(Def)->second); 2548 } else { 2549 NumEncodingsOmitted++; 2550 } 2551 } 2552 2553 DecoderTableInfo TableInfo; 2554 for (const auto &Opc : OpcMap) { 2555 // Emit the decoder for this namespace+width combination. 2556 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2557 NumberedEncodings.size()); 2558 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2559 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2560 2561 // The decode table is cleared for each top level decoder function. The 2562 // predicates and decoders themselves, however, are shared across all 2563 // decoders to give more opportunities for uniqueing. 2564 TableInfo.Table.clear(); 2565 TableInfo.FixupStack.clear(); 2566 TableInfo.Table.reserve(16384); 2567 TableInfo.FixupStack.emplace_back(); 2568 FC.emitTableEntries(TableInfo); 2569 // Any NumToSkip fixups in the top level scope can resolve to the 2570 // OPC_Fail at the end of the table. 2571 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2572 // Resolve any NumToSkip fixups in the current scope. 2573 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2574 TableInfo.Table.size()); 2575 TableInfo.FixupStack.clear(); 2576 2577 TableInfo.Table.push_back(MCD::OPC_Fail); 2578 2579 // Print the table to the output stream. 2580 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first); 2581 } 2582 2583 // For variable instruction, we emit a instruction length table 2584 // to let the decoder know how long the instructions are. 2585 // You can see example usage in M68k's disassembler. 2586 if (IsVarLenInst) 2587 emitInstrLenTable(OS, InstrLen); 2588 // Emit the predicate function. 2589 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2590 2591 // Emit the decoder function. 2592 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2593 2594 // Emit the main entry point for the decoder, decodeInstruction(). 2595 emitDecodeInstruction(OS, IsVarLenInst); 2596 2597 OS << "\n} // end namespace llvm\n"; 2598 } 2599 2600 namespace llvm { 2601 2602 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2603 const std::string &PredicateNamespace) { 2604 DecoderEmitter(RK, PredicateNamespace).run(OS); 2605 } 2606 2607 } // end namespace llvm 2608