//===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Common/CodeGenTarget.h"
#include "TableGenBackends.h"
#include "WebAssemblyDisassemblerEmitter.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

/// DisassemblerEmitter - Contains disassembler table emitters for various
/// architectures.

/// X86 Disassembler Emitter
///
/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
///     THE END OF THIS COMMENT!
///
/// The X86 disassembler emitter is part of the X86 Disassembler, which is
/// documented in lib/Target/X86/X86Disassembler.h.
///
/// The emitter produces the tables that the disassembler uses to translate
/// instructions. The emitter generates the following tables:
///
/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
///   instruction contexts. Although for each attribute there are cases where
///   that attribute determines decoding, in the majority of cases decoding is
///   the same whether or not an attribute is present. For example, a 64-bit
///   instruction with an OPSIZE prefix and an XS prefix decodes the same way in
///   all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix
///   may have effects on its execution, but does not change the instruction
///   returned.) This allows considerable space savings in other tables.
/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM) contain the hierarchy that the
///   decoder traverses while decoding an instruction. At the lowest level of
///   this hierarchy are instruction UIDs, 16-bit integers that can be used to
///   uniquely identify the instruction and correspond exactly to its position
///   in the list of CodeGenInstructions for the target.
/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
///   each instruction and how to decode them.
///
/// During table generation, there may be conflicts between instructions that
/// occupy the same space in the decode tables. These conflicts are resolved as
/// follows in setTableFields() (X86DisassemblerTables.cpp):
///
/// - If the current context is the native context for one of the instructions
///   (that is, the attributes specified for it in the LLVM tables specify
///   precisely the current context), then it has priority.
/// - If the current context isn't native for either of the instructions, then
///   the higher-priority context wins (that is, the one that is more specific).
///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp).
/// - If the current context is native for both instructions, then the table
///   emitter reports a conflict and dies.
///
/// *** RESOLUTION FOR "Primary decode conflict"S
///
/// If two instructions collide, typically the solution is (in order of
/// likelihood):
///
/// (1) to filter out one of the instructions by editing filter()
///     (X86RecognizableInstr.cpp). This is the most common resolution, but
///     check the Intel manuals first to make sure that (2) and (3) are not the
///     problem.
/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
///     accurate. Sometimes they are not.
/// (3) to fix the tables to reflect the actual context (for example, required
///     prefixes), and possibly to add a new context by editing
///     include/llvm/Support/X86DisassemblerDecoderCommon.h. This is unlikely
///     to be the cause.
///
/// DisassemblerEmitter.cpp contains the implementation for the emitter,
///   which simply pulls out instructions from the CodeGenTarget and pushes them
///   into X86DisassemblerTables.
/// X86DisassemblerTables.h contains the interface for the instruction tables,
///   which manage and emit the structures discussed above.
/// X86DisassemblerTables.cpp contains the implementation for the instruction
///   tables.
/// X86ModRMFilters.h contains filters that can be used to determine which
///   ModR/M values are valid for a particular instruction. These are used to
///   populate ModRMDecisions.
/// X86RecognizableInstr.h contains the interface for a single instruction,
///   which knows how to translate itself from a CodeGenInstruction and provide
///   the information necessary for integration into the tables.
/// X86RecognizableInstr.cpp contains the implementation for a single
///   instruction.
960b57cec5SDimitry Andric 9706c3fb27SDimitry Andric static void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { 980b57cec5SDimitry Andric CodeGenTarget Target(Records); 990b57cec5SDimitry Andric emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS); 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric // X86 uses a custom disassembler. 1020b57cec5SDimitry Andric if (Target.getName() == "X86") { 1030b57cec5SDimitry Andric DisassemblerTables Tables; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric ArrayRef<const CodeGenInstruction *> numberedInstructions = 1060b57cec5SDimitry Andric Target.getInstructionsByEnumValue(); 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i) 1090b57cec5SDimitry Andric RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric if (Tables.hasConflicts()) { 1120b57cec5SDimitry Andric PrintError(Target.getTargetRecord()->getLoc(), "Primary decode conflict"); 1130b57cec5SDimitry Andric return; 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric Tables.emit(OS); 1170b57cec5SDimitry Andric return; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric // WebAssembly has variable length opcodes, so can't use EmitFixedLenDecoder 1210b57cec5SDimitry Andric // below (which depends on a Size table-gen Record), and also uses a custom 1220b57cec5SDimitry Andric // disassembler. 
1230b57cec5SDimitry Andric if (Target.getName() == "WebAssembly") { 1240b57cec5SDimitry Andric emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue()); 1250b57cec5SDimitry Andric return; 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1285ffd83dbSDimitry Andric std::string PredicateNamespace = std::string(Target.getName()); 1290b57cec5SDimitry Andric if (PredicateNamespace == "Thumb") 1300b57cec5SDimitry Andric PredicateNamespace = "ARM"; 131bdd1243dSDimitry Andric EmitDecoder(Records, OS, PredicateNamespace); 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric 134*0fca6ea1SDimitry Andric cl::OptionCategory DisassemblerEmitterCat("Options for -gen-disassembler"); 135*0fca6ea1SDimitry Andric 13606c3fb27SDimitry Andric static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler, 13706c3fb27SDimitry Andric "Generate disassembler"); 138