//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets. The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is. The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode. The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.). The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read. For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode. This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance). Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself. The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
// invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
// table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
// factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
// responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
101480093f4SDimitry Andric struct ModRMDecision { 102480093f4SDimitry Andric uint8_t modrm_type; 103480093f4SDimitry Andric uint16_t instructionIDs; 104480093f4SDimitry Andric }; 105480093f4SDimitry Andric 106480093f4SDimitry Andric // Specifies which set of ModR/M->instruction tables to look at 107480093f4SDimitry Andric // given a particular opcode. 108480093f4SDimitry Andric struct OpcodeDecision { 109480093f4SDimitry Andric ModRMDecision modRMDecisions[256]; 110480093f4SDimitry Andric }; 111480093f4SDimitry Andric 112480093f4SDimitry Andric // Specifies which opcode->instruction tables to look at given 113480093f4SDimitry Andric // a particular context (set of attributes). Since there are many possible 114480093f4SDimitry Andric // contexts, the decoder first uses CONTEXTS_SYM to determine which context 115480093f4SDimitry Andric // applies given a specific set of attributes. Hence there are only IC_max 116480093f4SDimitry Andric // entries in this table, rather than 2^(ATTR_max). 117480093f4SDimitry Andric struct ContextDecision { 118480093f4SDimitry Andric OpcodeDecision opcodeDecisions[IC_max]; 119480093f4SDimitry Andric }; 120480093f4SDimitry Andric 121480093f4SDimitry Andric #include "X86GenDisassemblerTables.inc" 122480093f4SDimitry Andric 123480093f4SDimitry Andric static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124480093f4SDimitry Andric uint8_t opcode, uint8_t modRM) { 125480093f4SDimitry Andric const struct ModRMDecision *dec; 126480093f4SDimitry Andric 127480093f4SDimitry Andric switch (type) { 128480093f4SDimitry Andric case ONEBYTE: 129480093f4SDimitry Andric dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130480093f4SDimitry Andric break; 131480093f4SDimitry Andric case TWOBYTE: 132480093f4SDimitry Andric dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133480093f4SDimitry Andric break; 134480093f4SDimitry Andric case THREEBYTE_38: 135480093f4SDimitry Andric dec = 
&THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136480093f4SDimitry Andric break; 137480093f4SDimitry Andric case THREEBYTE_3A: 138480093f4SDimitry Andric dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139480093f4SDimitry Andric break; 140480093f4SDimitry Andric case XOP8_MAP: 141480093f4SDimitry Andric dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142480093f4SDimitry Andric break; 143480093f4SDimitry Andric case XOP9_MAP: 144480093f4SDimitry Andric dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145480093f4SDimitry Andric break; 146480093f4SDimitry Andric case XOPA_MAP: 147480093f4SDimitry Andric dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148480093f4SDimitry Andric break; 149480093f4SDimitry Andric case THREEDNOW_MAP: 150480093f4SDimitry Andric dec = 151480093f4SDimitry Andric &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152480093f4SDimitry Andric break; 1535f757f3fSDimitry Andric case MAP4: 1545f757f3fSDimitry Andric dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 1555f757f3fSDimitry Andric break; 156349cc55cSDimitry Andric case MAP5: 157349cc55cSDimitry Andric dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158349cc55cSDimitry Andric break; 159349cc55cSDimitry Andric case MAP6: 160349cc55cSDimitry Andric dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 161349cc55cSDimitry Andric break; 1625f757f3fSDimitry Andric case MAP7: 1635f757f3fSDimitry Andric dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 1645f757f3fSDimitry Andric break; 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 167480093f4SDimitry Andric switch (dec->modrm_type) { 168480093f4SDimitry Andric default: 169480093f4SDimitry Andric llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 170480093f4SDimitry Andric return 0; 171480093f4SDimitry Andric case MODRM_ONEENTRY: 172480093f4SDimitry Andric return modRMTable[dec->instructionIDs]; 173480093f4SDimitry Andric case MODRM_SPLITRM: 174480093f4SDimitry Andric if (modFromModRM(modRM) == 0x3) 175480093f4SDimitry Andric return modRMTable[dec->instructionIDs + 1]; 176480093f4SDimitry Andric return modRMTable[dec->instructionIDs]; 177480093f4SDimitry Andric case MODRM_SPLITREG: 178480093f4SDimitry Andric if (modFromModRM(modRM) == 0x3) 179480093f4SDimitry Andric return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 180480093f4SDimitry Andric return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 181480093f4SDimitry Andric case MODRM_SPLITMISC: 182480093f4SDimitry Andric if (modFromModRM(modRM) == 0x3) 183480093f4SDimitry Andric return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 184480093f4SDimitry Andric return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 185480093f4SDimitry Andric case MODRM_FULL: 186480093f4SDimitry Andric return modRMTable[dec->instructionIDs + modRM]; 187480093f4SDimitry Andric } 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric 190480093f4SDimitry Andric static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 191480093f4SDimitry Andric uint64_t offset = insn->readerCursor - insn->startLocation; 192480093f4SDimitry Andric if (offset >= insn->bytes.size()) 193480093f4SDimitry Andric return true; 194480093f4SDimitry Andric byte = insn->bytes[offset]; 195480093f4SDimitry Andric return false; 196480093f4SDimitry Andric } 197480093f4SDimitry Andric 198480093f4SDimitry Andric template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 199480093f4SDimitry Andric auto r = insn->bytes; 200480093f4SDimitry Andric uint64_t offset = insn->readerCursor - insn->startLocation; 201480093f4SDimitry Andric if (offset + sizeof(T) > r.size()) 202480093f4SDimitry Andric return true; 
2035f757f3fSDimitry Andric ptr = support::endian::read<T>(&r[offset], llvm::endianness::little); 204480093f4SDimitry Andric insn->readerCursor += sizeof(T); 205480093f4SDimitry Andric return false; 206480093f4SDimitry Andric } 207480093f4SDimitry Andric 208480093f4SDimitry Andric static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 209480093f4SDimitry Andric return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 210480093f4SDimitry Andric } 211480093f4SDimitry Andric 2125f757f3fSDimitry Andric static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { 2135f757f3fSDimitry Andric return insn->mode == MODE_64BIT && prefix == 0xd5; 2145f757f3fSDimitry Andric } 2155f757f3fSDimitry Andric 216480093f4SDimitry Andric // Consumes all of an instruction's prefix bytes, and marks the 217480093f4SDimitry Andric // instruction as having them. Also sets the instruction's default operand, 218480093f4SDimitry Andric // address, and other relevant data sizes to report operands correctly. 219480093f4SDimitry Andric // 220480093f4SDimitry Andric // insn must not be empty. 221480093f4SDimitry Andric static int readPrefixes(struct InternalInstruction *insn) { 222480093f4SDimitry Andric bool isPrefix = true; 223480093f4SDimitry Andric uint8_t byte = 0; 224480093f4SDimitry Andric uint8_t nextByte; 225480093f4SDimitry Andric 226480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readPrefixes()"); 227480093f4SDimitry Andric 228480093f4SDimitry Andric while (isPrefix) { 229480093f4SDimitry Andric // If we fail reading prefixes, just stop here and let the opcode reader 230480093f4SDimitry Andric // deal with it. 231480093f4SDimitry Andric if (consume(insn, byte)) 232480093f4SDimitry Andric break; 233480093f4SDimitry Andric 234480093f4SDimitry Andric // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 235480093f4SDimitry Andric // break and let it be disassembled as a normal "instruction". 
236480093f4SDimitry Andric if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 237480093f4SDimitry Andric break; 238480093f4SDimitry Andric 239480093f4SDimitry Andric if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { 240480093f4SDimitry Andric // If the byte is 0xf2 or 0xf3, and any of the following conditions are 241480093f4SDimitry Andric // met: 242480093f4SDimitry Andric // - it is followed by a LOCK (0xf0) prefix 243480093f4SDimitry Andric // - it is followed by an xchg instruction 244480093f4SDimitry Andric // then it should be disassembled as a xacquire/xrelease not repne/rep. 245480093f4SDimitry Andric if (((nextByte == 0xf0) || 246480093f4SDimitry Andric ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 247480093f4SDimitry Andric insn->xAcquireRelease = true; 248480093f4SDimitry Andric if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 249480093f4SDimitry Andric break; 250480093f4SDimitry Andric } 251480093f4SDimitry Andric // Also if the byte is 0xf3, and the following condition is met: 252480093f4SDimitry Andric // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 253480093f4SDimitry Andric // "mov mem, imm" (opcode 0xc6/0xc7) instructions. 254480093f4SDimitry Andric // then it should be disassembled as an xrelease not rep. 
255480093f4SDimitry Andric if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 256480093f4SDimitry Andric nextByte == 0xc6 || nextByte == 0xc7)) { 257480093f4SDimitry Andric insn->xAcquireRelease = true; 258480093f4SDimitry Andric break; 259480093f4SDimitry Andric } 260480093f4SDimitry Andric if (isREX(insn, nextByte)) { 261480093f4SDimitry Andric uint8_t nnextByte; 262480093f4SDimitry Andric // Go to REX prefix after the current one 263480093f4SDimitry Andric if (consume(insn, nnextByte)) 264480093f4SDimitry Andric return -1; 265480093f4SDimitry Andric // We should be able to read next byte after REX prefix 266480093f4SDimitry Andric if (peek(insn, nnextByte)) 267480093f4SDimitry Andric return -1; 268480093f4SDimitry Andric --insn->readerCursor; 269480093f4SDimitry Andric } 270480093f4SDimitry Andric } 271480093f4SDimitry Andric 272480093f4SDimitry Andric switch (byte) { 273480093f4SDimitry Andric case 0xf0: // LOCK 274480093f4SDimitry Andric insn->hasLockPrefix = true; 275480093f4SDimitry Andric break; 276480093f4SDimitry Andric case 0xf2: // REPNE/REPNZ 277480093f4SDimitry Andric case 0xf3: { // REP or REPE/REPZ 278480093f4SDimitry Andric uint8_t nextByte; 279480093f4SDimitry Andric if (peek(insn, nextByte)) 280480093f4SDimitry Andric break; 281480093f4SDimitry Andric // TODO: 282480093f4SDimitry Andric // 1. There could be several 0x66 283480093f4SDimitry Andric // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 284480093f4SDimitry Andric // it's not mandatory prefix 285480093f4SDimitry Andric // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 286480093f4SDimitry Andric // 0x0f exactly after it to be mandatory prefix 287*0fca6ea1SDimitry Andric // 4. 
if (nextByte == 0xd5) it's REX2 and we need 288*0fca6ea1SDimitry Andric // 0x0f exactly after it to be mandatory prefix 289*0fca6ea1SDimitry Andric if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f || 290*0fca6ea1SDimitry Andric nextByte == 0x66) 291480093f4SDimitry Andric // The last of 0xf2 /0xf3 is mandatory prefix 292480093f4SDimitry Andric insn->mandatoryPrefix = byte; 293480093f4SDimitry Andric insn->repeatPrefix = byte; 294480093f4SDimitry Andric break; 295480093f4SDimitry Andric } 296480093f4SDimitry Andric case 0x2e: // CS segment override -OR- Branch not taken 297480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_CS; 298480093f4SDimitry Andric break; 299480093f4SDimitry Andric case 0x36: // SS segment override -OR- Branch taken 300480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_SS; 301480093f4SDimitry Andric break; 302480093f4SDimitry Andric case 0x3e: // DS segment override 303480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_DS; 304480093f4SDimitry Andric break; 305480093f4SDimitry Andric case 0x26: // ES segment override 306480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_ES; 307480093f4SDimitry Andric break; 308480093f4SDimitry Andric case 0x64: // FS segment override 309480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_FS; 310480093f4SDimitry Andric break; 311480093f4SDimitry Andric case 0x65: // GS segment override 312480093f4SDimitry Andric insn->segmentOverride = SEG_OVERRIDE_GS; 313480093f4SDimitry Andric break; 314480093f4SDimitry Andric case 0x66: { // Operand-size override { 315480093f4SDimitry Andric uint8_t nextByte; 316480093f4SDimitry Andric insn->hasOpSize = true; 317480093f4SDimitry Andric if (peek(insn, nextByte)) 318480093f4SDimitry Andric break; 319480093f4SDimitry Andric // 0x66 can't overwrite existing mandatory prefix and should be ignored 320480093f4SDimitry Andric if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) 
321480093f4SDimitry Andric insn->mandatoryPrefix = byte; 322480093f4SDimitry Andric break; 323480093f4SDimitry Andric } 324480093f4SDimitry Andric case 0x67: // Address-size override 325480093f4SDimitry Andric insn->hasAdSize = true; 326480093f4SDimitry Andric break; 327480093f4SDimitry Andric default: // Not a prefix byte 328480093f4SDimitry Andric isPrefix = false; 329480093f4SDimitry Andric break; 330480093f4SDimitry Andric } 331480093f4SDimitry Andric 332480093f4SDimitry Andric if (isPrefix) 333480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); 334480093f4SDimitry Andric } 335480093f4SDimitry Andric 336480093f4SDimitry Andric insn->vectorExtensionType = TYPE_NO_VEX_XOP; 337480093f4SDimitry Andric 338480093f4SDimitry Andric if (byte == 0x62) { 339480093f4SDimitry Andric uint8_t byte1, byte2; 340480093f4SDimitry Andric if (consume(insn, byte1)) { 341480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix"); 342480093f4SDimitry Andric return -1; 343480093f4SDimitry Andric } 344480093f4SDimitry Andric 345480093f4SDimitry Andric if (peek(insn, byte2)) { 346480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 347480093f4SDimitry Andric return -1; 348480093f4SDimitry Andric } 349480093f4SDimitry Andric 3505f757f3fSDimitry Andric if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) { 351480093f4SDimitry Andric insn->vectorExtensionType = TYPE_EVEX; 352480093f4SDimitry Andric } else { 353480093f4SDimitry Andric --insn->readerCursor; // unconsume byte1 354480093f4SDimitry Andric --insn->readerCursor; // unconsume byte 355480093f4SDimitry Andric } 356480093f4SDimitry Andric 357480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) { 358480093f4SDimitry Andric insn->vectorExtensionPrefix[0] = byte; 359480093f4SDimitry Andric insn->vectorExtensionPrefix[1] = byte1; 360480093f4SDimitry Andric if (consume(insn, insn->vectorExtensionPrefix[2])) { 
361480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 362480093f4SDimitry Andric return -1; 363480093f4SDimitry Andric } 364480093f4SDimitry Andric if (consume(insn, insn->vectorExtensionPrefix[3])) { 365480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix"); 366480093f4SDimitry Andric return -1; 367480093f4SDimitry Andric } 368480093f4SDimitry Andric 369480093f4SDimitry Andric if (insn->mode == MODE_64BIT) { 3705f757f3fSDimitry Andric // We simulate the REX prefix for simplicity's sake 371480093f4SDimitry Andric insn->rexPrefix = 0x40 | 372480093f4SDimitry Andric (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | 373480093f4SDimitry Andric (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | 374480093f4SDimitry Andric (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | 375480093f4SDimitry Andric (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 3765f757f3fSDimitry Andric 3775f757f3fSDimitry Andric // We simulate the REX2 prefix for simplicity's sake 3785f757f3fSDimitry Andric insn->rex2ExtensionPrefix[1] = 3795f757f3fSDimitry Andric (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) | 3805f757f3fSDimitry Andric (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) | 3815f757f3fSDimitry Andric (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4); 382480093f4SDimitry Andric } 383480093f4SDimitry Andric 384480093f4SDimitry Andric LLVM_DEBUG( 385480093f4SDimitry Andric dbgs() << format( 386480093f4SDimitry Andric "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 387480093f4SDimitry Andric insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 388480093f4SDimitry Andric insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3])); 389480093f4SDimitry Andric } 390480093f4SDimitry Andric } else if (byte == 0xc4) { 391480093f4SDimitry Andric uint8_t byte1; 392480093f4SDimitry Andric if (peek(insn, byte1)) { 393480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read 
second byte of VEX"); 394480093f4SDimitry Andric return -1; 395480093f4SDimitry Andric } 396480093f4SDimitry Andric 397480093f4SDimitry Andric if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 398480093f4SDimitry Andric insn->vectorExtensionType = TYPE_VEX_3B; 399480093f4SDimitry Andric else 400480093f4SDimitry Andric --insn->readerCursor; 401480093f4SDimitry Andric 402480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_VEX_3B) { 403480093f4SDimitry Andric insn->vectorExtensionPrefix[0] = byte; 404480093f4SDimitry Andric consume(insn, insn->vectorExtensionPrefix[1]); 405480093f4SDimitry Andric consume(insn, insn->vectorExtensionPrefix[2]); 406480093f4SDimitry Andric 407480093f4SDimitry Andric // We simulate the REX prefix for simplicity's sake 408480093f4SDimitry Andric 409480093f4SDimitry Andric if (insn->mode == MODE_64BIT) 410480093f4SDimitry Andric insn->rexPrefix = 0x40 | 411480093f4SDimitry Andric (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | 412480093f4SDimitry Andric (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | 413480093f4SDimitry Andric (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | 414480093f4SDimitry Andric (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 415480093f4SDimitry Andric 416480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 417480093f4SDimitry Andric insn->vectorExtensionPrefix[0], 418480093f4SDimitry Andric insn->vectorExtensionPrefix[1], 419480093f4SDimitry Andric insn->vectorExtensionPrefix[2])); 420480093f4SDimitry Andric } 421480093f4SDimitry Andric } else if (byte == 0xc5) { 422480093f4SDimitry Andric uint8_t byte1; 423480093f4SDimitry Andric if (peek(insn, byte1)) { 424480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 425480093f4SDimitry Andric return -1; 426480093f4SDimitry Andric } 427480093f4SDimitry Andric 428480093f4SDimitry Andric if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 429480093f4SDimitry Andric 
insn->vectorExtensionType = TYPE_VEX_2B; 430480093f4SDimitry Andric else 431480093f4SDimitry Andric --insn->readerCursor; 432480093f4SDimitry Andric 433480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_VEX_2B) { 434480093f4SDimitry Andric insn->vectorExtensionPrefix[0] = byte; 435480093f4SDimitry Andric consume(insn, insn->vectorExtensionPrefix[1]); 436480093f4SDimitry Andric 437480093f4SDimitry Andric if (insn->mode == MODE_64BIT) 438480093f4SDimitry Andric insn->rexPrefix = 439480093f4SDimitry Andric 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 440480093f4SDimitry Andric 441480093f4SDimitry Andric switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 442480093f4SDimitry Andric default: 443480093f4SDimitry Andric break; 444480093f4SDimitry Andric case VEX_PREFIX_66: 445480093f4SDimitry Andric insn->hasOpSize = true; 446480093f4SDimitry Andric break; 447480093f4SDimitry Andric } 448480093f4SDimitry Andric 449480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx", 450480093f4SDimitry Andric insn->vectorExtensionPrefix[0], 451480093f4SDimitry Andric insn->vectorExtensionPrefix[1])); 452480093f4SDimitry Andric } 453480093f4SDimitry Andric } else if (byte == 0x8f) { 454480093f4SDimitry Andric uint8_t byte1; 455480093f4SDimitry Andric if (peek(insn, byte1)) { 456480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP"); 457480093f4SDimitry Andric return -1; 458480093f4SDimitry Andric } 459480093f4SDimitry Andric 460480093f4SDimitry Andric if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction. 
461480093f4SDimitry Andric insn->vectorExtensionType = TYPE_XOP; 462480093f4SDimitry Andric else 463480093f4SDimitry Andric --insn->readerCursor; 464480093f4SDimitry Andric 465480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_XOP) { 466480093f4SDimitry Andric insn->vectorExtensionPrefix[0] = byte; 467480093f4SDimitry Andric consume(insn, insn->vectorExtensionPrefix[1]); 468480093f4SDimitry Andric consume(insn, insn->vectorExtensionPrefix[2]); 469480093f4SDimitry Andric 470480093f4SDimitry Andric // We simulate the REX prefix for simplicity's sake 471480093f4SDimitry Andric 472480093f4SDimitry Andric if (insn->mode == MODE_64BIT) 473480093f4SDimitry Andric insn->rexPrefix = 0x40 | 474480093f4SDimitry Andric (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | 475480093f4SDimitry Andric (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | 476480093f4SDimitry Andric (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | 477480093f4SDimitry Andric (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 478480093f4SDimitry Andric 479480093f4SDimitry Andric switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 480480093f4SDimitry Andric default: 481480093f4SDimitry Andric break; 482480093f4SDimitry Andric case VEX_PREFIX_66: 483480093f4SDimitry Andric insn->hasOpSize = true; 484480093f4SDimitry Andric break; 485480093f4SDimitry Andric } 486480093f4SDimitry Andric 487480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 488480093f4SDimitry Andric insn->vectorExtensionPrefix[0], 489480093f4SDimitry Andric insn->vectorExtensionPrefix[1], 490480093f4SDimitry Andric insn->vectorExtensionPrefix[2])); 491480093f4SDimitry Andric } 4925f757f3fSDimitry Andric } else if (isREX2(insn, byte)) { 4935f757f3fSDimitry Andric uint8_t byte1; 4945f757f3fSDimitry Andric if (peek(insn, byte1)) { 4955f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2"); 4965f757f3fSDimitry Andric return -1; 4975f757f3fSDimitry 
Andric } 4985f757f3fSDimitry Andric insn->rex2ExtensionPrefix[0] = byte; 4995f757f3fSDimitry Andric consume(insn, insn->rex2ExtensionPrefix[1]); 5005f757f3fSDimitry Andric 5015f757f3fSDimitry Andric // We simulate the REX prefix for simplicity's sake 5025f757f3fSDimitry Andric insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) | 5035f757f3fSDimitry Andric (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) | 5045f757f3fSDimitry Andric (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) | 5055f757f3fSDimitry Andric (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0); 5065f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", 5075f757f3fSDimitry Andric insn->rex2ExtensionPrefix[0], 5085f757f3fSDimitry Andric insn->rex2ExtensionPrefix[1])); 509480093f4SDimitry Andric } else if (isREX(insn, byte)) { 510480093f4SDimitry Andric if (peek(insn, nextByte)) 511480093f4SDimitry Andric return -1; 512480093f4SDimitry Andric insn->rexPrefix = byte; 513480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); 514480093f4SDimitry Andric } else 515480093f4SDimitry Andric --insn->readerCursor; 516480093f4SDimitry Andric 517480093f4SDimitry Andric if (insn->mode == MODE_16BIT) { 518480093f4SDimitry Andric insn->registerSize = (insn->hasOpSize ? 4 : 2); 519480093f4SDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 2); 520480093f4SDimitry Andric insn->displacementSize = (insn->hasAdSize ? 4 : 2); 521480093f4SDimitry Andric insn->immediateSize = (insn->hasOpSize ? 4 : 2); 522480093f4SDimitry Andric } else if (insn->mode == MODE_32BIT) { 523480093f4SDimitry Andric insn->registerSize = (insn->hasOpSize ? 2 : 4); 524480093f4SDimitry Andric insn->addressSize = (insn->hasAdSize ? 2 : 4); 525480093f4SDimitry Andric insn->displacementSize = (insn->hasAdSize ? 2 : 4); 526480093f4SDimitry Andric insn->immediateSize = (insn->hasOpSize ? 
2 : 4); 527480093f4SDimitry Andric } else if (insn->mode == MODE_64BIT) { 52881ad6265SDimitry Andric insn->displacementSize = 4; 529480093f4SDimitry Andric if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 530480093f4SDimitry Andric insn->registerSize = 8; 531480093f4SDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 8); 532480093f4SDimitry Andric insn->immediateSize = 4; 533d409305fSDimitry Andric insn->hasOpSize = false; 534480093f4SDimitry Andric } else { 535480093f4SDimitry Andric insn->registerSize = (insn->hasOpSize ? 2 : 4); 536480093f4SDimitry Andric insn->addressSize = (insn->hasAdSize ? 4 : 8); 537480093f4SDimitry Andric insn->immediateSize = (insn->hasOpSize ? 2 : 4); 538480093f4SDimitry Andric } 539480093f4SDimitry Andric } 540480093f4SDimitry Andric 541480093f4SDimitry Andric return 0; 542480093f4SDimitry Andric } 543480093f4SDimitry Andric 544480093f4SDimitry Andric // Consumes the SIB byte to determine addressing information. 545480093f4SDimitry Andric static int readSIB(struct InternalInstruction *insn) { 546480093f4SDimitry Andric SIBBase sibBaseBase = SIB_BASE_NONE; 547480093f4SDimitry Andric uint8_t index, base; 548480093f4SDimitry Andric 549480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readSIB()"); 550480093f4SDimitry Andric switch (insn->addressSize) { 551480093f4SDimitry Andric case 2: 552480093f4SDimitry Andric default: 553480093f4SDimitry Andric llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode"); 554480093f4SDimitry Andric case 4: 555480093f4SDimitry Andric insn->sibIndexBase = SIB_INDEX_EAX; 556480093f4SDimitry Andric sibBaseBase = SIB_BASE_EAX; 557480093f4SDimitry Andric break; 558480093f4SDimitry Andric case 8: 559480093f4SDimitry Andric insn->sibIndexBase = SIB_INDEX_RAX; 560480093f4SDimitry Andric sibBaseBase = SIB_BASE_RAX; 561480093f4SDimitry Andric break; 562480093f4SDimitry Andric } 563480093f4SDimitry Andric 564480093f4SDimitry Andric if (consume(insn, insn->sib)) 565480093f4SDimitry Andric return -1; 
566480093f4SDimitry Andric 5675f757f3fSDimitry Andric index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) | 5685f757f3fSDimitry Andric (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); 569480093f4SDimitry Andric 570480093f4SDimitry Andric if (index == 0x4) { 571480093f4SDimitry Andric insn->sibIndex = SIB_INDEX_NONE; 572480093f4SDimitry Andric } else { 573480093f4SDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index); 574480093f4SDimitry Andric } 575480093f4SDimitry Andric 576480093f4SDimitry Andric insn->sibScale = 1 << scaleFromSIB(insn->sib); 577480093f4SDimitry Andric 5785f757f3fSDimitry Andric base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) | 5795f757f3fSDimitry Andric (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); 580480093f4SDimitry Andric 581480093f4SDimitry Andric switch (base) { 582480093f4SDimitry Andric case 0x5: 583480093f4SDimitry Andric case 0xd: 584480093f4SDimitry Andric switch (modFromModRM(insn->modRM)) { 585480093f4SDimitry Andric case 0x0: 586480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_32; 587480093f4SDimitry Andric insn->sibBase = SIB_BASE_NONE; 588480093f4SDimitry Andric break; 589480093f4SDimitry Andric case 0x1: 590480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_8; 591480093f4SDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base); 592480093f4SDimitry Andric break; 593480093f4SDimitry Andric case 0x2: 594480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_32; 595480093f4SDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base); 596480093f4SDimitry Andric break; 597480093f4SDimitry Andric default: 598480093f4SDimitry Andric llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte"); 599480093f4SDimitry Andric } 600480093f4SDimitry Andric break; 601480093f4SDimitry Andric default: 602480093f4SDimitry Andric insn->sibBase = (SIBBase)(sibBaseBase + base); 603480093f4SDimitry Andric break; 604480093f4SDimitry Andric } 605480093f4SDimitry Andric 606480093f4SDimitry 
Andric return 0; 607480093f4SDimitry Andric } 608480093f4SDimitry Andric 609480093f4SDimitry Andric static int readDisplacement(struct InternalInstruction *insn) { 610480093f4SDimitry Andric int8_t d8; 611480093f4SDimitry Andric int16_t d16; 612480093f4SDimitry Andric int32_t d32; 613480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readDisplacement()"); 614480093f4SDimitry Andric 615480093f4SDimitry Andric insn->displacementOffset = insn->readerCursor - insn->startLocation; 616480093f4SDimitry Andric switch (insn->eaDisplacement) { 617480093f4SDimitry Andric case EA_DISP_NONE: 618480093f4SDimitry Andric break; 619480093f4SDimitry Andric case EA_DISP_8: 620480093f4SDimitry Andric if (consume(insn, d8)) 621480093f4SDimitry Andric return -1; 622480093f4SDimitry Andric insn->displacement = d8; 623480093f4SDimitry Andric break; 624480093f4SDimitry Andric case EA_DISP_16: 625480093f4SDimitry Andric if (consume(insn, d16)) 626480093f4SDimitry Andric return -1; 627480093f4SDimitry Andric insn->displacement = d16; 628480093f4SDimitry Andric break; 629480093f4SDimitry Andric case EA_DISP_32: 630480093f4SDimitry Andric if (consume(insn, d32)) 631480093f4SDimitry Andric return -1; 632480093f4SDimitry Andric insn->displacement = d32; 633480093f4SDimitry Andric break; 634480093f4SDimitry Andric } 635480093f4SDimitry Andric 636480093f4SDimitry Andric return 0; 637480093f4SDimitry Andric } 638480093f4SDimitry Andric 639480093f4SDimitry Andric // Consumes all addressing information (ModR/M byte, SIB byte, and displacement. 
640480093f4SDimitry Andric static int readModRM(struct InternalInstruction *insn) { 6415f757f3fSDimitry Andric uint8_t mod, rm, reg; 642480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readModRM()"); 643480093f4SDimitry Andric 644480093f4SDimitry Andric if (insn->consumedModRM) 645480093f4SDimitry Andric return 0; 646480093f4SDimitry Andric 647480093f4SDimitry Andric if (consume(insn, insn->modRM)) 648480093f4SDimitry Andric return -1; 649480093f4SDimitry Andric insn->consumedModRM = true; 650480093f4SDimitry Andric 651480093f4SDimitry Andric mod = modFromModRM(insn->modRM); 652480093f4SDimitry Andric rm = rmFromModRM(insn->modRM); 653480093f4SDimitry Andric reg = regFromModRM(insn->modRM); 654480093f4SDimitry Andric 655480093f4SDimitry Andric // This goes by insn->registerSize to pick the correct register, which messes 656480093f4SDimitry Andric // up if we're using (say) XMM or 8-bit register operands. That gets fixed in 657480093f4SDimitry Andric // fixupReg(). 658480093f4SDimitry Andric switch (insn->registerSize) { 659480093f4SDimitry Andric case 2: 660480093f4SDimitry Andric insn->regBase = MODRM_REG_AX; 661480093f4SDimitry Andric insn->eaRegBase = EA_REG_AX; 662480093f4SDimitry Andric break; 663480093f4SDimitry Andric case 4: 664480093f4SDimitry Andric insn->regBase = MODRM_REG_EAX; 665480093f4SDimitry Andric insn->eaRegBase = EA_REG_EAX; 666480093f4SDimitry Andric break; 667480093f4SDimitry Andric case 8: 668480093f4SDimitry Andric insn->regBase = MODRM_REG_RAX; 669480093f4SDimitry Andric insn->eaRegBase = EA_REG_RAX; 670480093f4SDimitry Andric break; 671480093f4SDimitry Andric } 672480093f4SDimitry Andric 6735f757f3fSDimitry Andric reg |= (rFromREX(insn->rexPrefix) << 3) | 6745f757f3fSDimitry Andric (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); 6755f757f3fSDimitry Andric rm |= (bFromREX(insn->rexPrefix) << 3) | 6765f757f3fSDimitry Andric (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); 677480093f4SDimitry Andric 6785f757f3fSDimitry Andric if 
(insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) 679480093f4SDimitry Andric reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 680480093f4SDimitry Andric 681480093f4SDimitry Andric insn->reg = (Reg)(insn->regBase + reg); 682480093f4SDimitry Andric 683480093f4SDimitry Andric switch (insn->addressSize) { 684480093f4SDimitry Andric case 2: { 685480093f4SDimitry Andric EABase eaBaseBase = EA_BASE_BX_SI; 686480093f4SDimitry Andric 687480093f4SDimitry Andric switch (mod) { 688480093f4SDimitry Andric case 0x0: 689480093f4SDimitry Andric if (rm == 0x6) { 690480093f4SDimitry Andric insn->eaBase = EA_BASE_NONE; 691480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_16; 692480093f4SDimitry Andric if (readDisplacement(insn)) 693480093f4SDimitry Andric return -1; 694480093f4SDimitry Andric } else { 695480093f4SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm); 696480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_NONE; 697480093f4SDimitry Andric } 698480093f4SDimitry Andric break; 699480093f4SDimitry Andric case 0x1: 700480093f4SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm); 701480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_8; 702480093f4SDimitry Andric insn->displacementSize = 1; 703480093f4SDimitry Andric if (readDisplacement(insn)) 704480093f4SDimitry Andric return -1; 705480093f4SDimitry Andric break; 706480093f4SDimitry Andric case 0x2: 707480093f4SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm); 708480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_16; 709480093f4SDimitry Andric if (readDisplacement(insn)) 710480093f4SDimitry Andric return -1; 711480093f4SDimitry Andric break; 712480093f4SDimitry Andric case 0x3: 713480093f4SDimitry Andric insn->eaBase = (EABase)(insn->eaRegBase + rm); 714480093f4SDimitry Andric if (readDisplacement(insn)) 715480093f4SDimitry Andric return -1; 716480093f4SDimitry Andric break; 717480093f4SDimitry Andric } 718480093f4SDimitry Andric break; 719480093f4SDimitry Andric } 
720480093f4SDimitry Andric case 4: 721480093f4SDimitry Andric case 8: { 722480093f4SDimitry Andric EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 723480093f4SDimitry Andric 724480093f4SDimitry Andric switch (mod) { 725480093f4SDimitry Andric case 0x0: 726480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this 727480093f4SDimitry Andric // In determining whether RIP-relative mode is used (rm=5), 728480093f4SDimitry Andric // or whether a SIB byte is present (rm=4), 729480093f4SDimitry Andric // the extension bits (REX.b and EVEX.x) are ignored. 730480093f4SDimitry Andric switch (rm & 7) { 731480093f4SDimitry Andric case 0x4: // SIB byte is present 732480093f4SDimitry Andric insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); 733480093f4SDimitry Andric if (readSIB(insn) || readDisplacement(insn)) 734480093f4SDimitry Andric return -1; 735480093f4SDimitry Andric break; 736480093f4SDimitry Andric case 0x5: // RIP-relative 737480093f4SDimitry Andric insn->eaBase = EA_BASE_NONE; 738480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_32; 739480093f4SDimitry Andric if (readDisplacement(insn)) 740480093f4SDimitry Andric return -1; 741480093f4SDimitry Andric break; 742480093f4SDimitry Andric default: 743480093f4SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm); 744480093f4SDimitry Andric break; 745480093f4SDimitry Andric } 746480093f4SDimitry Andric break; 747480093f4SDimitry Andric case 0x1: 748480093f4SDimitry Andric insn->displacementSize = 1; 749bdd1243dSDimitry Andric [[fallthrough]]; 750480093f4SDimitry Andric case 0x2: 751480093f4SDimitry Andric insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32); 752480093f4SDimitry Andric switch (rm & 7) { 753480093f4SDimitry Andric case 0x4: // SIB byte is present 754480093f4SDimitry Andric insn->eaBase = EA_BASE_sib; 755480093f4SDimitry Andric if (readSIB(insn) || readDisplacement(insn)) 756480093f4SDimitry Andric return -1; 757480093f4SDimitry Andric break; 758480093f4SDimitry Andric default: 759480093f4SDimitry Andric insn->eaBase = (EABase)(eaBaseBase + rm); 760480093f4SDimitry Andric if (readDisplacement(insn)) 761480093f4SDimitry Andric return -1; 762480093f4SDimitry Andric break; 763480093f4SDimitry Andric } 764480093f4SDimitry Andric break; 765480093f4SDimitry Andric case 0x3: 766480093f4SDimitry Andric insn->eaDisplacement = EA_DISP_NONE; 7675f757f3fSDimitry Andric insn->eaBase = (EABase)(insn->eaRegBase + rm); 768480093f4SDimitry Andric break; 769480093f4SDimitry Andric } 770480093f4SDimitry Andric break; 771480093f4SDimitry Andric } 772480093f4SDimitry Andric } // switch (insn->addressSize) 773480093f4SDimitry Andric 774480093f4SDimitry Andric return 0; 775480093f4SDimitry Andric } 776480093f4SDimitry Andric 7775f757f3fSDimitry Andric #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 778480093f4SDimitry Andric static uint16_t name(struct InternalInstruction *insn, OperandType type, \ 779480093f4SDimitry Andric uint8_t index, uint8_t *valid) { \ 780480093f4SDimitry Andric *valid = 1; \ 781480093f4SDimitry Andric switch (type) { \ 782480093f4SDimitry Andric default: \ 783480093f4SDimitry Andric debug("Unhandled register type"); \ 784480093f4SDimitry Andric *valid = 0; \ 785480093f4SDimitry Andric return 0; \ 786480093f4SDimitry Andric case TYPE_Rv: \ 787480093f4SDimitry Andric return base + index; \ 788480093f4SDimitry Andric case TYPE_R8: \ 7895f757f3fSDimitry Andric if (insn->rexPrefix && index >= 4 && index <= 7) \ 790480093f4SDimitry Andric return prefix##_SPL + (index - 4); \ 7915f757f3fSDimitry Andric else \ 792480093f4SDimitry Andric return prefix##_AL + index; \ 793480093f4SDimitry 
Andric case TYPE_R16: \ 794480093f4SDimitry Andric return prefix##_AX + index; \ 795480093f4SDimitry Andric case TYPE_R32: \ 796480093f4SDimitry Andric return prefix##_EAX + index; \ 797480093f4SDimitry Andric case TYPE_R64: \ 798480093f4SDimitry Andric return prefix##_RAX + index; \ 799480093f4SDimitry Andric case TYPE_ZMM: \ 800480093f4SDimitry Andric return prefix##_ZMM0 + index; \ 801480093f4SDimitry Andric case TYPE_YMM: \ 802480093f4SDimitry Andric return prefix##_YMM0 + index; \ 803480093f4SDimitry Andric case TYPE_XMM: \ 804480093f4SDimitry Andric return prefix##_XMM0 + index; \ 8055ffd83dbSDimitry Andric case TYPE_TMM: \ 8065ffd83dbSDimitry Andric if (index > 7) \ 8075ffd83dbSDimitry Andric *valid = 0; \ 8085ffd83dbSDimitry Andric return prefix##_TMM0 + index; \ 809480093f4SDimitry Andric case TYPE_VK: \ 810480093f4SDimitry Andric index &= 0xf; \ 811480093f4SDimitry Andric if (index > 7) \ 812480093f4SDimitry Andric *valid = 0; \ 813480093f4SDimitry Andric return prefix##_K0 + index; \ 814480093f4SDimitry Andric case TYPE_VK_PAIR: \ 815480093f4SDimitry Andric if (index > 7) \ 816480093f4SDimitry Andric *valid = 0; \ 817480093f4SDimitry Andric return prefix##_K0_K1 + (index / 2); \ 818480093f4SDimitry Andric case TYPE_MM64: \ 819480093f4SDimitry Andric return prefix##_MM0 + (index & 0x7); \ 820480093f4SDimitry Andric case TYPE_SEGMENTREG: \ 821480093f4SDimitry Andric if ((index & 7) > 5) \ 822480093f4SDimitry Andric *valid = 0; \ 823480093f4SDimitry Andric return prefix##_ES + (index & 7); \ 824480093f4SDimitry Andric case TYPE_DEBUGREG: \ 825*0fca6ea1SDimitry Andric if (index > 15) \ 826*0fca6ea1SDimitry Andric *valid = 0; \ 827480093f4SDimitry Andric return prefix##_DR0 + index; \ 828480093f4SDimitry Andric case TYPE_CONTROLREG: \ 829*0fca6ea1SDimitry Andric if (index > 15) \ 830*0fca6ea1SDimitry Andric *valid = 0; \ 831480093f4SDimitry Andric return prefix##_CR0 + index; \ 832480093f4SDimitry Andric case TYPE_MVSIBX: \ 833480093f4SDimitry Andric return 
prefix##_XMM0 + index; \ 834480093f4SDimitry Andric case TYPE_MVSIBY: \ 835480093f4SDimitry Andric return prefix##_YMM0 + index; \ 836480093f4SDimitry Andric case TYPE_MVSIBZ: \ 837480093f4SDimitry Andric return prefix##_ZMM0 + index; \ 838480093f4SDimitry Andric } \ 839480093f4SDimitry Andric } 840480093f4SDimitry Andric 841480093f4SDimitry Andric // Consult an operand type to determine the meaning of the reg or R/M field. If 842480093f4SDimitry Andric // the operand is an XMM operand, for example, an operand would be XMM0 instead 843480093f4SDimitry Andric // of AX, which readModRM() would otherwise misinterpret it as. 844480093f4SDimitry Andric // 845480093f4SDimitry Andric // @param insn - The instruction containing the operand. 846480093f4SDimitry Andric // @param type - The operand type. 847480093f4SDimitry Andric // @param index - The existing value of the field as reported by readModRM(). 848480093f4SDimitry Andric // @param valid - The address of a uint8_t. The target is set to 1 if the 849480093f4SDimitry Andric // field is valid for the register class; 0 if not. 850480093f4SDimitry Andric // @return - The proper value. 8515f757f3fSDimitry Andric GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 8525f757f3fSDimitry Andric GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 853480093f4SDimitry Andric 854480093f4SDimitry Andric // Consult an operand specifier to determine which of the fixup*Value functions 855480093f4SDimitry Andric // to use in correcting readModRM()'ss interpretation. 856480093f4SDimitry Andric // 857480093f4SDimitry Andric // @param insn - See fixup*Value(). 858480093f4SDimitry Andric // @param op - The operand specifier. 859480093f4SDimitry Andric // @return - 0 if fixup was successful; -1 if the register returned was 860480093f4SDimitry Andric // invalid for its class. 
861480093f4SDimitry Andric static int fixupReg(struct InternalInstruction *insn, 862480093f4SDimitry Andric const struct OperandSpecifier *op) { 863480093f4SDimitry Andric uint8_t valid; 864480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "fixupReg()"); 865480093f4SDimitry Andric 866480093f4SDimitry Andric switch ((OperandEncoding)op->encoding) { 867480093f4SDimitry Andric default: 868480093f4SDimitry Andric debug("Expected a REG or R/M encoding in fixupReg"); 869480093f4SDimitry Andric return -1; 870480093f4SDimitry Andric case ENCODING_VVVV: 871480093f4SDimitry Andric insn->vvvv = 872480093f4SDimitry Andric (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 873480093f4SDimitry Andric if (!valid) 874480093f4SDimitry Andric return -1; 875480093f4SDimitry Andric break; 876480093f4SDimitry Andric case ENCODING_REG: 877480093f4SDimitry Andric insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 878480093f4SDimitry Andric insn->reg - insn->regBase, &valid); 879480093f4SDimitry Andric if (!valid) 880480093f4SDimitry Andric return -1; 881480093f4SDimitry Andric break; 882480093f4SDimitry Andric CASE_ENCODING_RM: 8835f757f3fSDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 8845f757f3fSDimitry Andric modFromModRM(insn->modRM) == 3) { 8855f757f3fSDimitry Andric // EVEX_X can extend the register id to 32 for a non-GPR register that is 8865f757f3fSDimitry Andric // encoded in RM. 
8875f757f3fSDimitry Andric // mode : MODE_64_BIT 8885f757f3fSDimitry Andric // Only 8 vector registers are available in 32 bit mode 8895f757f3fSDimitry Andric // mod : 3 8905f757f3fSDimitry Andric // RM encodes a register 8915f757f3fSDimitry Andric switch (op->type) { 8925f757f3fSDimitry Andric case TYPE_Rv: 8935f757f3fSDimitry Andric case TYPE_R8: 8945f757f3fSDimitry Andric case TYPE_R16: 8955f757f3fSDimitry Andric case TYPE_R32: 8965f757f3fSDimitry Andric case TYPE_R64: 8975f757f3fSDimitry Andric break; 8985f757f3fSDimitry Andric default: 8995f757f3fSDimitry Andric insn->eaBase = 9005f757f3fSDimitry Andric (EABase)(insn->eaBase + 9015f757f3fSDimitry Andric (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4)); 9025f757f3fSDimitry Andric break; 9035f757f3fSDimitry Andric } 9045f757f3fSDimitry Andric } 9055f757f3fSDimitry Andric [[fallthrough]]; 9065f757f3fSDimitry Andric case ENCODING_SIB: 907480093f4SDimitry Andric if (insn->eaBase >= insn->eaRegBase) { 908480093f4SDimitry Andric insn->eaBase = (EABase)fixupRMValue( 909480093f4SDimitry Andric insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 910480093f4SDimitry Andric if (!valid) 911480093f4SDimitry Andric return -1; 912480093f4SDimitry Andric } 913480093f4SDimitry Andric break; 914480093f4SDimitry Andric } 915480093f4SDimitry Andric 916480093f4SDimitry Andric return 0; 917480093f4SDimitry Andric } 918480093f4SDimitry Andric 919480093f4SDimitry Andric // Read the opcode (except the ModR/M byte in the case of extended or escape 920480093f4SDimitry Andric // opcodes). 
921480093f4SDimitry Andric static bool readOpcode(struct InternalInstruction *insn) { 922480093f4SDimitry Andric uint8_t current; 923480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readOpcode()"); 924480093f4SDimitry Andric 925480093f4SDimitry Andric insn->opcodeType = ONEBYTE; 926480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) { 927349cc55cSDimitry Andric switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 928480093f4SDimitry Andric default: 929480093f4SDimitry Andric LLVM_DEBUG( 930349cc55cSDimitry Andric dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 931349cc55cSDimitry Andric mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 932480093f4SDimitry Andric return true; 933480093f4SDimitry Andric case VEX_LOB_0F: 934480093f4SDimitry Andric insn->opcodeType = TWOBYTE; 935480093f4SDimitry Andric return consume(insn, insn->opcode); 936480093f4SDimitry Andric case VEX_LOB_0F38: 937480093f4SDimitry Andric insn->opcodeType = THREEBYTE_38; 938480093f4SDimitry Andric return consume(insn, insn->opcode); 939480093f4SDimitry Andric case VEX_LOB_0F3A: 940480093f4SDimitry Andric insn->opcodeType = THREEBYTE_3A; 941480093f4SDimitry Andric return consume(insn, insn->opcode); 9425f757f3fSDimitry Andric case VEX_LOB_MAP4: 9435f757f3fSDimitry Andric insn->opcodeType = MAP4; 9445f757f3fSDimitry Andric return consume(insn, insn->opcode); 945349cc55cSDimitry Andric case VEX_LOB_MAP5: 946349cc55cSDimitry Andric insn->opcodeType = MAP5; 947349cc55cSDimitry Andric return consume(insn, insn->opcode); 948349cc55cSDimitry Andric case VEX_LOB_MAP6: 949349cc55cSDimitry Andric insn->opcodeType = MAP6; 950349cc55cSDimitry Andric return consume(insn, insn->opcode); 951*0fca6ea1SDimitry Andric case VEX_LOB_MAP7: 952*0fca6ea1SDimitry Andric insn->opcodeType = MAP7; 953*0fca6ea1SDimitry Andric return consume(insn, insn->opcode); 954480093f4SDimitry Andric } 955480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 956480093f4SDimitry 
Andric switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 957480093f4SDimitry Andric default: 958480093f4SDimitry Andric LLVM_DEBUG( 959480093f4SDimitry Andric dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 960480093f4SDimitry Andric mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 961480093f4SDimitry Andric return true; 962480093f4SDimitry Andric case VEX_LOB_0F: 963480093f4SDimitry Andric insn->opcodeType = TWOBYTE; 964480093f4SDimitry Andric return consume(insn, insn->opcode); 965480093f4SDimitry Andric case VEX_LOB_0F38: 966480093f4SDimitry Andric insn->opcodeType = THREEBYTE_38; 967480093f4SDimitry Andric return consume(insn, insn->opcode); 968480093f4SDimitry Andric case VEX_LOB_0F3A: 969480093f4SDimitry Andric insn->opcodeType = THREEBYTE_3A; 970480093f4SDimitry Andric return consume(insn, insn->opcode); 971349cc55cSDimitry Andric case VEX_LOB_MAP5: 972349cc55cSDimitry Andric insn->opcodeType = MAP5; 973349cc55cSDimitry Andric return consume(insn, insn->opcode); 974349cc55cSDimitry Andric case VEX_LOB_MAP6: 975349cc55cSDimitry Andric insn->opcodeType = MAP6; 976349cc55cSDimitry Andric return consume(insn, insn->opcode); 9775f757f3fSDimitry Andric case VEX_LOB_MAP7: 9785f757f3fSDimitry Andric insn->opcodeType = MAP7; 9795f757f3fSDimitry Andric return consume(insn, insn->opcode); 980480093f4SDimitry Andric } 981480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 982480093f4SDimitry Andric insn->opcodeType = TWOBYTE; 983480093f4SDimitry Andric return consume(insn, insn->opcode); 984480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_XOP) { 985480093f4SDimitry Andric switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 986480093f4SDimitry Andric default: 987480093f4SDimitry Andric LLVM_DEBUG( 988480093f4SDimitry Andric dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 989480093f4SDimitry Andric mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 
990480093f4SDimitry Andric return true; 991480093f4SDimitry Andric case XOP_MAP_SELECT_8: 992480093f4SDimitry Andric insn->opcodeType = XOP8_MAP; 993480093f4SDimitry Andric return consume(insn, insn->opcode); 994480093f4SDimitry Andric case XOP_MAP_SELECT_9: 995480093f4SDimitry Andric insn->opcodeType = XOP9_MAP; 996480093f4SDimitry Andric return consume(insn, insn->opcode); 997480093f4SDimitry Andric case XOP_MAP_SELECT_A: 998480093f4SDimitry Andric insn->opcodeType = XOPA_MAP; 999480093f4SDimitry Andric return consume(insn, insn->opcode); 1000480093f4SDimitry Andric } 10015f757f3fSDimitry Andric } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) { 10025f757f3fSDimitry Andric // m bit indicates opcode map 1 10035f757f3fSDimitry Andric insn->opcodeType = TWOBYTE; 10045f757f3fSDimitry Andric return consume(insn, insn->opcode); 1005480093f4SDimitry Andric } 1006480093f4SDimitry Andric 1007480093f4SDimitry Andric if (consume(insn, current)) 1008480093f4SDimitry Andric return true; 1009480093f4SDimitry Andric 1010480093f4SDimitry Andric if (current == 0x0f) { 1011480093f4SDimitry Andric LLVM_DEBUG( 1012480093f4SDimitry Andric dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 1013480093f4SDimitry Andric if (consume(insn, current)) 1014480093f4SDimitry Andric return true; 1015480093f4SDimitry Andric 1016480093f4SDimitry Andric if (current == 0x38) { 1017480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1018480093f4SDimitry Andric current)); 1019480093f4SDimitry Andric if (consume(insn, current)) 1020480093f4SDimitry Andric return true; 1021480093f4SDimitry Andric 1022480093f4SDimitry Andric insn->opcodeType = THREEBYTE_38; 1023480093f4SDimitry Andric } else if (current == 0x3a) { 1024480093f4SDimitry Andric LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1025480093f4SDimitry Andric current)); 1026480093f4SDimitry Andric if (consume(insn, current)) 1027480093f4SDimitry Andric 
return true; 1028480093f4SDimitry Andric 1029480093f4SDimitry Andric insn->opcodeType = THREEBYTE_3A; 1030480093f4SDimitry Andric } else if (current == 0x0f) { 1031480093f4SDimitry Andric LLVM_DEBUG( 1032480093f4SDimitry Andric dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 1033480093f4SDimitry Andric 1034480093f4SDimitry Andric // Consume operands before the opcode to comply with the 3DNow encoding 1035480093f4SDimitry Andric if (readModRM(insn)) 1036480093f4SDimitry Andric return true; 1037480093f4SDimitry Andric 1038480093f4SDimitry Andric if (consume(insn, current)) 1039480093f4SDimitry Andric return true; 1040480093f4SDimitry Andric 1041480093f4SDimitry Andric insn->opcodeType = THREEDNOW_MAP; 1042480093f4SDimitry Andric } else { 1043480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix"); 1044480093f4SDimitry Andric insn->opcodeType = TWOBYTE; 1045480093f4SDimitry Andric } 1046480093f4SDimitry Andric } else if (insn->mandatoryPrefix) 1047480093f4SDimitry Andric // The opcode with mandatory prefix must start with opcode escape. 1048480093f4SDimitry Andric // If not it's legacy repeat prefix 1049480093f4SDimitry Andric insn->mandatoryPrefix = 0; 1050480093f4SDimitry Andric 1051480093f4SDimitry Andric // At this point we have consumed the full opcode. 1052480093f4SDimitry Andric // Anything we consume from here on must be unconsumed. 1053480093f4SDimitry Andric insn->opcode = current; 1054480093f4SDimitry Andric 1055480093f4SDimitry Andric return false; 1056480093f4SDimitry Andric } 1057480093f4SDimitry Andric 1058480093f4SDimitry Andric // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  // Walk both names in lock step. Characters must match, except for the
  // substitutions that turn a 32/64-bit spelling into the 16-bit one:
  //   Q or L -> W   (operand-size suffix)
  //   6 or 3 -> 1   (first digit of 64 / 32 -> 16)
  //   4 or 2 -> 6   (second digit of 64 / 32 -> 16)
  const char *wide = orig;
  const char *narrow = equiv;
  for (; *wide != '\0' && *narrow != '\0'; ++wide, ++narrow) {
    if (*wide == *narrow)
      continue;

    const bool suffixSwap = (*wide == 'Q' || *wide == 'L') && *narrow == 'W';
    const bool firstDigitSwap =
        (*wide == '6' || *wide == '3') && *narrow == '1';
    const bool secondDigitSwap =
        (*wide == '4' || *wide == '2') && *narrow == '6';
    if (!(suffixSwap || firstDigitSwap || secondDigitSwap))
      return false;
  }

  // The names are equivalent only if both ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}

// Determine whether this instruction is a 64-bit instruction, i.e. whether
// its name contains the substring "64".
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
    // cursor[1] is at worst the terminating NUL, so this never reads past it.
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  }
  return false;
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes, and using a supplied attribute mask.
1089480093f4SDimitry Andric static int getInstructionIDWithAttrMask(uint16_t *instructionID, 1090480093f4SDimitry Andric struct InternalInstruction *insn, 1091480093f4SDimitry Andric uint16_t attrMask) { 1092480093f4SDimitry Andric auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]); 1093480093f4SDimitry Andric const ContextDecision *decision; 1094480093f4SDimitry Andric switch (insn->opcodeType) { 1095480093f4SDimitry Andric case ONEBYTE: 1096480093f4SDimitry Andric decision = &ONEBYTE_SYM; 1097480093f4SDimitry Andric break; 1098480093f4SDimitry Andric case TWOBYTE: 1099480093f4SDimitry Andric decision = &TWOBYTE_SYM; 1100480093f4SDimitry Andric break; 1101480093f4SDimitry Andric case THREEBYTE_38: 1102480093f4SDimitry Andric decision = &THREEBYTE38_SYM; 1103480093f4SDimitry Andric break; 1104480093f4SDimitry Andric case THREEBYTE_3A: 1105480093f4SDimitry Andric decision = &THREEBYTE3A_SYM; 1106480093f4SDimitry Andric break; 1107480093f4SDimitry Andric case XOP8_MAP: 1108480093f4SDimitry Andric decision = &XOP8_MAP_SYM; 1109480093f4SDimitry Andric break; 1110480093f4SDimitry Andric case XOP9_MAP: 1111480093f4SDimitry Andric decision = &XOP9_MAP_SYM; 1112480093f4SDimitry Andric break; 1113480093f4SDimitry Andric case XOPA_MAP: 1114480093f4SDimitry Andric decision = &XOPA_MAP_SYM; 1115480093f4SDimitry Andric break; 1116480093f4SDimitry Andric case THREEDNOW_MAP: 1117480093f4SDimitry Andric decision = &THREEDNOW_MAP_SYM; 1118480093f4SDimitry Andric break; 11195f757f3fSDimitry Andric case MAP4: 11205f757f3fSDimitry Andric decision = &MAP4_SYM; 11215f757f3fSDimitry Andric break; 1122349cc55cSDimitry Andric case MAP5: 1123349cc55cSDimitry Andric decision = &MAP5_SYM; 1124349cc55cSDimitry Andric break; 1125349cc55cSDimitry Andric case MAP6: 1126349cc55cSDimitry Andric decision = &MAP6_SYM; 1127349cc55cSDimitry Andric break; 11285f757f3fSDimitry Andric case MAP7: 11295f757f3fSDimitry Andric decision = &MAP7_SYM; 11305f757f3fSDimitry Andric break; 
1131480093f4SDimitry Andric } 1132480093f4SDimitry Andric 1133480093f4SDimitry Andric if (decision->opcodeDecisions[insnCtx] 1134480093f4SDimitry Andric .modRMDecisions[insn->opcode] 1135480093f4SDimitry Andric .modrm_type != MODRM_ONEENTRY) { 1136480093f4SDimitry Andric if (readModRM(insn)) 1137480093f4SDimitry Andric return -1; 1138480093f4SDimitry Andric *instructionID = 1139480093f4SDimitry Andric decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM); 1140480093f4SDimitry Andric } else { 1141480093f4SDimitry Andric *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0); 1142480093f4SDimitry Andric } 1143480093f4SDimitry Andric 1144480093f4SDimitry Andric return 0; 1145480093f4SDimitry Andric } 1146480093f4SDimitry Andric 1147*0fca6ea1SDimitry Andric static bool isCCMPOrCTEST(InternalInstruction *insn) { 1148*0fca6ea1SDimitry Andric if (insn->opcodeType != MAP4) 1149*0fca6ea1SDimitry Andric return false; 1150*0fca6ea1SDimitry Andric if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7) 1151*0fca6ea1SDimitry Andric return true; 1152*0fca6ea1SDimitry Andric switch (insn->opcode & 0xfe) { 1153*0fca6ea1SDimitry Andric default: 1154*0fca6ea1SDimitry Andric return false; 1155*0fca6ea1SDimitry Andric case 0x38: 1156*0fca6ea1SDimitry Andric case 0x3a: 1157*0fca6ea1SDimitry Andric case 0x84: 1158*0fca6ea1SDimitry Andric return true; 1159*0fca6ea1SDimitry Andric case 0x80: 1160*0fca6ea1SDimitry Andric return regFromModRM(insn->modRM) == 7; 1161*0fca6ea1SDimitry Andric case 0xf6: 1162*0fca6ea1SDimitry Andric return regFromModRM(insn->modRM) == 0; 1163*0fca6ea1SDimitry Andric } 1164*0fca6ea1SDimitry Andric } 1165*0fca6ea1SDimitry Andric 1166*0fca6ea1SDimitry Andric static bool isNF(InternalInstruction *insn) { 1167*0fca6ea1SDimitry Andric if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1168*0fca6ea1SDimitry Andric return false; 1169*0fca6ea1SDimitry Andric if (insn->opcodeType == MAP4) 1170*0fca6ea1SDimitry Andric return true; 
1171*0fca6ea1SDimitry Andric // Below NF instructions are not in map4. 1172*0fca6ea1SDimitry Andric if (insn->opcodeType == THREEBYTE_38 && 1173*0fca6ea1SDimitry Andric ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) { 1174*0fca6ea1SDimitry Andric switch (insn->opcode) { 1175*0fca6ea1SDimitry Andric case 0xf2: // ANDN 1176*0fca6ea1SDimitry Andric case 0xf3: // BLSI, BLSR, BLSMSK 1177*0fca6ea1SDimitry Andric case 0xf5: // BZHI 1178*0fca6ea1SDimitry Andric case 0xf7: // BEXTR 1179*0fca6ea1SDimitry Andric return true; 1180*0fca6ea1SDimitry Andric default: 1181*0fca6ea1SDimitry Andric break; 1182*0fca6ea1SDimitry Andric } 1183*0fca6ea1SDimitry Andric } 1184*0fca6ea1SDimitry Andric return false; 1185*0fca6ea1SDimitry Andric } 1186*0fca6ea1SDimitry Andric 1187480093f4SDimitry Andric // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1188480093f4SDimitry Andric // for extended and escape opcodes. Determines the attributes and context for 1189480093f4SDimitry Andric // the instruction before doing so. 1190480093f4SDimitry Andric static int getInstructionID(struct InternalInstruction *insn, 1191480093f4SDimitry Andric const MCInstrInfo *mii) { 1192480093f4SDimitry Andric uint16_t attrMask; 1193480093f4SDimitry Andric uint16_t instructionID; 1194480093f4SDimitry Andric 1195480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "getID()"); 1196480093f4SDimitry Andric 1197480093f4SDimitry Andric attrMask = ATTR_NONE; 1198480093f4SDimitry Andric 1199480093f4SDimitry Andric if (insn->mode == MODE_64BIT) 1200480093f4SDimitry Andric attrMask |= ATTR_64BIT; 1201480093f4SDimitry Andric 1202480093f4SDimitry Andric if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1203480093f4SDimitry Andric attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? 
ATTR_EVEX : ATTR_VEX; 1204480093f4SDimitry Andric 1205480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) { 1206480093f4SDimitry Andric switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 1207480093f4SDimitry Andric case VEX_PREFIX_66: 1208480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1209480093f4SDimitry Andric break; 1210480093f4SDimitry Andric case VEX_PREFIX_F3: 1211480093f4SDimitry Andric attrMask |= ATTR_XS; 1212480093f4SDimitry Andric break; 1213480093f4SDimitry Andric case VEX_PREFIX_F2: 1214480093f4SDimitry Andric attrMask |= ATTR_XD; 1215480093f4SDimitry Andric break; 1216480093f4SDimitry Andric } 1217480093f4SDimitry Andric 1218480093f4SDimitry Andric if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1219480093f4SDimitry Andric attrMask |= ATTR_EVEXKZ; 1220480093f4SDimitry Andric if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1221480093f4SDimitry Andric attrMask |= ATTR_EVEXB; 1222*0fca6ea1SDimitry Andric if (isNF(insn) && !readModRM(insn) && 1223*0fca6ea1SDimitry Andric !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa. 
1224647cbc5dSDimitry Andric attrMask |= ATTR_EVEXNF; 1225*0fca6ea1SDimitry Andric // aaa is not used a opmask in MAP4 1226*0fca6ea1SDimitry Andric else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) && 1227*0fca6ea1SDimitry Andric (insn->opcodeType != MAP4)) 1228480093f4SDimitry Andric attrMask |= ATTR_EVEXK; 1229480093f4SDimitry Andric if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1230480093f4SDimitry Andric attrMask |= ATTR_VEXL; 1231480093f4SDimitry Andric if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1232480093f4SDimitry Andric attrMask |= ATTR_EVEXL2; 1233480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 1234480093f4SDimitry Andric switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 1235480093f4SDimitry Andric case VEX_PREFIX_66: 1236480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1237480093f4SDimitry Andric break; 1238480093f4SDimitry Andric case VEX_PREFIX_F3: 1239480093f4SDimitry Andric attrMask |= ATTR_XS; 1240480093f4SDimitry Andric break; 1241480093f4SDimitry Andric case VEX_PREFIX_F2: 1242480093f4SDimitry Andric attrMask |= ATTR_XD; 1243480093f4SDimitry Andric break; 1244480093f4SDimitry Andric } 1245480093f4SDimitry Andric 1246480093f4SDimitry Andric if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 1247480093f4SDimitry Andric attrMask |= ATTR_VEXL; 1248480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 1249480093f4SDimitry Andric switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 1250480093f4SDimitry Andric case VEX_PREFIX_66: 1251480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1252fe6060f1SDimitry Andric if (insn->hasAdSize) 1253fe6060f1SDimitry Andric attrMask |= ATTR_ADSIZE; 1254480093f4SDimitry Andric break; 1255480093f4SDimitry Andric case VEX_PREFIX_F3: 1256480093f4SDimitry Andric attrMask |= ATTR_XS; 1257480093f4SDimitry Andric break; 1258480093f4SDimitry Andric case VEX_PREFIX_F2: 1259480093f4SDimitry Andric attrMask |= ATTR_XD; 1260480093f4SDimitry Andric 
break; 1261480093f4SDimitry Andric } 1262480093f4SDimitry Andric 1263480093f4SDimitry Andric if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 1264480093f4SDimitry Andric attrMask |= ATTR_VEXL; 1265480093f4SDimitry Andric } else if (insn->vectorExtensionType == TYPE_XOP) { 1266480093f4SDimitry Andric switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 1267480093f4SDimitry Andric case VEX_PREFIX_66: 1268480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1269480093f4SDimitry Andric break; 1270480093f4SDimitry Andric case VEX_PREFIX_F3: 1271480093f4SDimitry Andric attrMask |= ATTR_XS; 1272480093f4SDimitry Andric break; 1273480093f4SDimitry Andric case VEX_PREFIX_F2: 1274480093f4SDimitry Andric attrMask |= ATTR_XD; 1275480093f4SDimitry Andric break; 1276480093f4SDimitry Andric } 1277480093f4SDimitry Andric 1278480093f4SDimitry Andric if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 1279480093f4SDimitry Andric attrMask |= ATTR_VEXL; 1280480093f4SDimitry Andric } else { 1281480093f4SDimitry Andric return -1; 1282480093f4SDimitry Andric } 1283480093f4SDimitry Andric } else if (!insn->mandatoryPrefix) { 1284480093f4SDimitry Andric // If we don't have mandatory prefix we should use legacy prefixes here 1285480093f4SDimitry Andric if (insn->hasOpSize && (insn->mode != MODE_16BIT)) 1286480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1287480093f4SDimitry Andric if (insn->hasAdSize) 1288480093f4SDimitry Andric attrMask |= ATTR_ADSIZE; 1289480093f4SDimitry Andric if (insn->opcodeType == ONEBYTE) { 1290480093f4SDimitry Andric if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) 1291480093f4SDimitry Andric // Special support for PAUSE 1292480093f4SDimitry Andric attrMask |= ATTR_XS; 1293480093f4SDimitry Andric } else { 1294480093f4SDimitry Andric if (insn->repeatPrefix == 0xf2) 1295480093f4SDimitry Andric attrMask |= ATTR_XD; 1296480093f4SDimitry Andric else if (insn->repeatPrefix == 0xf3) 1297480093f4SDimitry Andric attrMask |= ATTR_XS; 1298480093f4SDimitry Andric } 
1299480093f4SDimitry Andric } else { 1300480093f4SDimitry Andric switch (insn->mandatoryPrefix) { 1301480093f4SDimitry Andric case 0xf2: 1302480093f4SDimitry Andric attrMask |= ATTR_XD; 1303480093f4SDimitry Andric break; 1304480093f4SDimitry Andric case 0xf3: 1305480093f4SDimitry Andric attrMask |= ATTR_XS; 1306480093f4SDimitry Andric break; 1307480093f4SDimitry Andric case 0x66: 1308480093f4SDimitry Andric if (insn->mode != MODE_16BIT) 1309480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1310fe6060f1SDimitry Andric if (insn->hasAdSize) 1311fe6060f1SDimitry Andric attrMask |= ATTR_ADSIZE; 1312480093f4SDimitry Andric break; 1313480093f4SDimitry Andric case 0x67: 1314480093f4SDimitry Andric attrMask |= ATTR_ADSIZE; 1315480093f4SDimitry Andric break; 1316480093f4SDimitry Andric } 1317480093f4SDimitry Andric } 1318480093f4SDimitry Andric 1319480093f4SDimitry Andric if (insn->rexPrefix & 0x08) { 1320480093f4SDimitry Andric attrMask |= ATTR_REXW; 1321480093f4SDimitry Andric attrMask &= ~ATTR_ADSIZE; 1322480093f4SDimitry Andric } 1323480093f4SDimitry Andric 13245f757f3fSDimitry Andric // Absolute jump and pushp/popp need special handling 13255f757f3fSDimitry Andric if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE && 13265f757f3fSDimitry Andric (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50)) 13275f757f3fSDimitry Andric attrMask |= ATTR_REX2; 13285f757f3fSDimitry Andric 1329480093f4SDimitry Andric if (insn->mode == MODE_16BIT) { 1330480093f4SDimitry Andric // JCXZ/JECXZ need special handling for 16-bit mode because the meaning 1331480093f4SDimitry Andric // of the AdSize prefix is inverted w.r.t. 32-bit mode. 
1332480093f4SDimitry Andric if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3) 1333480093f4SDimitry Andric attrMask ^= ATTR_ADSIZE; 1334480093f4SDimitry Andric // If we're in 16-bit mode and this is one of the relative jumps and opsize 1335480093f4SDimitry Andric // prefix isn't present, we need to force the opsize attribute since the 1336480093f4SDimitry Andric // prefix is inverted relative to 32-bit mode. 1337480093f4SDimitry Andric if (!insn->hasOpSize && insn->opcodeType == ONEBYTE && 1338480093f4SDimitry Andric (insn->opcode == 0xE8 || insn->opcode == 0xE9)) 1339480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1340480093f4SDimitry Andric 1341480093f4SDimitry Andric if (!insn->hasOpSize && insn->opcodeType == TWOBYTE && 1342480093f4SDimitry Andric insn->opcode >= 0x80 && insn->opcode <= 0x8F) 1343480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1344480093f4SDimitry Andric } 1345480093f4SDimitry Andric 1346480093f4SDimitry Andric 1347480093f4SDimitry Andric if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask)) 1348480093f4SDimitry Andric return -1; 1349480093f4SDimitry Andric 1350480093f4SDimitry Andric // The following clauses compensate for limitations of the tables. 1351480093f4SDimitry Andric 1352480093f4SDimitry Andric if (insn->mode != MODE_64BIT && 1353480093f4SDimitry Andric insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1354480093f4SDimitry Andric // The tables can't distinquish between cases where the W-bit is used to 1355480093f4SDimitry Andric // select register size and cases where its a required part of the opcode. 
1356480093f4SDimitry Andric if ((insn->vectorExtensionType == TYPE_EVEX && 1357480093f4SDimitry Andric wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || 1358480093f4SDimitry Andric (insn->vectorExtensionType == TYPE_VEX_3B && 1359480093f4SDimitry Andric wFromVEX3of3(insn->vectorExtensionPrefix[2])) || 1360480093f4SDimitry Andric (insn->vectorExtensionType == TYPE_XOP && 1361480093f4SDimitry Andric wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { 1362480093f4SDimitry Andric 1363480093f4SDimitry Andric uint16_t instructionIDWithREXW; 1364480093f4SDimitry Andric if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn, 1365480093f4SDimitry Andric attrMask | ATTR_REXW)) { 1366480093f4SDimitry Andric insn->instructionID = instructionID; 1367480093f4SDimitry Andric insn->spec = &INSTRUCTIONS_SYM[instructionID]; 1368480093f4SDimitry Andric return 0; 1369480093f4SDimitry Andric } 1370480093f4SDimitry Andric 1371480093f4SDimitry Andric auto SpecName = mii->getName(instructionIDWithREXW); 1372480093f4SDimitry Andric // If not a 64-bit instruction. Switch the opcode. 1373480093f4SDimitry Andric if (!is64Bit(SpecName.data())) { 1374480093f4SDimitry Andric insn->instructionID = instructionIDWithREXW; 1375480093f4SDimitry Andric insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW]; 1376480093f4SDimitry Andric return 0; 1377480093f4SDimitry Andric } 1378480093f4SDimitry Andric } 1379480093f4SDimitry Andric } 1380480093f4SDimitry Andric 1381480093f4SDimitry Andric // Absolute moves, umonitor, and movdir64b need special handling. 1382480093f4SDimitry Andric // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are 1383480093f4SDimitry Andric // inverted w.r.t. 1384480093f4SDimitry Andric // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in 1385480093f4SDimitry Andric // any position. 
1386480093f4SDimitry Andric if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || 1387480093f4SDimitry Andric (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) || 13885f757f3fSDimitry Andric (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) || 13895f757f3fSDimitry Andric (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) { 1390480093f4SDimitry Andric // Make sure we observed the prefixes in any position. 1391480093f4SDimitry Andric if (insn->hasAdSize) 1392480093f4SDimitry Andric attrMask |= ATTR_ADSIZE; 1393480093f4SDimitry Andric if (insn->hasOpSize) 1394480093f4SDimitry Andric attrMask |= ATTR_OPSIZE; 1395480093f4SDimitry Andric 1396480093f4SDimitry Andric // In 16-bit, invert the attributes. 1397480093f4SDimitry Andric if (insn->mode == MODE_16BIT) { 1398480093f4SDimitry Andric attrMask ^= ATTR_ADSIZE; 1399480093f4SDimitry Andric 1400480093f4SDimitry Andric // The OpSize attribute is only valid with the absolute moves. 1401480093f4SDimitry Andric if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) 1402480093f4SDimitry Andric attrMask ^= ATTR_OPSIZE; 1403480093f4SDimitry Andric } 1404480093f4SDimitry Andric 1405480093f4SDimitry Andric if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask)) 1406480093f4SDimitry Andric return -1; 1407480093f4SDimitry Andric 1408480093f4SDimitry Andric insn->instructionID = instructionID; 1409480093f4SDimitry Andric insn->spec = &INSTRUCTIONS_SYM[instructionID]; 1410480093f4SDimitry Andric return 0; 1411480093f4SDimitry Andric } 1412480093f4SDimitry Andric 1413480093f4SDimitry Andric if ((insn->mode == MODE_16BIT || insn->hasOpSize) && 1414480093f4SDimitry Andric !(attrMask & ATTR_OPSIZE)) { 1415480093f4SDimitry Andric // The instruction tables make no distinction between instructions that 1416480093f4SDimitry Andric // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1417480093f4SDimitry Andric // particular spot (i.e., many MMX operations). 
In general we're 1418480093f4SDimitry Andric // conservative, but in the specific case where OpSize is present but not in 1419480093f4SDimitry Andric // the right place we check if there's a 16-bit operation. 1420480093f4SDimitry Andric const struct InstructionSpecifier *spec; 1421480093f4SDimitry Andric uint16_t instructionIDWithOpsize; 1422480093f4SDimitry Andric llvm::StringRef specName, specWithOpSizeName; 1423480093f4SDimitry Andric 1424480093f4SDimitry Andric spec = &INSTRUCTIONS_SYM[instructionID]; 1425480093f4SDimitry Andric 1426480093f4SDimitry Andric if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn, 1427480093f4SDimitry Andric attrMask | ATTR_OPSIZE)) { 1428480093f4SDimitry Andric // ModRM required with OpSize but not present. Give up and return the 1429480093f4SDimitry Andric // version without OpSize set. 1430480093f4SDimitry Andric insn->instructionID = instructionID; 1431480093f4SDimitry Andric insn->spec = spec; 1432480093f4SDimitry Andric return 0; 1433480093f4SDimitry Andric } 1434480093f4SDimitry Andric 1435480093f4SDimitry Andric specName = mii->getName(instructionID); 1436480093f4SDimitry Andric specWithOpSizeName = mii->getName(instructionIDWithOpsize); 1437480093f4SDimitry Andric 1438480093f4SDimitry Andric if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && 1439480093f4SDimitry Andric (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { 1440480093f4SDimitry Andric insn->instructionID = instructionIDWithOpsize; 1441480093f4SDimitry Andric insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize]; 1442480093f4SDimitry Andric } else { 1443480093f4SDimitry Andric insn->instructionID = instructionID; 1444480093f4SDimitry Andric insn->spec = spec; 1445480093f4SDimitry Andric } 1446480093f4SDimitry Andric return 0; 1447480093f4SDimitry Andric } 1448480093f4SDimitry Andric 1449480093f4SDimitry Andric if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1450480093f4SDimitry Andric insn->rexPrefix & 0x01) { 
1451480093f4SDimitry Andric // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode 1452480093f4SDimitry Andric // as XCHG %r8, %eax. 1453480093f4SDimitry Andric const struct InstructionSpecifier *spec; 1454480093f4SDimitry Andric uint16_t instructionIDWithNewOpcode; 1455480093f4SDimitry Andric const struct InstructionSpecifier *specWithNewOpcode; 1456480093f4SDimitry Andric 1457480093f4SDimitry Andric spec = &INSTRUCTIONS_SYM[instructionID]; 1458480093f4SDimitry Andric 1459480093f4SDimitry Andric // Borrow opcode from one of the other XCHGar opcodes 1460480093f4SDimitry Andric insn->opcode = 0x91; 1461480093f4SDimitry Andric 1462480093f4SDimitry Andric if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn, 1463480093f4SDimitry Andric attrMask)) { 1464480093f4SDimitry Andric insn->opcode = 0x90; 1465480093f4SDimitry Andric 1466480093f4SDimitry Andric insn->instructionID = instructionID; 1467480093f4SDimitry Andric insn->spec = spec; 1468480093f4SDimitry Andric return 0; 1469480093f4SDimitry Andric } 1470480093f4SDimitry Andric 1471480093f4SDimitry Andric specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode]; 1472480093f4SDimitry Andric 1473480093f4SDimitry Andric // Change back 1474480093f4SDimitry Andric insn->opcode = 0x90; 1475480093f4SDimitry Andric 1476480093f4SDimitry Andric insn->instructionID = instructionIDWithNewOpcode; 1477480093f4SDimitry Andric insn->spec = specWithNewOpcode; 1478480093f4SDimitry Andric 1479480093f4SDimitry Andric return 0; 1480480093f4SDimitry Andric } 1481480093f4SDimitry Andric 1482480093f4SDimitry Andric insn->instructionID = instructionID; 1483480093f4SDimitry Andric insn->spec = &INSTRUCTIONS_SYM[insn->instructionID]; 1484480093f4SDimitry Andric 1485480093f4SDimitry Andric return 0; 1486480093f4SDimitry Andric } 1487480093f4SDimitry Andric 1488480093f4SDimitry Andric // Read an operand from the opcode field of an instruction and interprets it 1489480093f4SDimitry Andric // appropriately 
given the operand width. Handles AddRegFrm instructions. 1490480093f4SDimitry Andric // 1491480093f4SDimitry Andric // @param insn - the instruction whose opcode field is to be read. 1492480093f4SDimitry Andric // @param size - The width (in bytes) of the register being specified. 1493480093f4SDimitry Andric // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1494480093f4SDimitry Andric // RAX. 1495480093f4SDimitry Andric // @return - 0 on success; nonzero otherwise. 1496480093f4SDimitry Andric static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1497480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1498480093f4SDimitry Andric 1499480093f4SDimitry Andric if (size == 0) 1500480093f4SDimitry Andric size = insn->registerSize; 1501480093f4SDimitry Andric 15025f757f3fSDimitry Andric auto setOpcodeRegister = [&](unsigned base) { 15035f757f3fSDimitry Andric insn->opcodeRegister = 15045f757f3fSDimitry Andric (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) | 15055f757f3fSDimitry Andric (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) | 15065f757f3fSDimitry Andric (insn->opcode & 7))); 15075f757f3fSDimitry Andric }; 15085f757f3fSDimitry Andric 1509480093f4SDimitry Andric switch (size) { 1510480093f4SDimitry Andric case 1: 15115f757f3fSDimitry Andric setOpcodeRegister(MODRM_REG_AL); 1512480093f4SDimitry Andric if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1513480093f4SDimitry Andric insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1514480093f4SDimitry Andric insn->opcodeRegister = 1515480093f4SDimitry Andric (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1516480093f4SDimitry Andric } 1517480093f4SDimitry Andric 1518480093f4SDimitry Andric break; 1519480093f4SDimitry Andric case 2: 15205f757f3fSDimitry Andric setOpcodeRegister(MODRM_REG_AX); 1521480093f4SDimitry Andric break; 1522480093f4SDimitry Andric case 4: 15235f757f3fSDimitry Andric setOpcodeRegister(MODRM_REG_EAX); 
1524480093f4SDimitry Andric break; 1525480093f4SDimitry Andric case 8: 15265f757f3fSDimitry Andric setOpcodeRegister(MODRM_REG_RAX); 1527480093f4SDimitry Andric break; 1528480093f4SDimitry Andric } 1529480093f4SDimitry Andric 1530480093f4SDimitry Andric return 0; 1531480093f4SDimitry Andric } 1532480093f4SDimitry Andric 1533480093f4SDimitry Andric // Consume an immediate operand from an instruction, given the desired operand 1534480093f4SDimitry Andric // size. 1535480093f4SDimitry Andric // 1536480093f4SDimitry Andric // @param insn - The instruction whose operand is to be read. 1537480093f4SDimitry Andric // @param size - The width (in bytes) of the operand. 1538480093f4SDimitry Andric // @return - 0 if the immediate was successfully consumed; nonzero 1539480093f4SDimitry Andric // otherwise. 1540480093f4SDimitry Andric static int readImmediate(struct InternalInstruction *insn, uint8_t size) { 1541480093f4SDimitry Andric uint8_t imm8; 1542480093f4SDimitry Andric uint16_t imm16; 1543480093f4SDimitry Andric uint32_t imm32; 1544480093f4SDimitry Andric uint64_t imm64; 1545480093f4SDimitry Andric 1546480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readImmediate()"); 1547480093f4SDimitry Andric 1548480093f4SDimitry Andric assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates"); 1549480093f4SDimitry Andric 1550480093f4SDimitry Andric insn->immediateSize = size; 1551480093f4SDimitry Andric insn->immediateOffset = insn->readerCursor - insn->startLocation; 1552480093f4SDimitry Andric 1553480093f4SDimitry Andric switch (size) { 1554480093f4SDimitry Andric case 1: 1555480093f4SDimitry Andric if (consume(insn, imm8)) 1556480093f4SDimitry Andric return -1; 1557480093f4SDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm8; 1558480093f4SDimitry Andric break; 1559480093f4SDimitry Andric case 2: 1560480093f4SDimitry Andric if (consume(insn, imm16)) 1561480093f4SDimitry Andric return -1; 1562480093f4SDimitry Andric 
insn->immediates[insn->numImmediatesConsumed] = imm16; 1563480093f4SDimitry Andric break; 1564480093f4SDimitry Andric case 4: 1565480093f4SDimitry Andric if (consume(insn, imm32)) 1566480093f4SDimitry Andric return -1; 1567480093f4SDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm32; 1568480093f4SDimitry Andric break; 1569480093f4SDimitry Andric case 8: 1570480093f4SDimitry Andric if (consume(insn, imm64)) 1571480093f4SDimitry Andric return -1; 1572480093f4SDimitry Andric insn->immediates[insn->numImmediatesConsumed] = imm64; 1573480093f4SDimitry Andric break; 1574480093f4SDimitry Andric default: 1575480093f4SDimitry Andric llvm_unreachable("invalid size"); 1576480093f4SDimitry Andric } 1577480093f4SDimitry Andric 1578480093f4SDimitry Andric insn->numImmediatesConsumed++; 1579480093f4SDimitry Andric 1580480093f4SDimitry Andric return 0; 1581480093f4SDimitry Andric } 1582480093f4SDimitry Andric 1583480093f4SDimitry Andric // Consume vvvv from an instruction if it has a VEX prefix. 
1584480093f4SDimitry Andric static int readVVVV(struct InternalInstruction *insn) { 1585480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readVVVV()"); 1586480093f4SDimitry Andric 1587480093f4SDimitry Andric int vvvv; 1588480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX) 1589480093f4SDimitry Andric vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1590480093f4SDimitry Andric vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1591480093f4SDimitry Andric else if (insn->vectorExtensionType == TYPE_VEX_3B) 1592480093f4SDimitry Andric vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1593480093f4SDimitry Andric else if (insn->vectorExtensionType == TYPE_VEX_2B) 1594480093f4SDimitry Andric vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1595480093f4SDimitry Andric else if (insn->vectorExtensionType == TYPE_XOP) 1596480093f4SDimitry Andric vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1597480093f4SDimitry Andric else 1598480093f4SDimitry Andric return -1; 1599480093f4SDimitry Andric 1600480093f4SDimitry Andric if (insn->mode != MODE_64BIT) 1601480093f4SDimitry Andric vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later. 1602480093f4SDimitry Andric 1603480093f4SDimitry Andric insn->vvvv = static_cast<Reg>(vvvv); 1604480093f4SDimitry Andric return 0; 1605480093f4SDimitry Andric } 1606480093f4SDimitry Andric 1607480093f4SDimitry Andric // Read an mask register from the opcode field of an instruction. 1608480093f4SDimitry Andric // 1609480093f4SDimitry Andric // @param insn - The instruction whose opcode field is to be read. 1610480093f4SDimitry Andric // @return - 0 on success; nonzero otherwise. 
1611480093f4SDimitry Andric static int readMaskRegister(struct InternalInstruction *insn) { 1612480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readMaskRegister()"); 1613480093f4SDimitry Andric 1614480093f4SDimitry Andric if (insn->vectorExtensionType != TYPE_EVEX) 1615480093f4SDimitry Andric return -1; 1616480093f4SDimitry Andric 1617480093f4SDimitry Andric insn->writemask = 1618480093f4SDimitry Andric static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1619480093f4SDimitry Andric return 0; 1620480093f4SDimitry Andric } 1621480093f4SDimitry Andric 1622480093f4SDimitry Andric // Consults the specifier for an instruction and consumes all 1623480093f4SDimitry Andric // operands for that instruction, interpreting them as it goes. 1624480093f4SDimitry Andric static int readOperands(struct InternalInstruction *insn) { 1625480093f4SDimitry Andric int hasVVVV, needVVVV; 1626480093f4SDimitry Andric int sawRegImm = 0; 1627480093f4SDimitry Andric 1628480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "readOperands()"); 1629480093f4SDimitry Andric 1630480093f4SDimitry Andric // If non-zero vvvv specified, make sure one of the operands uses it. 1631480093f4SDimitry Andric hasVVVV = !readVVVV(insn); 1632480093f4SDimitry Andric needVVVV = hasVVVV && (insn->vvvv != 0); 1633480093f4SDimitry Andric 1634480093f4SDimitry Andric for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1635480093f4SDimitry Andric switch (Op.encoding) { 1636480093f4SDimitry Andric case ENCODING_NONE: 1637480093f4SDimitry Andric case ENCODING_SI: 1638480093f4SDimitry Andric case ENCODING_DI: 1639480093f4SDimitry Andric break; 1640480093f4SDimitry Andric CASE_ENCODING_VSIB: 1641480093f4SDimitry Andric // VSIB can use the V2 bit so check only the other bits. 
1642480093f4SDimitry Andric if (needVVVV) 1643480093f4SDimitry Andric needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); 1644480093f4SDimitry Andric if (readModRM(insn)) 1645480093f4SDimitry Andric return -1; 1646480093f4SDimitry Andric 1647480093f4SDimitry Andric // Reject if SIB wasn't used. 1648480093f4SDimitry Andric if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1649480093f4SDimitry Andric return -1; 1650480093f4SDimitry Andric 1651480093f4SDimitry Andric // If sibIndex was set to SIB_INDEX_NONE, index offset is 4. 1652480093f4SDimitry Andric if (insn->sibIndex == SIB_INDEX_NONE) 1653480093f4SDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4); 1654480093f4SDimitry Andric 1655480093f4SDimitry Andric // If EVEX.v2 is set this is one of the 16-31 registers. 1656480093f4SDimitry Andric if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 1657480093f4SDimitry Andric v2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1658480093f4SDimitry Andric insn->sibIndex = (SIBIndex)(insn->sibIndex + 16); 1659480093f4SDimitry Andric 1660480093f4SDimitry Andric // Adjust the index register to the correct size. 
1661480093f4SDimitry Andric switch ((OperandType)Op.type) { 1662480093f4SDimitry Andric default: 1663480093f4SDimitry Andric debug("Unhandled VSIB index type"); 1664480093f4SDimitry Andric return -1; 1665480093f4SDimitry Andric case TYPE_MVSIBX: 1666480093f4SDimitry Andric insn->sibIndex = 1667480093f4SDimitry Andric (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase)); 1668480093f4SDimitry Andric break; 1669480093f4SDimitry Andric case TYPE_MVSIBY: 1670480093f4SDimitry Andric insn->sibIndex = 1671480093f4SDimitry Andric (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase)); 1672480093f4SDimitry Andric break; 1673480093f4SDimitry Andric case TYPE_MVSIBZ: 1674480093f4SDimitry Andric insn->sibIndex = 1675480093f4SDimitry Andric (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase)); 1676480093f4SDimitry Andric break; 1677480093f4SDimitry Andric } 1678480093f4SDimitry Andric 1679480093f4SDimitry Andric // Apply the AVX512 compressed displacement scaling factor. 1680480093f4SDimitry Andric if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1681480093f4SDimitry Andric insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); 1682480093f4SDimitry Andric break; 16835ffd83dbSDimitry Andric case ENCODING_SIB: 16845ffd83dbSDimitry Andric // Reject if SIB wasn't used. 
16855ffd83dbSDimitry Andric if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 16865ffd83dbSDimitry Andric return -1; 16875ffd83dbSDimitry Andric if (readModRM(insn)) 16885ffd83dbSDimitry Andric return -1; 16895ffd83dbSDimitry Andric if (fixupReg(insn, &Op)) 16905ffd83dbSDimitry Andric return -1; 16915ffd83dbSDimitry Andric break; 1692480093f4SDimitry Andric case ENCODING_REG: 1693480093f4SDimitry Andric CASE_ENCODING_RM: 1694480093f4SDimitry Andric if (readModRM(insn)) 1695480093f4SDimitry Andric return -1; 1696480093f4SDimitry Andric if (fixupReg(insn, &Op)) 1697480093f4SDimitry Andric return -1; 1698480093f4SDimitry Andric // Apply the AVX512 compressed displacement scaling factor. 1699480093f4SDimitry Andric if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1700480093f4SDimitry Andric insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1701480093f4SDimitry Andric break; 1702480093f4SDimitry Andric case ENCODING_IB: 1703480093f4SDimitry Andric if (sawRegImm) { 1704480093f4SDimitry Andric // Saw a register immediate so don't read again and instead split the 1705480093f4SDimitry Andric // previous immediate. FIXME: This is a hack. 
1706480093f4SDimitry Andric insn->immediates[insn->numImmediatesConsumed] = 1707480093f4SDimitry Andric insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1708480093f4SDimitry Andric ++insn->numImmediatesConsumed; 1709480093f4SDimitry Andric break; 1710480093f4SDimitry Andric } 1711480093f4SDimitry Andric if (readImmediate(insn, 1)) 1712480093f4SDimitry Andric return -1; 1713480093f4SDimitry Andric if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) 1714480093f4SDimitry Andric sawRegImm = 1; 1715480093f4SDimitry Andric break; 1716480093f4SDimitry Andric case ENCODING_IW: 1717480093f4SDimitry Andric if (readImmediate(insn, 2)) 1718480093f4SDimitry Andric return -1; 1719480093f4SDimitry Andric break; 1720480093f4SDimitry Andric case ENCODING_ID: 1721480093f4SDimitry Andric if (readImmediate(insn, 4)) 1722480093f4SDimitry Andric return -1; 1723480093f4SDimitry Andric break; 1724480093f4SDimitry Andric case ENCODING_IO: 1725480093f4SDimitry Andric if (readImmediate(insn, 8)) 1726480093f4SDimitry Andric return -1; 1727480093f4SDimitry Andric break; 1728480093f4SDimitry Andric case ENCODING_Iv: 1729480093f4SDimitry Andric if (readImmediate(insn, insn->immediateSize)) 1730480093f4SDimitry Andric return -1; 1731480093f4SDimitry Andric break; 1732480093f4SDimitry Andric case ENCODING_Ia: 1733480093f4SDimitry Andric if (readImmediate(insn, insn->addressSize)) 1734480093f4SDimitry Andric return -1; 1735480093f4SDimitry Andric break; 1736480093f4SDimitry Andric case ENCODING_IRC: 1737480093f4SDimitry Andric insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | 1738480093f4SDimitry Andric lFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1739480093f4SDimitry Andric break; 1740480093f4SDimitry Andric case ENCODING_RB: 1741480093f4SDimitry Andric if (readOpcodeRegister(insn, 1)) 1742480093f4SDimitry Andric return -1; 1743480093f4SDimitry Andric break; 1744480093f4SDimitry Andric case ENCODING_RW: 1745480093f4SDimitry Andric if (readOpcodeRegister(insn, 2)) 
1746480093f4SDimitry Andric return -1; 1747480093f4SDimitry Andric break; 1748480093f4SDimitry Andric case ENCODING_RD: 1749480093f4SDimitry Andric if (readOpcodeRegister(insn, 4)) 1750480093f4SDimitry Andric return -1; 1751480093f4SDimitry Andric break; 1752480093f4SDimitry Andric case ENCODING_RO: 1753480093f4SDimitry Andric if (readOpcodeRegister(insn, 8)) 1754480093f4SDimitry Andric return -1; 1755480093f4SDimitry Andric break; 1756480093f4SDimitry Andric case ENCODING_Rv: 1757480093f4SDimitry Andric if (readOpcodeRegister(insn, 0)) 1758480093f4SDimitry Andric return -1; 1759480093f4SDimitry Andric break; 1760*0fca6ea1SDimitry Andric case ENCODING_CF: 1761*0fca6ea1SDimitry Andric insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]); 1762*0fca6ea1SDimitry Andric needVVVV = false; // oszc shares the same bits with VVVV 1763*0fca6ea1SDimitry Andric break; 1764480093f4SDimitry Andric case ENCODING_CC: 1765*0fca6ea1SDimitry Andric if (isCCMPOrCTEST(insn)) 1766*0fca6ea1SDimitry Andric insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1767*0fca6ea1SDimitry Andric else 1768480093f4SDimitry Andric insn->immediates[1] = insn->opcode & 0xf; 1769480093f4SDimitry Andric break; 1770480093f4SDimitry Andric case ENCODING_FP: 1771480093f4SDimitry Andric break; 1772480093f4SDimitry Andric case ENCODING_VVVV: 1773480093f4SDimitry Andric needVVVV = 0; // Mark that we have found a VVVV operand. 
1774480093f4SDimitry Andric if (!hasVVVV) 1775480093f4SDimitry Andric return -1; 1776480093f4SDimitry Andric if (insn->mode != MODE_64BIT) 1777480093f4SDimitry Andric insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7); 1778480093f4SDimitry Andric if (fixupReg(insn, &Op)) 1779480093f4SDimitry Andric return -1; 1780480093f4SDimitry Andric break; 1781480093f4SDimitry Andric case ENCODING_WRITEMASK: 1782480093f4SDimitry Andric if (readMaskRegister(insn)) 1783480093f4SDimitry Andric return -1; 1784480093f4SDimitry Andric break; 1785480093f4SDimitry Andric case ENCODING_DUP: 1786480093f4SDimitry Andric break; 1787480093f4SDimitry Andric default: 1788480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding."); 1789480093f4SDimitry Andric return -1; 1790480093f4SDimitry Andric } 1791480093f4SDimitry Andric } 1792480093f4SDimitry Andric 1793480093f4SDimitry Andric // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail 1794480093f4SDimitry Andric if (needVVVV) 1795480093f4SDimitry Andric return -1; 1796480093f4SDimitry Andric 1797480093f4SDimitry Andric return 0; 1798480093f4SDimitry Andric } 17990b57cec5SDimitry Andric 18000b57cec5SDimitry Andric namespace llvm { 18010b57cec5SDimitry Andric 18020b57cec5SDimitry Andric // Fill-ins to make the compiler happy. These constants are never actually 18030b57cec5SDimitry Andric // assigned; they are just filler to make an automatically-generated switch 18040b57cec5SDimitry Andric // statement work. 
18050b57cec5SDimitry Andric namespace X86 { 18060b57cec5SDimitry Andric enum { 18070b57cec5SDimitry Andric BX_SI = 500, 18080b57cec5SDimitry Andric BX_DI = 501, 18090b57cec5SDimitry Andric BP_SI = 502, 18100b57cec5SDimitry Andric BP_DI = 503, 18110b57cec5SDimitry Andric sib = 504, 18120b57cec5SDimitry Andric sib64 = 505 18130b57cec5SDimitry Andric }; 1814e8d8bef9SDimitry Andric } // namespace X86 18150b57cec5SDimitry Andric 1816e8d8bef9SDimitry Andric } // namespace llvm 18170b57cec5SDimitry Andric 18180b57cec5SDimitry Andric static bool translateInstruction(MCInst &target, 18190b57cec5SDimitry Andric InternalInstruction &source, 18200b57cec5SDimitry Andric const MCDisassembler *Dis); 18210b57cec5SDimitry Andric 18220b57cec5SDimitry Andric namespace { 18230b57cec5SDimitry Andric 18240b57cec5SDimitry Andric /// Generic disassembler for all X86 platforms. All each platform class should 18250b57cec5SDimitry Andric /// have to do is subclass the constructor, and provide a different 18260b57cec5SDimitry Andric /// disassemblerMode value. 
18270b57cec5SDimitry Andric class X86GenericDisassembler : public MCDisassembler { 18280b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MII; 18290b57cec5SDimitry Andric public: 18300b57cec5SDimitry Andric X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 18310b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MII); 18320b57cec5SDimitry Andric public: 18330b57cec5SDimitry Andric DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 18340b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address, 18350b57cec5SDimitry Andric raw_ostream &cStream) const override; 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric private: 18380b57cec5SDimitry Andric DisassemblerMode fMode; 18390b57cec5SDimitry Andric }; 18400b57cec5SDimitry Andric 1841e8d8bef9SDimitry Andric } // namespace 18420b57cec5SDimitry Andric 18430b57cec5SDimitry Andric X86GenericDisassembler::X86GenericDisassembler( 18440b57cec5SDimitry Andric const MCSubtargetInfo &STI, 18450b57cec5SDimitry Andric MCContext &Ctx, 18460b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MII) 18470b57cec5SDimitry Andric : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 18480b57cec5SDimitry Andric const FeatureBitset &FB = STI.getFeatureBits(); 184981ad6265SDimitry Andric if (FB[X86::Is16Bit]) { 18500b57cec5SDimitry Andric fMode = MODE_16BIT; 18510b57cec5SDimitry Andric return; 185281ad6265SDimitry Andric } else if (FB[X86::Is32Bit]) { 18530b57cec5SDimitry Andric fMode = MODE_32BIT; 18540b57cec5SDimitry Andric return; 185581ad6265SDimitry Andric } else if (FB[X86::Is64Bit]) { 18560b57cec5SDimitry Andric fMode = MODE_64BIT; 18570b57cec5SDimitry Andric return; 18580b57cec5SDimitry Andric } 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric llvm_unreachable("Invalid CPU mode"); 18610b57cec5SDimitry Andric } 18620b57cec5SDimitry Andric 18630b57cec5SDimitry Andric MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 18640b57cec5SDimitry Andric MCInst &Instr, 
uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1865480093f4SDimitry Andric raw_ostream &CStream) const { 18660b57cec5SDimitry Andric CommentStream = &CStream; 18670b57cec5SDimitry Andric 1868480093f4SDimitry Andric InternalInstruction Insn; 1869480093f4SDimitry Andric memset(&Insn, 0, sizeof(InternalInstruction)); 1870480093f4SDimitry Andric Insn.bytes = Bytes; 1871480093f4SDimitry Andric Insn.startLocation = Address; 1872480093f4SDimitry Andric Insn.readerCursor = Address; 1873480093f4SDimitry Andric Insn.mode = fMode; 18740b57cec5SDimitry Andric 1875480093f4SDimitry Andric if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1876480093f4SDimitry Andric getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1877480093f4SDimitry Andric readOperands(&Insn)) { 1878480093f4SDimitry Andric Size = Insn.readerCursor - Address; 18790b57cec5SDimitry Andric return Fail; 1880480093f4SDimitry Andric } 1881480093f4SDimitry Andric 1882480093f4SDimitry Andric Insn.operands = x86OperandSets[Insn.spec->operands]; 1883480093f4SDimitry Andric Insn.length = Insn.readerCursor - Insn.startLocation; 1884480093f4SDimitry Andric Size = Insn.length; 1885480093f4SDimitry Andric if (Size > 15) 1886480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1887480093f4SDimitry Andric 1888480093f4SDimitry Andric bool Ret = translateInstruction(Instr, Insn, this); 18890b57cec5SDimitry Andric if (!Ret) { 18900b57cec5SDimitry Andric unsigned Flags = X86::IP_NO_PREFIX; 1891480093f4SDimitry Andric if (Insn.hasAdSize) 18920b57cec5SDimitry Andric Flags |= X86::IP_HAS_AD_SIZE; 1893480093f4SDimitry Andric if (!Insn.mandatoryPrefix) { 1894480093f4SDimitry Andric if (Insn.hasOpSize) 18950b57cec5SDimitry Andric Flags |= X86::IP_HAS_OP_SIZE; 1896480093f4SDimitry Andric if (Insn.repeatPrefix == 0xf2) 18970b57cec5SDimitry Andric Flags |= X86::IP_HAS_REPEAT_NE; 1898480093f4SDimitry Andric else if (Insn.repeatPrefix == 0xf3 && 18990b57cec5SDimitry 
Andric // It should not be 'pause' f3 90 1900480093f4SDimitry Andric Insn.opcode != 0x90) 19010b57cec5SDimitry Andric Flags |= X86::IP_HAS_REPEAT; 1902480093f4SDimitry Andric if (Insn.hasLockPrefix) 19030b57cec5SDimitry Andric Flags |= X86::IP_HAS_LOCK; 19040b57cec5SDimitry Andric } 19050b57cec5SDimitry Andric Instr.setFlags(Flags); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric return (!Ret) ? Success : Fail; 19080b57cec5SDimitry Andric } 19090b57cec5SDimitry Andric 19100b57cec5SDimitry Andric // 19110b57cec5SDimitry Andric // Private code that translates from struct InternalInstructions to MCInsts. 19120b57cec5SDimitry Andric // 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric /// translateRegister - Translates an internal register to the appropriate LLVM 19150b57cec5SDimitry Andric /// register, and appends it as an operand to an MCInst. 19160b57cec5SDimitry Andric /// 19170b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 19180b57cec5SDimitry Andric /// @param reg - The Reg to append. 
19190b57cec5SDimitry Andric static void translateRegister(MCInst &mcInst, Reg reg) { 19200b57cec5SDimitry Andric #define ENTRY(x) X86::x, 19210b57cec5SDimitry Andric static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 19220b57cec5SDimitry Andric #undef ENTRY 19230b57cec5SDimitry Andric 19240b57cec5SDimitry Andric MCPhysReg llvmRegnum = llvmRegnums[reg]; 19250b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 19260b57cec5SDimitry Andric } 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 19290b57cec5SDimitry Andric 0, // SEG_OVERRIDE_NONE 19300b57cec5SDimitry Andric X86::CS, 19310b57cec5SDimitry Andric X86::SS, 19320b57cec5SDimitry Andric X86::DS, 19330b57cec5SDimitry Andric X86::ES, 19340b57cec5SDimitry Andric X86::FS, 19350b57cec5SDimitry Andric X86::GS 19360b57cec5SDimitry Andric }; 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric /// translateSrcIndex - Appends a source index operand to an MCInst. 19390b57cec5SDimitry Andric /// 19400b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 19410b57cec5SDimitry Andric /// @param insn - The internal instruction. 19420b57cec5SDimitry Andric static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 19430b57cec5SDimitry Andric unsigned baseRegNo; 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric if (insn.mode == MODE_64BIT) 19460b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 19470b57cec5SDimitry Andric else if (insn.mode == MODE_32BIT) 19480b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 19490b57cec5SDimitry Andric else { 19500b57cec5SDimitry Andric assert(insn.mode == MODE_16BIT); 19510b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 19520b57cec5SDimitry Andric } 19530b57cec5SDimitry Andric MCOperand baseReg = MCOperand::createReg(baseRegNo); 19540b57cec5SDimitry Andric mcInst.addOperand(baseReg); 19550b57cec5SDimitry Andric 19560b57cec5SDimitry Andric MCOperand segmentReg; 19570b57cec5SDimitry Andric segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 19580b57cec5SDimitry Andric mcInst.addOperand(segmentReg); 19590b57cec5SDimitry Andric return false; 19600b57cec5SDimitry Andric } 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric /// translateDstIndex - Appends a destination index operand to an MCInst. 19630b57cec5SDimitry Andric /// 19640b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 19650b57cec5SDimitry Andric /// @param insn - The internal instruction. 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 19680b57cec5SDimitry Andric unsigned baseRegNo; 19690b57cec5SDimitry Andric 19700b57cec5SDimitry Andric if (insn.mode == MODE_64BIT) 19710b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 19720b57cec5SDimitry Andric else if (insn.mode == MODE_32BIT) 19730b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 19740b57cec5SDimitry Andric else { 19750b57cec5SDimitry Andric assert(insn.mode == MODE_16BIT); 19760b57cec5SDimitry Andric baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 19770b57cec5SDimitry Andric } 19780b57cec5SDimitry Andric MCOperand baseReg = MCOperand::createReg(baseRegNo); 19790b57cec5SDimitry Andric mcInst.addOperand(baseReg); 19800b57cec5SDimitry Andric return false; 19810b57cec5SDimitry Andric } 19820b57cec5SDimitry Andric 19830b57cec5SDimitry Andric /// translateImmediate - Appends an immediate operand to an MCInst. 19840b57cec5SDimitry Andric /// 19850b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 
19860b57cec5SDimitry Andric /// @param immediate - The immediate value to append. 19870b57cec5SDimitry Andric /// @param operand - The operand, as stored in the descriptor table. 19880b57cec5SDimitry Andric /// @param insn - The internal instruction. 19890b57cec5SDimitry Andric static void translateImmediate(MCInst &mcInst, uint64_t immediate, 19900b57cec5SDimitry Andric const OperandSpecifier &operand, 19910b57cec5SDimitry Andric InternalInstruction &insn, 19920b57cec5SDimitry Andric const MCDisassembler *Dis) { 19930b57cec5SDimitry Andric // Sign-extend the immediate if necessary. 19940b57cec5SDimitry Andric 19950b57cec5SDimitry Andric OperandType type = (OperandType)operand.type; 19960b57cec5SDimitry Andric 19970b57cec5SDimitry Andric bool isBranch = false; 19980b57cec5SDimitry Andric uint64_t pcrel = 0; 19990b57cec5SDimitry Andric if (type == TYPE_REL) { 20000b57cec5SDimitry Andric isBranch = true; 200181ad6265SDimitry Andric pcrel = insn.startLocation + insn.length; 20020b57cec5SDimitry Andric switch (operand.encoding) { 20030b57cec5SDimitry Andric default: 20040b57cec5SDimitry Andric break; 20050b57cec5SDimitry Andric case ENCODING_Iv: 20060b57cec5SDimitry Andric switch (insn.displacementSize) { 20070b57cec5SDimitry Andric default: 20080b57cec5SDimitry Andric break; 20090b57cec5SDimitry Andric case 1: 20100b57cec5SDimitry Andric if(immediate & 0x80) 20110b57cec5SDimitry Andric immediate |= ~(0xffull); 20120b57cec5SDimitry Andric break; 20130b57cec5SDimitry Andric case 2: 20140b57cec5SDimitry Andric if(immediate & 0x8000) 20150b57cec5SDimitry Andric immediate |= ~(0xffffull); 20160b57cec5SDimitry Andric break; 20170b57cec5SDimitry Andric case 4: 20180b57cec5SDimitry Andric if(immediate & 0x80000000) 20190b57cec5SDimitry Andric immediate |= ~(0xffffffffull); 20200b57cec5SDimitry Andric break; 20210b57cec5SDimitry Andric case 8: 20220b57cec5SDimitry Andric break; 20230b57cec5SDimitry Andric } 20240b57cec5SDimitry Andric break; 20250b57cec5SDimitry Andric case 
ENCODING_IB: 20260b57cec5SDimitry Andric if(immediate & 0x80) 20270b57cec5SDimitry Andric immediate |= ~(0xffull); 20280b57cec5SDimitry Andric break; 20290b57cec5SDimitry Andric case ENCODING_IW: 20300b57cec5SDimitry Andric if(immediate & 0x8000) 20310b57cec5SDimitry Andric immediate |= ~(0xffffull); 20320b57cec5SDimitry Andric break; 20330b57cec5SDimitry Andric case ENCODING_ID: 20340b57cec5SDimitry Andric if(immediate & 0x80000000) 20350b57cec5SDimitry Andric immediate |= ~(0xffffffffull); 20360b57cec5SDimitry Andric break; 20370b57cec5SDimitry Andric } 20380b57cec5SDimitry Andric } 20390b57cec5SDimitry Andric // By default sign-extend all X86 immediates based on their encoding. 20400b57cec5SDimitry Andric else if (type == TYPE_IMM) { 20410b57cec5SDimitry Andric switch (operand.encoding) { 20420b57cec5SDimitry Andric default: 20430b57cec5SDimitry Andric break; 20440b57cec5SDimitry Andric case ENCODING_IB: 20450b57cec5SDimitry Andric if(immediate & 0x80) 20460b57cec5SDimitry Andric immediate |= ~(0xffull); 20470b57cec5SDimitry Andric break; 20480b57cec5SDimitry Andric case ENCODING_IW: 20490b57cec5SDimitry Andric if(immediate & 0x8000) 20500b57cec5SDimitry Andric immediate |= ~(0xffffull); 20510b57cec5SDimitry Andric break; 20520b57cec5SDimitry Andric case ENCODING_ID: 20530b57cec5SDimitry Andric if(immediate & 0x80000000) 20540b57cec5SDimitry Andric immediate |= ~(0xffffffffull); 20550b57cec5SDimitry Andric break; 20560b57cec5SDimitry Andric case ENCODING_IO: 20570b57cec5SDimitry Andric break; 20580b57cec5SDimitry Andric } 20590b57cec5SDimitry Andric } 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric switch (type) { 20620b57cec5SDimitry Andric case TYPE_XMM: 20630b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 20640b57cec5SDimitry Andric return; 20650b57cec5SDimitry Andric case TYPE_YMM: 20660b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 20670b57cec5SDimitry 
Andric return; 20680b57cec5SDimitry Andric case TYPE_ZMM: 20690b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 20700b57cec5SDimitry Andric return; 20710b57cec5SDimitry Andric default: 20720b57cec5SDimitry Andric // operand is 64 bits wide. Do nothing. 20730b57cec5SDimitry Andric break; 20740b57cec5SDimitry Andric } 20750b57cec5SDimitry Andric 207681ad6265SDimitry Andric if (!Dis->tryAddingSymbolicOperand( 207781ad6265SDimitry Andric mcInst, immediate + pcrel, insn.startLocation, isBranch, 207881ad6265SDimitry Andric insn.immediateOffset, insn.immediateSize, insn.length)) 20790b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createImm(immediate)); 20800b57cec5SDimitry Andric 20810b57cec5SDimitry Andric if (type == TYPE_MOFFS) { 20820b57cec5SDimitry Andric MCOperand segmentReg; 20830b57cec5SDimitry Andric segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 20840b57cec5SDimitry Andric mcInst.addOperand(segmentReg); 20850b57cec5SDimitry Andric } 20860b57cec5SDimitry Andric } 20870b57cec5SDimitry Andric 20880b57cec5SDimitry Andric /// translateRMRegister - Translates a register stored in the R/M field of the 20890b57cec5SDimitry Andric /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 20900b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 20910b57cec5SDimitry Andric /// @param insn - The internal instruction to extract the R/M field 20920b57cec5SDimitry Andric /// from. 
20930b57cec5SDimitry Andric /// @return - 0 on success; -1 otherwise 20940b57cec5SDimitry Andric static bool translateRMRegister(MCInst &mcInst, 20950b57cec5SDimitry Andric InternalInstruction &insn) { 20960b57cec5SDimitry Andric if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 20970b57cec5SDimitry Andric debug("A R/M register operand may not have a SIB byte"); 20980b57cec5SDimitry Andric return true; 20990b57cec5SDimitry Andric } 21000b57cec5SDimitry Andric 21010b57cec5SDimitry Andric switch (insn.eaBase) { 21020b57cec5SDimitry Andric default: 21030b57cec5SDimitry Andric debug("Unexpected EA base register"); 21040b57cec5SDimitry Andric return true; 21050b57cec5SDimitry Andric case EA_BASE_NONE: 21060b57cec5SDimitry Andric debug("EA_BASE_NONE for ModR/M base"); 21070b57cec5SDimitry Andric return true; 21080b57cec5SDimitry Andric #define ENTRY(x) case EA_BASE_##x: 21090b57cec5SDimitry Andric ALL_EA_BASES 21100b57cec5SDimitry Andric #undef ENTRY 21110b57cec5SDimitry Andric debug("A R/M register operand may not have a base; " 21120b57cec5SDimitry Andric "the operand must be a register."); 21130b57cec5SDimitry Andric return true; 21140b57cec5SDimitry Andric #define ENTRY(x) \ 21150b57cec5SDimitry Andric case EA_REG_##x: \ 21160b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::x)); break; 21170b57cec5SDimitry Andric ALL_REGS 21180b57cec5SDimitry Andric #undef ENTRY 21190b57cec5SDimitry Andric } 21200b57cec5SDimitry Andric 21210b57cec5SDimitry Andric return false; 21220b57cec5SDimitry Andric } 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 21250b57cec5SDimitry Andric /// fields of an internal instruction (and possibly its SIB byte) to a memory 21260b57cec5SDimitry Andric /// operand in LLVM's format, and appends it to an MCInst. 21270b57cec5SDimitry Andric /// 21280b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 
21290b57cec5SDimitry Andric /// @param insn - The instruction to extract Mod, R/M, and SIB fields 21300b57cec5SDimitry Andric /// from. 21315ffd83dbSDimitry Andric /// @param ForceSIB - The instruction must use SIB. 21320b57cec5SDimitry Andric /// @return - 0 on success; nonzero otherwise 21330b57cec5SDimitry Andric static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 21345ffd83dbSDimitry Andric const MCDisassembler *Dis, 21355ffd83dbSDimitry Andric bool ForceSIB = false) { 21360b57cec5SDimitry Andric // Addresses in an MCInst are represented as five operands: 21370b57cec5SDimitry Andric // 1. basereg (register) The R/M base, or (if there is a SIB) the 21380b57cec5SDimitry Andric // SIB base 21390b57cec5SDimitry Andric // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 21400b57cec5SDimitry Andric // scale amount 21410b57cec5SDimitry Andric // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 21420b57cec5SDimitry Andric // the index (which is multiplied by the 21430b57cec5SDimitry Andric // scale amount) 21440b57cec5SDimitry Andric // 4. displacement (immediate) 0, or the displacement if there is one 21450b57cec5SDimitry Andric // 5. 
segmentreg (register) x86_registerNONE for now, but could be set 21460b57cec5SDimitry Andric // if we have segment overrides 21470b57cec5SDimitry Andric 21480b57cec5SDimitry Andric MCOperand baseReg; 21490b57cec5SDimitry Andric MCOperand scaleAmount; 21500b57cec5SDimitry Andric MCOperand indexReg; 21510b57cec5SDimitry Andric MCOperand displacement; 21520b57cec5SDimitry Andric MCOperand segmentReg; 21530b57cec5SDimitry Andric uint64_t pcrel = 0; 21540b57cec5SDimitry Andric 21550b57cec5SDimitry Andric if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 21560b57cec5SDimitry Andric if (insn.sibBase != SIB_BASE_NONE) { 21570b57cec5SDimitry Andric switch (insn.sibBase) { 21580b57cec5SDimitry Andric default: 21590b57cec5SDimitry Andric debug("Unexpected sibBase"); 21600b57cec5SDimitry Andric return true; 21610b57cec5SDimitry Andric #define ENTRY(x) \ 21620b57cec5SDimitry Andric case SIB_BASE_##x: \ 21630b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::x); break; 21640b57cec5SDimitry Andric ALL_SIB_BASES 21650b57cec5SDimitry Andric #undef ENTRY 21660b57cec5SDimitry Andric } 21670b57cec5SDimitry Andric } else { 21680b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::NoRegister); 21690b57cec5SDimitry Andric } 21700b57cec5SDimitry Andric 21710b57cec5SDimitry Andric if (insn.sibIndex != SIB_INDEX_NONE) { 21720b57cec5SDimitry Andric switch (insn.sibIndex) { 21730b57cec5SDimitry Andric default: 21740b57cec5SDimitry Andric debug("Unexpected sibIndex"); 21750b57cec5SDimitry Andric return true; 21760b57cec5SDimitry Andric #define ENTRY(x) \ 21770b57cec5SDimitry Andric case SIB_INDEX_##x: \ 21780b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::x); break; 21790b57cec5SDimitry Andric EA_BASES_32BIT 21800b57cec5SDimitry Andric EA_BASES_64BIT 21810b57cec5SDimitry Andric REGS_XMM 21820b57cec5SDimitry Andric REGS_YMM 21830b57cec5SDimitry Andric REGS_ZMM 21840b57cec5SDimitry Andric #undef ENTRY 21850b57cec5SDimitry Andric } 21860b57cec5SDimitry 
Andric } else { 21870b57cec5SDimitry Andric // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 21880b57cec5SDimitry Andric // but no index is used and modrm alone should have been enough. 21890b57cec5SDimitry Andric // -No base register in 32-bit mode. In 64-bit mode this is used to 21900b57cec5SDimitry Andric // avoid rip-relative addressing. 21910b57cec5SDimitry Andric // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 21920b57cec5SDimitry Andric // base always requires a SIB byte. 21930b57cec5SDimitry Andric // -A scale other than 1 is used. 21945ffd83dbSDimitry Andric if (!ForceSIB && 21955ffd83dbSDimitry Andric (insn.sibScale != 1 || 21960b57cec5SDimitry Andric (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 21970b57cec5SDimitry Andric (insn.sibBase != SIB_BASE_NONE && 21980b57cec5SDimitry Andric insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 21995ffd83dbSDimitry Andric insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 22000b57cec5SDimitry Andric indexReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIZ : 22010b57cec5SDimitry Andric X86::RIZ); 22020b57cec5SDimitry Andric } else 22030b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::NoRegister); 22040b57cec5SDimitry Andric } 22050b57cec5SDimitry Andric 22060b57cec5SDimitry Andric scaleAmount = MCOperand::createImm(insn.sibScale); 22070b57cec5SDimitry Andric } else { 22080b57cec5SDimitry Andric switch (insn.eaBase) { 22090b57cec5SDimitry Andric case EA_BASE_NONE: 22100b57cec5SDimitry Andric if (insn.eaDisplacement == EA_DISP_NONE) { 22110b57cec5SDimitry Andric debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 22120b57cec5SDimitry Andric return true; 22130b57cec5SDimitry Andric } 22140b57cec5SDimitry Andric if (insn.mode == MODE_64BIT){ 221581ad6265SDimitry Andric pcrel = insn.startLocation + insn.length; 221681ad6265SDimitry Andric Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 221781ad6265SDimitry Andric insn.startLocation + 221881ad6265SDimitry Andric insn.displacementOffset); 22190b57cec5SDimitry Andric // Section 2.2.1.6 22200b57cec5SDimitry Andric baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 22210b57cec5SDimitry Andric X86::RIP); 22220b57cec5SDimitry Andric } 22230b57cec5SDimitry Andric else 22240b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::NoRegister); 22250b57cec5SDimitry Andric 22260b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::NoRegister); 22270b57cec5SDimitry Andric break; 22280b57cec5SDimitry Andric case EA_BASE_BX_SI: 22290b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::BX); 22300b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::SI); 22310b57cec5SDimitry Andric break; 22320b57cec5SDimitry Andric case EA_BASE_BX_DI: 22330b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::BX); 22340b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::DI); 22350b57cec5SDimitry Andric break; 22360b57cec5SDimitry Andric case EA_BASE_BP_SI: 22370b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::BP); 22380b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::SI); 22390b57cec5SDimitry Andric break; 22400b57cec5SDimitry Andric case EA_BASE_BP_DI: 22410b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::BP); 22420b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::DI); 22430b57cec5SDimitry Andric break; 22440b57cec5SDimitry Andric default: 22450b57cec5SDimitry Andric indexReg = MCOperand::createReg(X86::NoRegister); 22460b57cec5SDimitry Andric switch (insn.eaBase) { 22470b57cec5SDimitry Andric default: 22480b57cec5SDimitry Andric debug("Unexpected eaBase"); 22490b57cec5SDimitry Andric return true; 22500b57cec5SDimitry Andric // Here, we will use the fill-ins defined above. However, 22510b57cec5SDimitry Andric // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 22520b57cec5SDimitry Andric // sib and sib64 were handled in the top-level if, so they're only 22530b57cec5SDimitry Andric // placeholders to keep the compiler happy. 
22540b57cec5SDimitry Andric #define ENTRY(x) \ 22550b57cec5SDimitry Andric case EA_BASE_##x: \ 22560b57cec5SDimitry Andric baseReg = MCOperand::createReg(X86::x); break; 22570b57cec5SDimitry Andric ALL_EA_BASES 22580b57cec5SDimitry Andric #undef ENTRY 22590b57cec5SDimitry Andric #define ENTRY(x) case EA_REG_##x: 22600b57cec5SDimitry Andric ALL_REGS 22610b57cec5SDimitry Andric #undef ENTRY 22620b57cec5SDimitry Andric debug("A R/M memory operand may not be a register; " 22630b57cec5SDimitry Andric "the base field must be a base."); 22640b57cec5SDimitry Andric return true; 22650b57cec5SDimitry Andric } 22660b57cec5SDimitry Andric } 22670b57cec5SDimitry Andric 22680b57cec5SDimitry Andric scaleAmount = MCOperand::createImm(1); 22690b57cec5SDimitry Andric } 22700b57cec5SDimitry Andric 22710b57cec5SDimitry Andric displacement = MCOperand::createImm(insn.displacement); 22720b57cec5SDimitry Andric 22730b57cec5SDimitry Andric segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 22740b57cec5SDimitry Andric 22750b57cec5SDimitry Andric mcInst.addOperand(baseReg); 22760b57cec5SDimitry Andric mcInst.addOperand(scaleAmount); 22770b57cec5SDimitry Andric mcInst.addOperand(indexReg); 227881ad6265SDimitry Andric 227981ad6265SDimitry Andric const uint8_t dispSize = 228081ad6265SDimitry Andric (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 228181ad6265SDimitry Andric 228281ad6265SDimitry Andric if (!Dis->tryAddingSymbolicOperand( 228381ad6265SDimitry Andric mcInst, insn.displacement + pcrel, insn.startLocation, false, 228481ad6265SDimitry Andric insn.displacementOffset, dispSize, insn.length)) 22850b57cec5SDimitry Andric mcInst.addOperand(displacement); 22860b57cec5SDimitry Andric mcInst.addOperand(segmentReg); 22870b57cec5SDimitry Andric return false; 22880b57cec5SDimitry Andric } 22890b57cec5SDimitry Andric 22900b57cec5SDimitry Andric /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 22910b57cec5SDimitry Andric /// byte of an instruction to LLVM form, and appends it to an MCInst. 22920b57cec5SDimitry Andric /// 22930b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 22940b57cec5SDimitry Andric /// @param operand - The operand, as stored in the descriptor table. 22950b57cec5SDimitry Andric /// @param insn - The instruction to extract Mod, R/M, and SIB fields 22960b57cec5SDimitry Andric /// from. 
22970b57cec5SDimitry Andric /// @return - 0 on success; nonzero otherwise 22980b57cec5SDimitry Andric static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 22990b57cec5SDimitry Andric InternalInstruction &insn, const MCDisassembler *Dis) { 23000b57cec5SDimitry Andric switch (operand.type) { 23010b57cec5SDimitry Andric default: 23020b57cec5SDimitry Andric debug("Unexpected type for a R/M operand"); 23030b57cec5SDimitry Andric return true; 23040b57cec5SDimitry Andric case TYPE_R8: 23050b57cec5SDimitry Andric case TYPE_R16: 23060b57cec5SDimitry Andric case TYPE_R32: 23070b57cec5SDimitry Andric case TYPE_R64: 23080b57cec5SDimitry Andric case TYPE_Rv: 23090b57cec5SDimitry Andric case TYPE_MM64: 23100b57cec5SDimitry Andric case TYPE_XMM: 23110b57cec5SDimitry Andric case TYPE_YMM: 23120b57cec5SDimitry Andric case TYPE_ZMM: 23135ffd83dbSDimitry Andric case TYPE_TMM: 23140b57cec5SDimitry Andric case TYPE_VK_PAIR: 23150b57cec5SDimitry Andric case TYPE_VK: 23160b57cec5SDimitry Andric case TYPE_DEBUGREG: 23170b57cec5SDimitry Andric case TYPE_CONTROLREG: 23180b57cec5SDimitry Andric case TYPE_BNDR: 23190b57cec5SDimitry Andric return translateRMRegister(mcInst, insn); 23200b57cec5SDimitry Andric case TYPE_M: 23210b57cec5SDimitry Andric case TYPE_MVSIBX: 23220b57cec5SDimitry Andric case TYPE_MVSIBY: 23230b57cec5SDimitry Andric case TYPE_MVSIBZ: 23240b57cec5SDimitry Andric return translateRMMemory(mcInst, insn, Dis); 23255ffd83dbSDimitry Andric case TYPE_MSIB: 23265ffd83dbSDimitry Andric return translateRMMemory(mcInst, insn, Dis, true); 23270b57cec5SDimitry Andric } 23280b57cec5SDimitry Andric } 23290b57cec5SDimitry Andric 23300b57cec5SDimitry Andric /// translateFPRegister - Translates a stack position on the FPU stack to its 23310b57cec5SDimitry Andric /// LLVM form, and appends it to an MCInst. 23320b57cec5SDimitry Andric /// 23330b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 
23340b57cec5SDimitry Andric /// @param stackPos - The stack position to translate. 23350b57cec5SDimitry Andric static void translateFPRegister(MCInst &mcInst, 23360b57cec5SDimitry Andric uint8_t stackPos) { 23370b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 23380b57cec5SDimitry Andric } 23390b57cec5SDimitry Andric 23400b57cec5SDimitry Andric /// translateMaskRegister - Translates a 3-bit mask register number to 23410b57cec5SDimitry Andric /// LLVM form, and appends it to an MCInst. 23420b57cec5SDimitry Andric /// 23430b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 23440b57cec5SDimitry Andric /// @param maskRegNum - Number of mask register from 0 to 7. 23450b57cec5SDimitry Andric /// @return - false on success; true otherwise. 23460b57cec5SDimitry Andric static bool translateMaskRegister(MCInst &mcInst, 23470b57cec5SDimitry Andric uint8_t maskRegNum) { 23480b57cec5SDimitry Andric if (maskRegNum >= 8) { 23490b57cec5SDimitry Andric debug("Invalid mask register number"); 23500b57cec5SDimitry Andric return true; 23510b57cec5SDimitry Andric } 23520b57cec5SDimitry Andric 23530b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 23540b57cec5SDimitry Andric return false; 23550b57cec5SDimitry Andric } 23560b57cec5SDimitry Andric 23570b57cec5SDimitry Andric /// translateOperand - Translates an operand stored in an internal instruction 23580b57cec5SDimitry Andric /// to LLVM's format and appends it to an MCInst. 23590b57cec5SDimitry Andric /// 23600b57cec5SDimitry Andric /// @param mcInst - The MCInst to append to. 23610b57cec5SDimitry Andric /// @param operand - The operand, as stored in the descriptor table. 23620b57cec5SDimitry Andric /// @param insn - The internal instruction. 23630b57cec5SDimitry Andric /// @return - false on success; true otherwise. 
23640b57cec5SDimitry Andric static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 23650b57cec5SDimitry Andric InternalInstruction &insn, 23660b57cec5SDimitry Andric const MCDisassembler *Dis) { 23670b57cec5SDimitry Andric switch (operand.encoding) { 23680b57cec5SDimitry Andric default: 23690b57cec5SDimitry Andric debug("Unhandled operand encoding during translation"); 23700b57cec5SDimitry Andric return true; 23710b57cec5SDimitry Andric case ENCODING_REG: 23720b57cec5SDimitry Andric translateRegister(mcInst, insn.reg); 23730b57cec5SDimitry Andric return false; 23740b57cec5SDimitry Andric case ENCODING_WRITEMASK: 23750b57cec5SDimitry Andric return translateMaskRegister(mcInst, insn.writemask); 23765ffd83dbSDimitry Andric case ENCODING_SIB: 23770b57cec5SDimitry Andric CASE_ENCODING_RM: 23780b57cec5SDimitry Andric CASE_ENCODING_VSIB: 23790b57cec5SDimitry Andric return translateRM(mcInst, operand, insn, Dis); 23800b57cec5SDimitry Andric case ENCODING_IB: 23810b57cec5SDimitry Andric case ENCODING_IW: 23820b57cec5SDimitry Andric case ENCODING_ID: 23830b57cec5SDimitry Andric case ENCODING_IO: 23840b57cec5SDimitry Andric case ENCODING_Iv: 23850b57cec5SDimitry Andric case ENCODING_Ia: 23860b57cec5SDimitry Andric translateImmediate(mcInst, 23870b57cec5SDimitry Andric insn.immediates[insn.numImmediatesTranslated++], 23880b57cec5SDimitry Andric operand, 23890b57cec5SDimitry Andric insn, 23900b57cec5SDimitry Andric Dis); 23910b57cec5SDimitry Andric return false; 23920b57cec5SDimitry Andric case ENCODING_IRC: 23930b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createImm(insn.RC)); 23940b57cec5SDimitry Andric return false; 23950b57cec5SDimitry Andric case ENCODING_SI: 23960b57cec5SDimitry Andric return translateSrcIndex(mcInst, insn); 23970b57cec5SDimitry Andric case ENCODING_DI: 23980b57cec5SDimitry Andric return translateDstIndex(mcInst, insn); 23990b57cec5SDimitry Andric case ENCODING_RB: 24000b57cec5SDimitry Andric case ENCODING_RW: 
24010b57cec5SDimitry Andric case ENCODING_RD: 24020b57cec5SDimitry Andric case ENCODING_RO: 24030b57cec5SDimitry Andric case ENCODING_Rv: 24040b57cec5SDimitry Andric translateRegister(mcInst, insn.opcodeRegister); 24050b57cec5SDimitry Andric return false; 2406*0fca6ea1SDimitry Andric case ENCODING_CF: 2407*0fca6ea1SDimitry Andric mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2408*0fca6ea1SDimitry Andric return false; 24090b57cec5SDimitry Andric case ENCODING_CC: 2410*0fca6ea1SDimitry Andric if (isCCMPOrCTEST(&insn)) 2411*0fca6ea1SDimitry Andric mcInst.addOperand(MCOperand::createImm(insn.immediates[2])); 2412*0fca6ea1SDimitry Andric else 24130b57cec5SDimitry Andric mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 24140b57cec5SDimitry Andric return false; 24150b57cec5SDimitry Andric case ENCODING_FP: 24160b57cec5SDimitry Andric translateFPRegister(mcInst, insn.modRM & 7); 24170b57cec5SDimitry Andric return false; 24180b57cec5SDimitry Andric case ENCODING_VVVV: 24190b57cec5SDimitry Andric translateRegister(mcInst, insn.vvvv); 24200b57cec5SDimitry Andric return false; 24210b57cec5SDimitry Andric case ENCODING_DUP: 24220b57cec5SDimitry Andric return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 24230b57cec5SDimitry Andric insn, Dis); 24240b57cec5SDimitry Andric } 24250b57cec5SDimitry Andric } 24260b57cec5SDimitry Andric 24270b57cec5SDimitry Andric /// translateInstruction - Translates an internal instruction and all its 24280b57cec5SDimitry Andric /// operands to an MCInst. 24290b57cec5SDimitry Andric /// 24300b57cec5SDimitry Andric /// @param mcInst - The MCInst to populate with the instruction's data. 24310b57cec5SDimitry Andric /// @param insn - The internal instruction. 24320b57cec5SDimitry Andric /// @return - false on success; true otherwise. 
24330b57cec5SDimitry Andric static bool translateInstruction(MCInst &mcInst, 24340b57cec5SDimitry Andric InternalInstruction &insn, 24350b57cec5SDimitry Andric const MCDisassembler *Dis) { 24360b57cec5SDimitry Andric if (!insn.spec) { 24370b57cec5SDimitry Andric debug("Instruction has no specification"); 24380b57cec5SDimitry Andric return true; 24390b57cec5SDimitry Andric } 24400b57cec5SDimitry Andric 24410b57cec5SDimitry Andric mcInst.clear(); 24420b57cec5SDimitry Andric mcInst.setOpcode(insn.instructionID); 24430b57cec5SDimitry Andric // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 24440b57cec5SDimitry Andric // prefix bytes should be disassembled as xrelease and xacquire then set the 24450b57cec5SDimitry Andric // opcode to those instead of the rep and repne opcodes. 24460b57cec5SDimitry Andric if (insn.xAcquireRelease) { 24470b57cec5SDimitry Andric if(mcInst.getOpcode() == X86::REP_PREFIX) 24480b57cec5SDimitry Andric mcInst.setOpcode(X86::XRELEASE_PREFIX); 24490b57cec5SDimitry Andric else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 24500b57cec5SDimitry Andric mcInst.setOpcode(X86::XACQUIRE_PREFIX); 24510b57cec5SDimitry Andric } 24520b57cec5SDimitry Andric 24530b57cec5SDimitry Andric insn.numImmediatesTranslated = 0; 24540b57cec5SDimitry Andric 24550b57cec5SDimitry Andric for (const auto &Op : insn.operands) { 24560b57cec5SDimitry Andric if (Op.encoding != ENCODING_NONE) { 24570b57cec5SDimitry Andric if (translateOperand(mcInst, Op, insn, Dis)) { 24580b57cec5SDimitry Andric return true; 24590b57cec5SDimitry Andric } 24600b57cec5SDimitry Andric } 24610b57cec5SDimitry Andric } 24620b57cec5SDimitry Andric 24630b57cec5SDimitry Andric return false; 24640b57cec5SDimitry Andric } 24650b57cec5SDimitry Andric 24660b57cec5SDimitry Andric static MCDisassembler *createX86Disassembler(const Target &T, 24670b57cec5SDimitry Andric const MCSubtargetInfo &STI, 24680b57cec5SDimitry Andric MCContext &Ctx) { 24690b57cec5SDimitry Andric 
std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 24700b57cec5SDimitry Andric return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 24710b57cec5SDimitry Andric } 24720b57cec5SDimitry Andric 2473480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { 24740b57cec5SDimitry Andric // Register the disassembler. 24750b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 24760b57cec5SDimitry Andric createX86Disassembler); 24770b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 24780b57cec5SDimitry Andric createX86Disassembler); 24790b57cec5SDimitry Andric } 2480