//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets. The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is. The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode. The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.). The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read. For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode. This is orthogonal from its meaning (a GPR or an XMM
//    register, for instance). Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself. The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
//   invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
//   table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
//   factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
//   responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Emits a debug-only message prefixed with the source line of the call site.
// Note that the expansion already ends in ';', so `debug(x);` produces an
// extra empty statement.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // How decode() must use the ModR/M byte to pick an entry (compared against
  // the MODRM_* values there).
  uint8_t modrm_type;
  // Index into modRMTable of the first entry belonging to this decision;
  // decode() adds a ModR/M-derived offset to it.
  uint32_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One decision per opcode byte value; decode() indexes this by the opcode.
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
117 struct ContextDecision { 118 OpcodeDecision opcodeDecisions[IC_max]; 119 }; 120 121 #include "X86GenDisassemblerTables.inc" 122 123 static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124 uint8_t opcode, uint8_t modRM) { 125 const struct ModRMDecision *dec; 126 127 switch (type) { 128 case ONEBYTE: 129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130 break; 131 case TWOBYTE: 132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133 break; 134 case THREEBYTE_38: 135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136 break; 137 case THREEBYTE_3A: 138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139 break; 140 case XOP8_MAP: 141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142 break; 143 case XOP9_MAP: 144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145 break; 146 case XOPA_MAP: 147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148 break; 149 case THREEDNOW_MAP: 150 dec = 151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152 break; 153 case MAP4: 154 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 155 break; 156 case MAP5: 157 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158 break; 159 case MAP6: 160 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 161 break; 162 case MAP7: 163 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 164 break; 165 } 166 167 switch (dec->modrm_type) { 168 default: 169 llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 170 return 0; 171 case MODRM_ONEENTRY: 172 return modRMTable[dec->instructionIDs]; 173 case MODRM_SPLITRM: 174 if (modFromModRM(modRM) == 0x3) 175 return modRMTable[dec->instructionIDs + 1]; 176 return modRMTable[dec->instructionIDs]; 177 case MODRM_SPLITREG: 178 if (modFromModRM(modRM) == 0x3) 179 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 181 case MODRM_SPLITMISC: 182 if (modFromModRM(modRM) == 0x3) 183 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 184 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 185 case MODRM_FULL: 186 return modRMTable[dec->instructionIDs + modRM]; 187 } 188 } 189 190 static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 191 uint64_t offset = insn->readerCursor - insn->startLocation; 192 if (offset >= insn->bytes.size()) 193 return true; 194 byte = insn->bytes[offset]; 195 return false; 196 } 197 198 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 199 auto r = insn->bytes; 200 uint64_t offset = insn->readerCursor - insn->startLocation; 201 if (offset + sizeof(T) > r.size()) 202 return true; 203 ptr = support::endian::read<T>(&r[offset], llvm::endianness::little); 204 insn->readerCursor += sizeof(T); 205 return false; 206 } 207 208 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 209 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 210 } 211 212 static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { 213 return insn->mode == MODE_64BIT && prefix == 0xd5; 214 } 215 216 // Consumes all of an instruction's prefix bytes, and marks the 217 // instruction as having them. Also sets the instruction's default operand, 218 // address, and other relevant data sizes to report operands correctly. 219 // 220 // insn must not be empty. 
221 static int readPrefixes(struct InternalInstruction *insn) { 222 bool isPrefix = true; 223 uint8_t byte = 0; 224 uint8_t nextByte; 225 226 LLVM_DEBUG(dbgs() << "readPrefixes()"); 227 228 while (isPrefix) { 229 // If we fail reading prefixes, just stop here and let the opcode reader 230 // deal with it. 231 if (consume(insn, byte)) 232 break; 233 234 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 235 // break and let it be disassembled as a normal "instruction". 236 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 237 break; 238 239 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { 240 // If the byte is 0xf2 or 0xf3, and any of the following conditions are 241 // met: 242 // - it is followed by a LOCK (0xf0) prefix 243 // - it is followed by an xchg instruction 244 // then it should be disassembled as a xacquire/xrelease not repne/rep. 245 if (((nextByte == 0xf0) || 246 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 247 insn->xAcquireRelease = true; 248 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 249 break; 250 } 251 // Also if the byte is 0xf3, and the following condition is met: 252 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 253 // "mov mem, imm" (opcode 0xc6/0xc7) instructions. 254 // then it should be disassembled as an xrelease not rep. 
255 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 256 nextByte == 0xc6 || nextByte == 0xc7)) { 257 insn->xAcquireRelease = true; 258 break; 259 } 260 if (isREX(insn, nextByte)) { 261 uint8_t nnextByte; 262 // Go to REX prefix after the current one 263 if (consume(insn, nnextByte)) 264 return -1; 265 // We should be able to read next byte after REX prefix 266 if (peek(insn, nnextByte)) 267 return -1; 268 --insn->readerCursor; 269 } 270 } 271 272 switch (byte) { 273 case 0xf0: // LOCK 274 insn->hasLockPrefix = true; 275 break; 276 case 0xf2: // REPNE/REPNZ 277 case 0xf3: { // REP or REPE/REPZ 278 uint8_t nextByte; 279 if (peek(insn, nextByte)) 280 break; 281 // TODO: 282 // 1. There could be several 0x66 283 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 284 // it's not mandatory prefix 285 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 286 // 0x0f exactly after it to be mandatory prefix 287 // 4. if (nextByte == 0xd5) it's REX2 and we need 288 // 0x0f exactly after it to be mandatory prefix 289 if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f || 290 nextByte == 0x66) 291 // The last of 0xf2 /0xf3 is mandatory prefix 292 insn->mandatoryPrefix = byte; 293 insn->repeatPrefix = byte; 294 break; 295 } 296 case 0x2e: // CS segment override -OR- Branch not taken 297 insn->segmentOverride = SEG_OVERRIDE_CS; 298 break; 299 case 0x36: // SS segment override -OR- Branch taken 300 insn->segmentOverride = SEG_OVERRIDE_SS; 301 break; 302 case 0x3e: // DS segment override 303 insn->segmentOverride = SEG_OVERRIDE_DS; 304 break; 305 case 0x26: // ES segment override 306 insn->segmentOverride = SEG_OVERRIDE_ES; 307 break; 308 case 0x64: // FS segment override 309 insn->segmentOverride = SEG_OVERRIDE_FS; 310 break; 311 case 0x65: // GS segment override 312 insn->segmentOverride = SEG_OVERRIDE_GS; 313 break; 314 case 0x66: { // Operand-size override { 315 uint8_t nextByte; 316 insn->hasOpSize = true; 317 if 
(peek(insn, nextByte)) 318 break; 319 // 0x66 can't overwrite existing mandatory prefix and should be ignored 320 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) 321 insn->mandatoryPrefix = byte; 322 break; 323 } 324 case 0x67: // Address-size override 325 insn->hasAdSize = true; 326 break; 327 default: // Not a prefix byte 328 isPrefix = false; 329 break; 330 } 331 332 if (isREX(insn, byte)) { 333 insn->rexPrefix = byte; 334 isPrefix = true; 335 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); 336 } else if (isPrefix) { 337 insn->rexPrefix = 0; 338 } 339 340 if (isPrefix) 341 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); 342 } 343 344 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 345 346 if (byte == 0x62) { 347 uint8_t byte1, byte2; 348 if (consume(insn, byte1)) { 349 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix"); 350 return -1; 351 } 352 353 if (peek(insn, byte2)) { 354 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 355 return -1; 356 } 357 358 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) { 359 insn->vectorExtensionType = TYPE_EVEX; 360 } else { 361 --insn->readerCursor; // unconsume byte1 362 --insn->readerCursor; // unconsume byte 363 } 364 365 if (insn->vectorExtensionType == TYPE_EVEX) { 366 insn->vectorExtensionPrefix[0] = byte; 367 insn->vectorExtensionPrefix[1] = byte1; 368 if (consume(insn, insn->vectorExtensionPrefix[2])) { 369 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 370 return -1; 371 } 372 if (consume(insn, insn->vectorExtensionPrefix[3])) { 373 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix"); 374 return -1; 375 } 376 377 if (insn->mode == MODE_64BIT) { 378 // We simulate the REX prefix for simplicity's sake 379 insn->rexPrefix = 0x40 | 380 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | 381 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | 382 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | 
383 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 384 385 // We simulate the REX2 prefix for simplicity's sake 386 insn->rex2ExtensionPrefix[1] = 387 (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) | 388 (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) | 389 (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4); 390 } 391 392 LLVM_DEBUG( 393 dbgs() << format( 394 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 395 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 396 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3])); 397 } 398 } else if (byte == 0xc4) { 399 uint8_t byte1; 400 if (peek(insn, byte1)) { 401 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 402 return -1; 403 } 404 405 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 406 insn->vectorExtensionType = TYPE_VEX_3B; 407 else 408 --insn->readerCursor; 409 410 if (insn->vectorExtensionType == TYPE_VEX_3B) { 411 insn->vectorExtensionPrefix[0] = byte; 412 consume(insn, insn->vectorExtensionPrefix[1]); 413 consume(insn, insn->vectorExtensionPrefix[2]); 414 415 // We simulate the REX prefix for simplicity's sake 416 417 if (insn->mode == MODE_64BIT) 418 insn->rexPrefix = 0x40 | 419 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | 420 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | 421 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | 422 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 423 424 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 425 insn->vectorExtensionPrefix[0], 426 insn->vectorExtensionPrefix[1], 427 insn->vectorExtensionPrefix[2])); 428 } 429 } else if (byte == 0xc5) { 430 uint8_t byte1; 431 if (peek(insn, byte1)) { 432 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 433 return -1; 434 } 435 436 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 437 insn->vectorExtensionType = TYPE_VEX_2B; 438 else 439 --insn->readerCursor; 440 441 if (insn->vectorExtensionType == TYPE_VEX_2B) { 
442 insn->vectorExtensionPrefix[0] = byte; 443 consume(insn, insn->vectorExtensionPrefix[1]); 444 445 if (insn->mode == MODE_64BIT) 446 insn->rexPrefix = 447 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 448 449 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 450 default: 451 break; 452 case VEX_PREFIX_66: 453 insn->hasOpSize = true; 454 break; 455 } 456 457 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx", 458 insn->vectorExtensionPrefix[0], 459 insn->vectorExtensionPrefix[1])); 460 } 461 } else if (byte == 0x8f) { 462 uint8_t byte1; 463 if (peek(insn, byte1)) { 464 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP"); 465 return -1; 466 } 467 468 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction. 469 insn->vectorExtensionType = TYPE_XOP; 470 else 471 --insn->readerCursor; 472 473 if (insn->vectorExtensionType == TYPE_XOP) { 474 insn->vectorExtensionPrefix[0] = byte; 475 consume(insn, insn->vectorExtensionPrefix[1]); 476 consume(insn, insn->vectorExtensionPrefix[2]); 477 478 // We simulate the REX prefix for simplicity's sake 479 480 if (insn->mode == MODE_64BIT) 481 insn->rexPrefix = 0x40 | 482 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | 483 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | 484 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | 485 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 486 487 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 488 default: 489 break; 490 case VEX_PREFIX_66: 491 insn->hasOpSize = true; 492 break; 493 } 494 495 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 496 insn->vectorExtensionPrefix[0], 497 insn->vectorExtensionPrefix[1], 498 insn->vectorExtensionPrefix[2])); 499 } 500 } else if (isREX2(insn, byte)) { 501 uint8_t byte1; 502 if (peek(insn, byte1)) { 503 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2"); 504 return -1; 505 } 506 insn->rex2ExtensionPrefix[0] = byte; 507 consume(insn, 
insn->rex2ExtensionPrefix[1]); 508 509 // We simulate the REX prefix for simplicity's sake 510 insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) | 511 (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) | 512 (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) | 513 (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0); 514 LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", 515 insn->rex2ExtensionPrefix[0], 516 insn->rex2ExtensionPrefix[1])); 517 } else 518 --insn->readerCursor; 519 520 if (insn->mode == MODE_16BIT) { 521 insn->registerSize = (insn->hasOpSize ? 4 : 2); 522 insn->addressSize = (insn->hasAdSize ? 4 : 2); 523 insn->displacementSize = (insn->hasAdSize ? 4 : 2); 524 insn->immediateSize = (insn->hasOpSize ? 4 : 2); 525 } else if (insn->mode == MODE_32BIT) { 526 insn->registerSize = (insn->hasOpSize ? 2 : 4); 527 insn->addressSize = (insn->hasAdSize ? 2 : 4); 528 insn->displacementSize = (insn->hasAdSize ? 2 : 4); 529 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 530 } else if (insn->mode == MODE_64BIT) { 531 insn->displacementSize = 4; 532 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 533 insn->registerSize = 8; 534 insn->addressSize = (insn->hasAdSize ? 4 : 8); 535 insn->immediateSize = 4; 536 insn->hasOpSize = false; 537 } else { 538 insn->registerSize = (insn->hasOpSize ? 2 : 4); 539 insn->addressSize = (insn->hasAdSize ? 4 : 8); 540 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 541 } 542 } 543 544 return 0; 545 } 546 547 // Consumes the SIB byte to determine addressing information. 
// Returns 0 on success, or -1 if the SIB byte cannot be read.
static int readSIB(struct InternalInstruction *insn) {
  SIBBase sibBaseBase = SIB_BASE_NONE;
  uint8_t index, base;

  LLVM_DEBUG(dbgs() << "readSIB()");
  // Pick the 32-bit or 64-bit register family for the index and base.
  switch (insn->addressSize) {
  case 2:
  default:
    llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  case 4:
    insn->sibIndexBase = SIB_INDEX_EAX;
    sibBaseBase = SIB_BASE_EAX;
    break;
  case 8:
    insn->sibIndexBase = SIB_INDEX_RAX;
    sibBaseBase = SIB_BASE_RAX;
    break;
  }

  if (consume(insn, insn->sib))
    return -1;

  // Extend the 3-bit index field with the X bit (bit 3) and X2 bit (bit 4).
  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
          (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  // An extended index of exactly 4 means "no index register".
  if (index == 0x4) {
    insn->sibIndex = SIB_INDEX_NONE;
  } else {
    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  }

  insn->sibScale = 1 << scaleFromSIB(insn->sib);

  // Extend the 3-bit base field with the B bit (bit 3) and B2 bit (bit 4).
  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
         (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  switch (base) {
  case 0x5:
  case 0xd:
    // For these base values the ModR/M mod field decides whether a base
    // register is used and which displacement width follows.
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = SIB_BASE_NONE;
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    default:
      llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
    }
    break;
  default:
    insn->sibBase = (SIBBase)(sibBaseBase + base);
    break;
  }

  return 0;
}

// Consumes the displacement selected by insn->eaDisplacement (if any) and
// records its offset from the start of the instruction. The value is read as
// a signed integer of the selected width before being stored.
// Returns 0 on success, or -1 if the displacement bytes cannot be read.
static int readDisplacement(struct InternalInstruction *insn) {
  int8_t d8;
  int16_t d16;
  int32_t d32;
  LLVM_DEBUG(dbgs() << "readDisplacement()");

  insn->displacementOffset = insn->readerCursor - insn->startLocation;
  switch (insn->eaDisplacement) {
  case EA_DISP_NONE:
    break;
  case EA_DISP_8:
    if (consume(insn, d8))
      return -1;
    insn->displacement = d8;
    break;
  case EA_DISP_16:
    if (consume(insn, d16))
      return -1;
    insn->displacement = d16;
    break;
  case EA_DISP_32:
    if (consume(insn, d32))
      return -1;
    insn->displacement = d32;
    break;
  }

  return 0;
}

// Consumes all addressing information (ModR/M byte, SIB byte, and
// displacement). Does nothing if the ModR/M byte was already consumed.
// Returns 0 on success, or -1 if any required byte cannot be read.
static int readModRM(struct InternalInstruction *insn) {
  uint8_t mod, rm, reg;
  LLVM_DEBUG(dbgs() << "readModRM()");

  if (insn->consumedModRM)
    return 0;

  if (consume(insn, insn->modRM))
    return -1;
  insn->consumedModRM = true;

  mod = modFromModRM(insn->modRM);
  rm = rmFromModRM(insn->modRM);
  reg = regFromModRM(insn->modRM);

  // This goes by insn->registerSize to pick the correct register, which messes
  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  // fixupReg().
  switch (insn->registerSize) {
  case 2:
    insn->regBase = MODRM_REG_AX;
    insn->eaRegBase = EA_REG_AX;
    break;
  case 4:
    insn->regBase = MODRM_REG_EAX;
    insn->eaRegBase = EA_REG_EAX;
    break;
  case 8:
    insn->regBase = MODRM_REG_RAX;
    insn->eaRegBase = EA_REG_RAX;
    break;
  }

  // Extend the 3-bit reg and rm fields with the REX (bit 3) and REX2 (bit 4)
  // extension bits.
  reg |= (rFromREX(insn->rexPrefix) << 3) |
         (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
  rm |= (bFromREX(insn->rexPrefix) << 3) |
        (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;

  insn->reg = (Reg)(insn->regBase + reg);

  switch (insn->addressSize) {
  case 2: {
    // 16-bit addressing: no SIB byte.
    EABase eaBaseBase = EA_BASE_BX_SI;

    switch (mod) {
    case 0x0:
      if (rm == 0x6) {
        // No base register, 16-bit displacement only.
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_16;
        if (readDisplacement(insn))
          return -1;
      } else {
        insn->eaBase = (EABase)(eaBaseBase + rm);
        insn->eaDisplacement = EA_DISP_NONE;
      }
      break;
    case 0x1:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_8;
      insn->displacementSize = 1;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x2:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_16;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x3:
      // Register operand; eaDisplacement is not assigned on this path.
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      if (readDisplacement(insn))
        return -1;
      break;
    }
    break;
  }
  case 4:
  case 8: {
    EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
      // In determining whether RIP-relative mode is used (rm=5),
      // or whether a SIB byte is present (rm=4),
      // the extension bits (REX.b and EVEX.x) are ignored.
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      case 0x5: // RIP-relative
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
      insn->displacementSize = 1;
      [[fallthrough]];
    case 0x2:
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        // NOTE(review): unlike the mod == 0 case above, this path uses
        // EA_BASE_sib even when addressSize is 8 - confirm this is intended.
        insn->eaBase = EA_BASE_sib;
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      // Register operand: no memory access, no displacement.
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      break;
    }
    break;
  }
  } // switch (insn->addressSize)

  return 0;
}

// Defines a function `name` that maps a register index to the register value
// appropriate for an operand type. `base` is the value used for TYPE_Rv and
// `prefix` is the enumerator prefix (MODRM_REG or EA_REG) pasted onto the
// register names. *valid is set to 0 when the type is unhandled or the index
// is out of range for the register class, and to 1 otherwise.
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      else                                                                     \
        return prefix##_AL + index;                                            \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_TMM_PAIR:                                                        \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0_TMM1 + (index / 2);                                 \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)

// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
//
// @param insn  - See fixup*Value().
// @param op    - The operand specifier.
// @return      - 0 if fixup was successful; -1 if the register returned was
//                invalid for its class.
868 static int fixupReg(struct InternalInstruction *insn, 869 const struct OperandSpecifier *op) { 870 uint8_t valid; 871 LLVM_DEBUG(dbgs() << "fixupReg()"); 872 873 switch ((OperandEncoding)op->encoding) { 874 default: 875 debug("Expected a REG or R/M encoding in fixupReg"); 876 return -1; 877 case ENCODING_VVVV: 878 insn->vvvv = 879 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 880 if (!valid) 881 return -1; 882 break; 883 case ENCODING_REG: 884 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 885 insn->reg - insn->regBase, &valid); 886 if (!valid) 887 return -1; 888 break; 889 CASE_ENCODING_RM: 890 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 891 modFromModRM(insn->modRM) == 3) { 892 // EVEX_X can extend the register id to 32 for a non-GPR register that is 893 // encoded in RM. 894 // mode : MODE_64_BIT 895 // Only 8 vector registers are available in 32 bit mode 896 // mod : 3 897 // RM encodes a register 898 switch (op->type) { 899 case TYPE_Rv: 900 case TYPE_R8: 901 case TYPE_R16: 902 case TYPE_R32: 903 case TYPE_R64: 904 break; 905 default: 906 insn->eaBase = 907 (EABase)(insn->eaBase + 908 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4)); 909 break; 910 } 911 } 912 [[fallthrough]]; 913 case ENCODING_SIB: 914 if (insn->eaBase >= insn->eaRegBase) { 915 insn->eaBase = (EABase)fixupRMValue( 916 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 917 if (!valid) 918 return -1; 919 } 920 break; 921 } 922 923 return 0; 924 } 925 926 // Read the opcode (except the ModR/M byte in the case of extended or escape 927 // opcodes). 
928 static bool readOpcode(struct InternalInstruction *insn) { 929 uint8_t current; 930 LLVM_DEBUG(dbgs() << "readOpcode()"); 931 932 insn->opcodeType = ONEBYTE; 933 if (insn->vectorExtensionType == TYPE_EVEX) { 934 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 935 default: 936 LLVM_DEBUG( 937 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 938 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 939 return true; 940 case VEX_LOB_0F: 941 insn->opcodeType = TWOBYTE; 942 return consume(insn, insn->opcode); 943 case VEX_LOB_0F38: 944 insn->opcodeType = THREEBYTE_38; 945 return consume(insn, insn->opcode); 946 case VEX_LOB_0F3A: 947 insn->opcodeType = THREEBYTE_3A; 948 return consume(insn, insn->opcode); 949 case VEX_LOB_MAP4: 950 insn->opcodeType = MAP4; 951 return consume(insn, insn->opcode); 952 case VEX_LOB_MAP5: 953 insn->opcodeType = MAP5; 954 return consume(insn, insn->opcode); 955 case VEX_LOB_MAP6: 956 insn->opcodeType = MAP6; 957 return consume(insn, insn->opcode); 958 case VEX_LOB_MAP7: 959 insn->opcodeType = MAP7; 960 return consume(insn, insn->opcode); 961 } 962 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 963 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 964 default: 965 LLVM_DEBUG( 966 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 967 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 968 return true; 969 case VEX_LOB_0F: 970 insn->opcodeType = TWOBYTE; 971 return consume(insn, insn->opcode); 972 case VEX_LOB_0F38: 973 insn->opcodeType = THREEBYTE_38; 974 return consume(insn, insn->opcode); 975 case VEX_LOB_0F3A: 976 insn->opcodeType = THREEBYTE_3A; 977 return consume(insn, insn->opcode); 978 case VEX_LOB_MAP5: 979 insn->opcodeType = MAP5; 980 return consume(insn, insn->opcode); 981 case VEX_LOB_MAP6: 982 insn->opcodeType = MAP6; 983 return consume(insn, insn->opcode); 984 case VEX_LOB_MAP7: 985 insn->opcodeType = MAP7; 986 return consume(insn, insn->opcode); 987 } 988 } else if 
(insn->vectorExtensionType == TYPE_VEX_2B) { 989 insn->opcodeType = TWOBYTE; 990 return consume(insn, insn->opcode); 991 } else if (insn->vectorExtensionType == TYPE_XOP) { 992 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 993 default: 994 LLVM_DEBUG( 995 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 996 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 997 return true; 998 case XOP_MAP_SELECT_8: 999 insn->opcodeType = XOP8_MAP; 1000 return consume(insn, insn->opcode); 1001 case XOP_MAP_SELECT_9: 1002 insn->opcodeType = XOP9_MAP; 1003 return consume(insn, insn->opcode); 1004 case XOP_MAP_SELECT_A: 1005 insn->opcodeType = XOPA_MAP; 1006 return consume(insn, insn->opcode); 1007 } 1008 } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) { 1009 // m bit indicates opcode map 1 1010 insn->opcodeType = TWOBYTE; 1011 return consume(insn, insn->opcode); 1012 } 1013 1014 if (consume(insn, current)) 1015 return true; 1016 1017 if (current == 0x0f) { 1018 LLVM_DEBUG( 1019 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 1020 if (consume(insn, current)) 1021 return true; 1022 1023 if (current == 0x38) { 1024 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1025 current)); 1026 if (consume(insn, current)) 1027 return true; 1028 1029 insn->opcodeType = THREEBYTE_38; 1030 } else if (current == 0x3a) { 1031 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1032 current)); 1033 if (consume(insn, current)) 1034 return true; 1035 1036 insn->opcodeType = THREEBYTE_3A; 1037 } else if (current == 0x0f) { 1038 LLVM_DEBUG( 1039 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 1040 1041 // Consume operands before the opcode to comply with the 3DNow encoding 1042 if (readModRM(insn)) 1043 return true; 1044 1045 if (consume(insn, current)) 1046 return true; 1047 1048 insn->opcodeType = THREEDNOW_MAP; 1049 } else { 1050 LLVM_DEBUG(dbgs() << "Didn't find a three-byte 
escape prefix"); 1051 insn->opcodeType = TWOBYTE; 1052 } 1053 } else if (insn->mandatoryPrefix) 1054 // The opcode with mandatory prefix must start with opcode escape. 1055 // If not it's legacy repeat prefix 1056 insn->mandatoryPrefix = 0; 1057 1058 // At this point we have consumed the full opcode. 1059 // Anything we consume from here on must be unconsumed. 1060 insn->opcode = current; 1061 1062 return false; 1063 } 1064 1065 // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 1066 static bool is16BitEquivalent(const char *orig, const char *equiv) { 1067 for (int i = 0;; i++) { 1068 if (orig[i] == '\0' && equiv[i] == '\0') 1069 return true; 1070 if (orig[i] == '\0' || equiv[i] == '\0') 1071 return false; 1072 if (orig[i] != equiv[i]) { 1073 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 1074 continue; 1075 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 1076 continue; 1077 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 1078 continue; 1079 return false; 1080 } 1081 } 1082 } 1083 1084 // Determine whether this instruction is a 64-bit instruction. 1085 static bool is64Bit(const char *name) { 1086 for (int i = 0;; ++i) { 1087 if (name[i] == '\0') 1088 return false; 1089 if (name[i] == '6' && name[i + 1] == '4') 1090 return true; 1091 } 1092 } 1093 1094 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1095 // for extended and escape opcodes, and using a supplied attribute mask. 
//
// @param instructionID - Receives the decoded instruction ID.
// @param insn          - The instruction being decoded; its opcodeType and
//                        opcode select the table entry.
// @param attrMask      - Attribute bitmask, mapped to an InstructionContext
//                        through x86DisassemblerContexts.
// @return              - 0 on success; -1 if the ModR/M byte is needed but
//                        readModRM() fails.
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
                                        struct InternalInstruction *insn,
                                        uint16_t attrMask) {
  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  // NOTE(review): decision stays uninitialized if opcodeType matches none of
  // the cases below; the switch has no default.
  const ContextDecision *decision;
  switch (insn->opcodeType) {
  case ONEBYTE:
    decision = &ONEBYTE_SYM;
    break;
  case TWOBYTE:
    decision = &TWOBYTE_SYM;
    break;
  case THREEBYTE_38:
    decision = &THREEBYTE38_SYM;
    break;
  case THREEBYTE_3A:
    decision = &THREEBYTE3A_SYM;
    break;
  case XOP8_MAP:
    decision = &XOP8_MAP_SYM;
    break;
  case XOP9_MAP:
    decision = &XOP9_MAP_SYM;
    break;
  case XOPA_MAP:
    decision = &XOPA_MAP_SYM;
    break;
  case THREEDNOW_MAP:
    decision = &THREEDNOW_MAP_SYM;
    break;
  case MAP4:
    decision = &MAP4_SYM;
    break;
  case MAP5:
    decision = &MAP5_SYM;
    break;
  case MAP6:
    decision = &MAP6_SYM;
    break;
  case MAP7:
    decision = &MAP7_SYM;
    break;
  }

  // Anything other than MODRM_ONEENTRY means the ModR/M byte takes part in
  // selecting the instruction, so it has to be read before decoding.
  if (decision->opcodeDecisions[insnCtx]
          .modRMDecisions[insn->opcode]
          .modrm_type != MODRM_ONEENTRY) {
    if (readModRM(insn))
      return -1;
    *instructionID =
        decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  } else {
    *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  }

  return 0;
}

// Returns true if insn is a MAP4 instruction whose opcode (and, for opcodes
// 0x80/0x81/0x83/0xf6/0xf7, the ModR/M reg field) matches one of the encodings
// listed below. Callers must have read insn->modRM beforehand for the
// reg-dependent cases.
static bool isCCMPOrCTEST(InternalInstruction *insn) {
  if (insn->opcodeType != MAP4)
    return false;
  if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)
    return true;
  // Masking off bit 0 treats each even/odd opcode pair as one case.
  switch (insn->opcode & 0xfe) {
  default:
    return false;
  case 0x38:
  case 0x3a:
  case 0x84:
    return true;
  case 0x80:
    return regFromModRM(insn->modRM) == 7;
  case 0xf6:
    return regFromModRM(insn->modRM) == 0;
  }
}

// Returns true if the EVEX NF bit is set and the instruction is either in MAP4
// or one of the unprefixed THREEBYTE_38 opcodes listed below.
static bool isNF(InternalInstruction *insn) {
  if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    return false;
  if (insn->opcodeType == MAP4)
    return true;
  // Below NF instructions are not in map4.
  if (insn->opcodeType == THREEBYTE_38 &&
      ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {
    switch (insn->opcode) {
    case 0xf2: // ANDN
    case 0xf3: // BLSI, BLSR, BLSMSK
    case 0xf5: // BZHI
    case 0xf7: // BEXTR
      return true;
    default:
      break;
    }
  }
  return false;
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// On success insn->instructionID and insn->spec are set.
//
// @param insn - The instruction being decoded.
// @param mii  - Used to look up instruction names when the tables alone
//               cannot disambiguate (REX.W and OpSize variants).
// @return     - 0 on success; -1 on failure.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      // readModRM() must succeed before isCCMPOrCTEST() inspects insn->modRM.
      if (isNF(insn) && !readModRM(insn) &&
          !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
        attrMask |= ATTR_EVEXNF;
      // aaa is not used as an opmask in MAP4
      else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
               (insn->opcodeType != MAP4))
        attrMask |= ATTR_EVEXK;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) {
        attrMask |= ATTR_EVEXB;
        if (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) && !readModRM(insn) &&
            modFromModRM(insn->modRM) == 3)
          attrMask |= ATTR_EVEXU;
      }
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // REX.W (bit 3 of the REX prefix) sets ATTR_REXW and clears ATTR_ADSIZE.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  // Absolute jump and pushp/popp need special handling
  if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
      (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
    attrMask |= ATTR_REX2;

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }


  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        // The REX.W lookup failed; keep the ID found without it.
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
      (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Use the OpSize variant only if it is the 16-bit twin by name and exactly
    // one of "16-bit mode" / "OpSize prefix present" holds.
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup with the borrowed opcode failed: restore the real opcode and
      // keep the original ID.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}

// Read an operand from the opcode field of an instruction and interprets it
// appropriately given the operand width.  Handles AddRegFrm instructions.
//
// @param insn        - the instruction whose opcode field is to be read.
// @param size        - The width (in bytes) of the register being specified.
1504 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1505 // RAX. 1506 // @return - 0 on success; nonzero otherwise. 1507 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1508 LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1509 1510 if (size == 0) 1511 size = insn->registerSize; 1512 1513 auto setOpcodeRegister = [&](unsigned base) { 1514 insn->opcodeRegister = 1515 (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) | 1516 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) | 1517 (insn->opcode & 7))); 1518 }; 1519 1520 switch (size) { 1521 case 1: 1522 setOpcodeRegister(MODRM_REG_AL); 1523 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1524 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1525 insn->opcodeRegister = 1526 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1527 } 1528 1529 break; 1530 case 2: 1531 setOpcodeRegister(MODRM_REG_AX); 1532 break; 1533 case 4: 1534 setOpcodeRegister(MODRM_REG_EAX); 1535 break; 1536 case 8: 1537 setOpcodeRegister(MODRM_REG_RAX); 1538 break; 1539 } 1540 1541 return 0; 1542 } 1543 1544 // Consume an immediate operand from an instruction, given the desired operand 1545 // size. 1546 // 1547 // @param insn - The instruction whose operand is to be read. 1548 // @param size - The width (in bytes) of the operand. 1549 // @return - 0 if the immediate was successfully consumed; nonzero 1550 // otherwise. 
1551 static int readImmediate(struct InternalInstruction *insn, uint8_t size) { 1552 uint8_t imm8; 1553 uint16_t imm16; 1554 uint32_t imm32; 1555 uint64_t imm64; 1556 1557 LLVM_DEBUG(dbgs() << "readImmediate()"); 1558 1559 assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates"); 1560 1561 insn->immediateSize = size; 1562 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1563 1564 switch (size) { 1565 case 1: 1566 if (consume(insn, imm8)) 1567 return -1; 1568 insn->immediates[insn->numImmediatesConsumed] = imm8; 1569 break; 1570 case 2: 1571 if (consume(insn, imm16)) 1572 return -1; 1573 insn->immediates[insn->numImmediatesConsumed] = imm16; 1574 break; 1575 case 4: 1576 if (consume(insn, imm32)) 1577 return -1; 1578 insn->immediates[insn->numImmediatesConsumed] = imm32; 1579 break; 1580 case 8: 1581 if (consume(insn, imm64)) 1582 return -1; 1583 insn->immediates[insn->numImmediatesConsumed] = imm64; 1584 break; 1585 default: 1586 llvm_unreachable("invalid size"); 1587 } 1588 1589 insn->numImmediatesConsumed++; 1590 1591 return 0; 1592 } 1593 1594 // Consume vvvv from an instruction if it has a VEX prefix. 1595 static int readVVVV(struct InternalInstruction *insn) { 1596 LLVM_DEBUG(dbgs() << "readVVVV()"); 1597 1598 int vvvv; 1599 if (insn->vectorExtensionType == TYPE_EVEX) 1600 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1601 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1602 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1603 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1604 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1605 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1606 else if (insn->vectorExtensionType == TYPE_XOP) 1607 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1608 else 1609 return -1; 1610 1611 if (insn->mode != MODE_64BIT) 1612 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later. 
1613 1614 insn->vvvv = static_cast<Reg>(vvvv); 1615 return 0; 1616 } 1617 1618 // Read an mask register from the opcode field of an instruction. 1619 // 1620 // @param insn - The instruction whose opcode field is to be read. 1621 // @return - 0 on success; nonzero otherwise. 1622 static int readMaskRegister(struct InternalInstruction *insn) { 1623 LLVM_DEBUG(dbgs() << "readMaskRegister()"); 1624 1625 if (insn->vectorExtensionType != TYPE_EVEX) 1626 return -1; 1627 1628 insn->writemask = 1629 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1630 return 0; 1631 } 1632 1633 // Consults the specifier for an instruction and consumes all 1634 // operands for that instruction, interpreting them as it goes. 1635 static int readOperands(struct InternalInstruction *insn) { 1636 int hasVVVV, needVVVV; 1637 int sawRegImm = 0; 1638 1639 LLVM_DEBUG(dbgs() << "readOperands()"); 1640 1641 // If non-zero vvvv specified, make sure one of the operands uses it. 1642 hasVVVV = !readVVVV(insn); 1643 needVVVV = hasVVVV && (insn->vvvv != 0); 1644 1645 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1646 switch (Op.encoding) { 1647 case ENCODING_NONE: 1648 case ENCODING_SI: 1649 case ENCODING_DI: 1650 break; 1651 CASE_ENCODING_VSIB: 1652 // VSIB can use the V2 bit so check only the other bits. 1653 if (needVVVV) 1654 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); 1655 if (readModRM(insn)) 1656 return -1; 1657 1658 // Reject if SIB wasn't used. 1659 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1660 return -1; 1661 1662 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4. 1663 if (insn->sibIndex == SIB_INDEX_NONE) 1664 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4); 1665 1666 // If EVEX.v2 is set this is one of the 16-31 registers. 
1667 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 1668 v2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1669 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16); 1670 1671 // Adjust the index register to the correct size. 1672 switch ((OperandType)Op.type) { 1673 default: 1674 debug("Unhandled VSIB index type"); 1675 return -1; 1676 case TYPE_MVSIBX: 1677 insn->sibIndex = 1678 (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase)); 1679 break; 1680 case TYPE_MVSIBY: 1681 insn->sibIndex = 1682 (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase)); 1683 break; 1684 case TYPE_MVSIBZ: 1685 insn->sibIndex = 1686 (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase)); 1687 break; 1688 } 1689 1690 // Apply the AVX512 compressed displacement scaling factor. 1691 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1692 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); 1693 break; 1694 case ENCODING_SIB: 1695 // Reject if SIB wasn't used. 1696 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1697 return -1; 1698 if (readModRM(insn)) 1699 return -1; 1700 if (fixupReg(insn, &Op)) 1701 return -1; 1702 break; 1703 case ENCODING_REG: 1704 CASE_ENCODING_RM: 1705 if (readModRM(insn)) 1706 return -1; 1707 if (fixupReg(insn, &Op)) 1708 return -1; 1709 // Apply the AVX512 compressed displacement scaling factor. 1710 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1711 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1712 break; 1713 case ENCODING_IB: 1714 if (sawRegImm) { 1715 // Saw a register immediate so don't read again and instead split the 1716 // previous immediate. FIXME: This is a hack. 
1717 insn->immediates[insn->numImmediatesConsumed] = 1718 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1719 ++insn->numImmediatesConsumed; 1720 break; 1721 } 1722 if (readImmediate(insn, 1)) 1723 return -1; 1724 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) 1725 sawRegImm = 1; 1726 break; 1727 case ENCODING_IW: 1728 if (readImmediate(insn, 2)) 1729 return -1; 1730 break; 1731 case ENCODING_ID: 1732 if (readImmediate(insn, 4)) 1733 return -1; 1734 break; 1735 case ENCODING_IO: 1736 if (readImmediate(insn, 8)) 1737 return -1; 1738 break; 1739 case ENCODING_Iv: 1740 if (readImmediate(insn, insn->immediateSize)) 1741 return -1; 1742 break; 1743 case ENCODING_Ia: 1744 if (readImmediate(insn, insn->addressSize)) 1745 return -1; 1746 break; 1747 case ENCODING_IRC: 1748 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | 1749 lFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1750 break; 1751 case ENCODING_RB: 1752 if (readOpcodeRegister(insn, 1)) 1753 return -1; 1754 break; 1755 case ENCODING_RW: 1756 if (readOpcodeRegister(insn, 2)) 1757 return -1; 1758 break; 1759 case ENCODING_RD: 1760 if (readOpcodeRegister(insn, 4)) 1761 return -1; 1762 break; 1763 case ENCODING_RO: 1764 if (readOpcodeRegister(insn, 8)) 1765 return -1; 1766 break; 1767 case ENCODING_Rv: 1768 if (readOpcodeRegister(insn, 0)) 1769 return -1; 1770 break; 1771 case ENCODING_CF: 1772 insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]); 1773 needVVVV = false; // oszc shares the same bits with VVVV 1774 break; 1775 case ENCODING_CC: 1776 if (isCCMPOrCTEST(insn)) 1777 insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1778 else 1779 insn->immediates[1] = insn->opcode & 0xf; 1780 break; 1781 case ENCODING_FP: 1782 break; 1783 case ENCODING_VVVV: 1784 needVVVV = 0; // Mark that we have found a VVVV operand. 
1785 if (!hasVVVV) 1786 return -1; 1787 if (insn->mode != MODE_64BIT) 1788 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7); 1789 if (fixupReg(insn, &Op)) 1790 return -1; 1791 break; 1792 case ENCODING_WRITEMASK: 1793 if (readMaskRegister(insn)) 1794 return -1; 1795 break; 1796 case ENCODING_DUP: 1797 break; 1798 default: 1799 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding."); 1800 return -1; 1801 } 1802 } 1803 1804 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail 1805 if (needVVVV) 1806 return -1; 1807 1808 return 0; 1809 } 1810 1811 namespace llvm { 1812 1813 // Fill-ins to make the compiler happy. These constants are never actually 1814 // assigned; they are just filler to make an automatically-generated switch 1815 // statement work. 1816 namespace X86 { 1817 enum { 1818 BX_SI = 500, 1819 BX_DI = 501, 1820 BP_SI = 502, 1821 BP_DI = 503, 1822 sib = 504, 1823 sib64 = 505 1824 }; 1825 } // namespace X86 1826 1827 } // namespace llvm 1828 1829 static bool translateInstruction(MCInst &target, 1830 InternalInstruction &source, 1831 const MCDisassembler *Dis); 1832 1833 namespace { 1834 1835 /// Generic disassembler for all X86 platforms. All each platform class should 1836 /// have to do is subclass the constructor, and provide a different 1837 /// disassemblerMode value. 
1838 class X86GenericDisassembler : public MCDisassembler { 1839 std::unique_ptr<const MCInstrInfo> MII; 1840 public: 1841 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 1842 std::unique_ptr<const MCInstrInfo> MII); 1843 public: 1844 DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 1845 ArrayRef<uint8_t> Bytes, uint64_t Address, 1846 raw_ostream &cStream) const override; 1847 1848 private: 1849 DisassemblerMode fMode; 1850 }; 1851 1852 } // namespace 1853 1854 X86GenericDisassembler::X86GenericDisassembler( 1855 const MCSubtargetInfo &STI, 1856 MCContext &Ctx, 1857 std::unique_ptr<const MCInstrInfo> MII) 1858 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 1859 const FeatureBitset &FB = STI.getFeatureBits(); 1860 if (FB[X86::Is16Bit]) { 1861 fMode = MODE_16BIT; 1862 return; 1863 } else if (FB[X86::Is32Bit]) { 1864 fMode = MODE_32BIT; 1865 return; 1866 } else if (FB[X86::Is64Bit]) { 1867 fMode = MODE_64BIT; 1868 return; 1869 } 1870 1871 llvm_unreachable("Invalid CPU mode"); 1872 } 1873 1874 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 1875 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1876 raw_ostream &CStream) const { 1877 CommentStream = &CStream; 1878 1879 InternalInstruction Insn; 1880 memset(&Insn, 0, sizeof(InternalInstruction)); 1881 Insn.bytes = Bytes; 1882 Insn.startLocation = Address; 1883 Insn.readerCursor = Address; 1884 Insn.mode = fMode; 1885 1886 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1887 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1888 readOperands(&Insn)) { 1889 Size = Insn.readerCursor - Address; 1890 return Fail; 1891 } 1892 1893 Insn.operands = x86OperandSets[Insn.spec->operands]; 1894 Insn.length = Insn.readerCursor - Insn.startLocation; 1895 Size = Insn.length; 1896 if (Size > 15) 1897 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1898 1899 bool Ret = translateInstruction(Instr, Insn, this); 1900 if 
(!Ret) { 1901 unsigned Flags = X86::IP_NO_PREFIX; 1902 if (Insn.hasAdSize) 1903 Flags |= X86::IP_HAS_AD_SIZE; 1904 if (!Insn.mandatoryPrefix) { 1905 if (Insn.hasOpSize) 1906 Flags |= X86::IP_HAS_OP_SIZE; 1907 if (Insn.repeatPrefix == 0xf2) 1908 Flags |= X86::IP_HAS_REPEAT_NE; 1909 else if (Insn.repeatPrefix == 0xf3 && 1910 // It should not be 'pause' f3 90 1911 Insn.opcode != 0x90) 1912 Flags |= X86::IP_HAS_REPEAT; 1913 if (Insn.hasLockPrefix) 1914 Flags |= X86::IP_HAS_LOCK; 1915 } 1916 Instr.setFlags(Flags); 1917 } 1918 return (!Ret) ? Success : Fail; 1919 } 1920 1921 // 1922 // Private code that translates from struct InternalInstructions to MCInsts. 1923 // 1924 1925 /// translateRegister - Translates an internal register to the appropriate LLVM 1926 /// register, and appends it as an operand to an MCInst. 1927 /// 1928 /// @param mcInst - The MCInst to append to. 1929 /// @param reg - The Reg to append. 1930 static void translateRegister(MCInst &mcInst, Reg reg) { 1931 #define ENTRY(x) X86::x, 1932 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 1933 #undef ENTRY 1934 1935 MCPhysReg llvmRegnum = llvmRegnums[reg]; 1936 mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 1937 } 1938 1939 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 1940 0, // SEG_OVERRIDE_NONE 1941 X86::CS, 1942 X86::SS, 1943 X86::DS, 1944 X86::ES, 1945 X86::FS, 1946 X86::GS 1947 }; 1948 1949 /// translateSrcIndex - Appends a source index operand to an MCInst. 1950 /// 1951 /// @param mcInst - The MCInst to append to. 1952 /// @param insn - The internal instruction. 1953 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 1954 unsigned baseRegNo; 1955 1956 if (insn.mode == MODE_64BIT) 1957 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 1958 else if (insn.mode == MODE_32BIT) 1959 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 1960 else { 1961 assert(insn.mode == MODE_16BIT); 1962 baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 1963 } 1964 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1965 mcInst.addOperand(baseReg); 1966 1967 MCOperand segmentReg; 1968 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1969 mcInst.addOperand(segmentReg); 1970 return false; 1971 } 1972 1973 /// translateDstIndex - Appends a destination index operand to an MCInst. 1974 /// 1975 /// @param mcInst - The MCInst to append to. 1976 /// @param insn - The internal instruction. 1977 1978 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 1979 unsigned baseRegNo; 1980 1981 if (insn.mode == MODE_64BIT) 1982 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 1983 else if (insn.mode == MODE_32BIT) 1984 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 1985 else { 1986 assert(insn.mode == MODE_16BIT); 1987 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 1988 } 1989 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1990 mcInst.addOperand(baseReg); 1991 return false; 1992 } 1993 1994 /// translateImmediate - Appends an immediate operand to an MCInst. 1995 /// 1996 /// @param mcInst - The MCInst to append to. 1997 /// @param immediate - The immediate value to append. 1998 /// @param operand - The operand, as stored in the descriptor table. 1999 /// @param insn - The internal instruction. 2000 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 2001 const OperandSpecifier &operand, 2002 InternalInstruction &insn, 2003 const MCDisassembler *Dis) { 2004 // Sign-extend the immediate if necessary. 
2005 2006 OperandType type = (OperandType)operand.type; 2007 2008 bool isBranch = false; 2009 uint64_t pcrel = 0; 2010 if (type == TYPE_REL) { 2011 isBranch = true; 2012 pcrel = insn.startLocation + insn.length; 2013 switch (operand.encoding) { 2014 default: 2015 break; 2016 case ENCODING_Iv: 2017 switch (insn.displacementSize) { 2018 default: 2019 break; 2020 case 1: 2021 if(immediate & 0x80) 2022 immediate |= ~(0xffull); 2023 break; 2024 case 2: 2025 if(immediate & 0x8000) 2026 immediate |= ~(0xffffull); 2027 break; 2028 case 4: 2029 if(immediate & 0x80000000) 2030 immediate |= ~(0xffffffffull); 2031 break; 2032 case 8: 2033 break; 2034 } 2035 break; 2036 case ENCODING_IB: 2037 if(immediate & 0x80) 2038 immediate |= ~(0xffull); 2039 break; 2040 case ENCODING_IW: 2041 if(immediate & 0x8000) 2042 immediate |= ~(0xffffull); 2043 break; 2044 case ENCODING_ID: 2045 if(immediate & 0x80000000) 2046 immediate |= ~(0xffffffffull); 2047 break; 2048 } 2049 } 2050 // By default sign-extend all X86 immediates based on their encoding. 2051 else if (type == TYPE_IMM) { 2052 switch (operand.encoding) { 2053 default: 2054 break; 2055 case ENCODING_IB: 2056 if(immediate & 0x80) 2057 immediate |= ~(0xffull); 2058 break; 2059 case ENCODING_IW: 2060 if(immediate & 0x8000) 2061 immediate |= ~(0xffffull); 2062 break; 2063 case ENCODING_ID: 2064 if(immediate & 0x80000000) 2065 immediate |= ~(0xffffffffull); 2066 break; 2067 case ENCODING_IO: 2068 break; 2069 } 2070 } 2071 2072 switch (type) { 2073 case TYPE_XMM: 2074 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 2075 return; 2076 case TYPE_YMM: 2077 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 2078 return; 2079 case TYPE_ZMM: 2080 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 2081 return; 2082 default: 2083 // operand is 64 bits wide. Do nothing. 
2084 break; 2085 } 2086 2087 if (!Dis->tryAddingSymbolicOperand( 2088 mcInst, immediate + pcrel, insn.startLocation, isBranch, 2089 insn.immediateOffset, insn.immediateSize, insn.length)) 2090 mcInst.addOperand(MCOperand::createImm(immediate)); 2091 2092 if (type == TYPE_MOFFS) { 2093 MCOperand segmentReg; 2094 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2095 mcInst.addOperand(segmentReg); 2096 } 2097 } 2098 2099 /// translateRMRegister - Translates a register stored in the R/M field of the 2100 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 2101 /// @param mcInst - The MCInst to append to. 2102 /// @param insn - The internal instruction to extract the R/M field 2103 /// from. 2104 /// @return - 0 on success; -1 otherwise 2105 static bool translateRMRegister(MCInst &mcInst, 2106 InternalInstruction &insn) { 2107 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2108 debug("A R/M register operand may not have a SIB byte"); 2109 return true; 2110 } 2111 2112 switch (insn.eaBase) { 2113 default: 2114 debug("Unexpected EA base register"); 2115 return true; 2116 case EA_BASE_NONE: 2117 debug("EA_BASE_NONE for ModR/M base"); 2118 return true; 2119 #define ENTRY(x) case EA_BASE_##x: 2120 ALL_EA_BASES 2121 #undef ENTRY 2122 debug("A R/M register operand may not have a base; " 2123 "the operand must be a register."); 2124 return true; 2125 #define ENTRY(x) \ 2126 case EA_REG_##x: \ 2127 mcInst.addOperand(MCOperand::createReg(X86::x)); break; 2128 ALL_REGS 2129 #undef ENTRY 2130 } 2131 2132 return false; 2133 } 2134 2135 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 2136 /// fields of an internal instruction (and possibly its SIB byte) to a memory 2137 /// operand in LLVM's format, and appends it to an MCInst. 2138 /// 2139 /// @param mcInst - The MCInst to append to. 2140 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2141 /// from. 
2142 /// @param ForceSIB - The instruction must use SIB. 2143 /// @return - 0 on success; nonzero otherwise 2144 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 2145 const MCDisassembler *Dis, 2146 bool ForceSIB = false) { 2147 // Addresses in an MCInst are represented as five operands: 2148 // 1. basereg (register) The R/M base, or (if there is a SIB) the 2149 // SIB base 2150 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 2151 // scale amount 2152 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 2153 // the index (which is multiplied by the 2154 // scale amount) 2155 // 4. displacement (immediate) 0, or the displacement if there is one 2156 // 5. segmentreg (register) x86_registerNONE for now, but could be set 2157 // if we have segment overrides 2158 2159 MCOperand baseReg; 2160 MCOperand scaleAmount; 2161 MCOperand indexReg; 2162 MCOperand displacement; 2163 MCOperand segmentReg; 2164 uint64_t pcrel = 0; 2165 2166 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2167 if (insn.sibBase != SIB_BASE_NONE) { 2168 switch (insn.sibBase) { 2169 default: 2170 debug("Unexpected sibBase"); 2171 return true; 2172 #define ENTRY(x) \ 2173 case SIB_BASE_##x: \ 2174 baseReg = MCOperand::createReg(X86::x); break; 2175 ALL_SIB_BASES 2176 #undef ENTRY 2177 } 2178 } else { 2179 baseReg = MCOperand::createReg(X86::NoRegister); 2180 } 2181 2182 if (insn.sibIndex != SIB_INDEX_NONE) { 2183 switch (insn.sibIndex) { 2184 default: 2185 debug("Unexpected sibIndex"); 2186 return true; 2187 #define ENTRY(x) \ 2188 case SIB_INDEX_##x: \ 2189 indexReg = MCOperand::createReg(X86::x); break; 2190 EA_BASES_32BIT 2191 EA_BASES_64BIT 2192 REGS_XMM 2193 REGS_YMM 2194 REGS_ZMM 2195 #undef ENTRY 2196 } 2197 } else { 2198 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 2199 // but no index is used and modrm alone should have been enough. 2200 // -No base register in 32-bit mode. 
In 64-bit mode this is used to 2201 // avoid rip-relative addressing. 2202 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 2203 // base always requires a SIB byte. 2204 // -A scale other than 1 is used. 2205 if (!ForceSIB && 2206 (insn.sibScale != 1 || 2207 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 2208 (insn.sibBase != SIB_BASE_NONE && 2209 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 2210 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 2211 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : 2212 X86::RIZ); 2213 } else 2214 indexReg = MCOperand::createReg(X86::NoRegister); 2215 } 2216 2217 scaleAmount = MCOperand::createImm(insn.sibScale); 2218 } else { 2219 switch (insn.eaBase) { 2220 case EA_BASE_NONE: 2221 if (insn.eaDisplacement == EA_DISP_NONE) { 2222 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 2223 return true; 2224 } 2225 if (insn.mode == MODE_64BIT){ 2226 pcrel = insn.startLocation + insn.length; 2227 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 2228 insn.startLocation + 2229 insn.displacementOffset); 2230 // Section 2.2.1.6 2231 baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 2232 X86::RIP); 2233 } 2234 else 2235 baseReg = MCOperand::createReg(X86::NoRegister); 2236 2237 indexReg = MCOperand::createReg(X86::NoRegister); 2238 break; 2239 case EA_BASE_BX_SI: 2240 baseReg = MCOperand::createReg(X86::BX); 2241 indexReg = MCOperand::createReg(X86::SI); 2242 break; 2243 case EA_BASE_BX_DI: 2244 baseReg = MCOperand::createReg(X86::BX); 2245 indexReg = MCOperand::createReg(X86::DI); 2246 break; 2247 case EA_BASE_BP_SI: 2248 baseReg = MCOperand::createReg(X86::BP); 2249 indexReg = MCOperand::createReg(X86::SI); 2250 break; 2251 case EA_BASE_BP_DI: 2252 baseReg = MCOperand::createReg(X86::BP); 2253 indexReg = MCOperand::createReg(X86::DI); 2254 break; 2255 default: 2256 indexReg = MCOperand::createReg(X86::NoRegister); 2257 switch (insn.eaBase) { 2258 default: 2259 debug("Unexpected eaBase"); 2260 return true; 2261 // Here, we will use the fill-ins defined above. However, 2262 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 2263 // sib and sib64 were handled in the top-level if, so they're only 2264 // placeholders to keep the compiler happy. 2265 #define ENTRY(x) \ 2266 case EA_BASE_##x: \ 2267 baseReg = MCOperand::createReg(X86::x); break; 2268 ALL_EA_BASES 2269 #undef ENTRY 2270 #define ENTRY(x) case EA_REG_##x: 2271 ALL_REGS 2272 #undef ENTRY 2273 debug("A R/M memory operand may not be a register; " 2274 "the base field must be a base."); 2275 return true; 2276 } 2277 } 2278 2279 scaleAmount = MCOperand::createImm(1); 2280 } 2281 2282 displacement = MCOperand::createImm(insn.displacement); 2283 2284 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2285 2286 mcInst.addOperand(baseReg); 2287 mcInst.addOperand(scaleAmount); 2288 mcInst.addOperand(indexReg); 2289 2290 const uint8_t dispSize = 2291 (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 2292 2293 if (!Dis->tryAddingSymbolicOperand( 2294 mcInst, insn.displacement + pcrel, insn.startLocation, false, 2295 insn.displacementOffset, dispSize, insn.length)) 2296 mcInst.addOperand(displacement); 2297 mcInst.addOperand(segmentReg); 2298 return false; 2299 } 2300 2301 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 2302 /// byte of an instruction to LLVM form, and appends it to an MCInst. 2303 /// 2304 /// @param mcInst - The MCInst to append to. 2305 /// @param operand - The operand, as stored in the descriptor table. 2306 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2307 /// from. 2308 /// @return - 0 on success; nonzero otherwise 2309 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 2310 InternalInstruction &insn, const MCDisassembler *Dis) { 2311 switch (operand.type) { 2312 default: 2313 debug("Unexpected type for a R/M operand"); 2314 return true; 2315 case TYPE_R8: 2316 case TYPE_R16: 2317 case TYPE_R32: 2318 case TYPE_R64: 2319 case TYPE_Rv: 2320 case TYPE_MM64: 2321 case TYPE_XMM: 2322 case TYPE_YMM: 2323 case TYPE_ZMM: 2324 case TYPE_TMM: 2325 case TYPE_TMM_PAIR: 2326 case TYPE_VK_PAIR: 2327 case TYPE_VK: 2328 case TYPE_DEBUGREG: 2329 case TYPE_CONTROLREG: 2330 case TYPE_BNDR: 2331 return translateRMRegister(mcInst, insn); 2332 case TYPE_M: 2333 case TYPE_MVSIBX: 2334 case TYPE_MVSIBY: 2335 case TYPE_MVSIBZ: 2336 return translateRMMemory(mcInst, insn, Dis); 2337 case TYPE_MSIB: 2338 return translateRMMemory(mcInst, insn, Dis, true); 2339 } 2340 } 2341 2342 /// translateFPRegister - Translates a stack position on the FPU stack to its 2343 /// LLVM form, and appends it to an MCInst. 2344 /// 2345 /// @param mcInst - The MCInst to append to. 2346 /// @param stackPos - The stack position to translate. 
2347 static void translateFPRegister(MCInst &mcInst, 2348 uint8_t stackPos) { 2349 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 2350 } 2351 2352 /// translateMaskRegister - Translates a 3-bit mask register number to 2353 /// LLVM form, and appends it to an MCInst. 2354 /// 2355 /// @param mcInst - The MCInst to append to. 2356 /// @param maskRegNum - Number of mask register from 0 to 7. 2357 /// @return - false on success; true otherwise. 2358 static bool translateMaskRegister(MCInst &mcInst, 2359 uint8_t maskRegNum) { 2360 if (maskRegNum >= 8) { 2361 debug("Invalid mask register number"); 2362 return true; 2363 } 2364 2365 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 2366 return false; 2367 } 2368 2369 /// translateOperand - Translates an operand stored in an internal instruction 2370 /// to LLVM's format and appends it to an MCInst. 2371 /// 2372 /// @param mcInst - The MCInst to append to. 2373 /// @param operand - The operand, as stored in the descriptor table. 2374 /// @param insn - The internal instruction. 2375 /// @return - false on success; true otherwise. 
2376 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 2377 InternalInstruction &insn, 2378 const MCDisassembler *Dis) { 2379 switch (operand.encoding) { 2380 default: 2381 debug("Unhandled operand encoding during translation"); 2382 return true; 2383 case ENCODING_REG: 2384 translateRegister(mcInst, insn.reg); 2385 return false; 2386 case ENCODING_WRITEMASK: 2387 return translateMaskRegister(mcInst, insn.writemask); 2388 case ENCODING_SIB: 2389 CASE_ENCODING_RM: 2390 CASE_ENCODING_VSIB: 2391 return translateRM(mcInst, operand, insn, Dis); 2392 case ENCODING_IB: 2393 case ENCODING_IW: 2394 case ENCODING_ID: 2395 case ENCODING_IO: 2396 case ENCODING_Iv: 2397 case ENCODING_Ia: 2398 translateImmediate(mcInst, 2399 insn.immediates[insn.numImmediatesTranslated++], 2400 operand, 2401 insn, 2402 Dis); 2403 return false; 2404 case ENCODING_IRC: 2405 mcInst.addOperand(MCOperand::createImm(insn.RC)); 2406 return false; 2407 case ENCODING_SI: 2408 return translateSrcIndex(mcInst, insn); 2409 case ENCODING_DI: 2410 return translateDstIndex(mcInst, insn); 2411 case ENCODING_RB: 2412 case ENCODING_RW: 2413 case ENCODING_RD: 2414 case ENCODING_RO: 2415 case ENCODING_Rv: 2416 translateRegister(mcInst, insn.opcodeRegister); 2417 return false; 2418 case ENCODING_CF: 2419 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2420 return false; 2421 case ENCODING_CC: 2422 if (isCCMPOrCTEST(&insn)) 2423 mcInst.addOperand(MCOperand::createImm(insn.immediates[2])); 2424 else 2425 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2426 return false; 2427 case ENCODING_FP: 2428 translateFPRegister(mcInst, insn.modRM & 7); 2429 return false; 2430 case ENCODING_VVVV: 2431 translateRegister(mcInst, insn.vvvv); 2432 return false; 2433 case ENCODING_DUP: 2434 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 2435 insn, Dis); 2436 } 2437 } 2438 2439 /// translateInstruction - Translates an internal instruction and all its 2440 
/// operands to an MCInst. 2441 /// 2442 /// @param mcInst - The MCInst to populate with the instruction's data. 2443 /// @param insn - The internal instruction. 2444 /// @return - false on success; true otherwise. 2445 static bool translateInstruction(MCInst &mcInst, 2446 InternalInstruction &insn, 2447 const MCDisassembler *Dis) { 2448 if (!insn.spec) { 2449 debug("Instruction has no specification"); 2450 return true; 2451 } 2452 2453 mcInst.clear(); 2454 mcInst.setOpcode(insn.instructionID); 2455 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 2456 // prefix bytes should be disassembled as xrelease and xacquire then set the 2457 // opcode to those instead of the rep and repne opcodes. 2458 if (insn.xAcquireRelease) { 2459 if(mcInst.getOpcode() == X86::REP_PREFIX) 2460 mcInst.setOpcode(X86::XRELEASE_PREFIX); 2461 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 2462 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 2463 } 2464 2465 insn.numImmediatesTranslated = 0; 2466 2467 for (const auto &Op : insn.operands) { 2468 if (Op.encoding != ENCODING_NONE) { 2469 if (translateOperand(mcInst, Op, insn, Dis)) { 2470 return true; 2471 } 2472 } 2473 } 2474 2475 return false; 2476 } 2477 2478 static MCDisassembler *createX86Disassembler(const Target &T, 2479 const MCSubtargetInfo &STI, 2480 MCContext &Ctx) { 2481 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 2482 return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 2483 } 2484 2485 extern "C" LLVM_C_ABI void LLVMInitializeX86Disassembler() { 2486 // Register the disassembler. 2487 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 2488 createX86Disassembler); 2489 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 2490 createX86Disassembler); 2491 } 2492