//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets. The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is. The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode. The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.). The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read. For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode. This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance). Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself. The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
// invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
// table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
// factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
// responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Emits a debug-only message prefixed with the source line number of the call
// site. Note that the expansion already ends in a semicolon.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // Selects how the ModR/M byte picks an entry (decode() switches on the
  // MODRM_* values: ONEENTRY, SPLITRM, SPLITREG, SPLITMISC, FULL).
  uint8_t modrm_type;
  // Index of the first modRMTable entry belonging to this decision; decode()
  // adds a ModR/M-derived offset to it.
  uint32_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One decision per possible opcode byte value; decode() indexes this array
  // with the 8-bit opcode.
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
117 struct ContextDecision { 118 OpcodeDecision opcodeDecisions[IC_max]; 119 }; 120 121 #include "X86GenDisassemblerTables.inc" 122 123 static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124 uint8_t opcode, uint8_t modRM) { 125 const struct ModRMDecision *dec; 126 127 switch (type) { 128 case ONEBYTE: 129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130 break; 131 case TWOBYTE: 132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133 break; 134 case THREEBYTE_38: 135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136 break; 137 case THREEBYTE_3A: 138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139 break; 140 case XOP8_MAP: 141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142 break; 143 case XOP9_MAP: 144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145 break; 146 case XOPA_MAP: 147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148 break; 149 case THREEDNOW_MAP: 150 dec = 151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152 break; 153 case MAP4: 154 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 155 break; 156 case MAP5: 157 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158 break; 159 case MAP6: 160 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 161 break; 162 case MAP7: 163 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 164 break; 165 } 166 167 switch (dec->modrm_type) { 168 default: 169 llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 170 return 0; 171 case MODRM_ONEENTRY: 172 return modRMTable[dec->instructionIDs]; 173 case MODRM_SPLITRM: 174 if (modFromModRM(modRM) == 0x3) 175 return modRMTable[dec->instructionIDs + 1]; 176 return modRMTable[dec->instructionIDs]; 177 case MODRM_SPLITREG: 178 if (modFromModRM(modRM) == 0x3) 179 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 181 case MODRM_SPLITMISC: 182 if (modFromModRM(modRM) == 0x3) 183 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 184 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 185 case MODRM_FULL: 186 return modRMTable[dec->instructionIDs + modRM]; 187 } 188 } 189 190 static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 191 uint64_t offset = insn->readerCursor - insn->startLocation; 192 if (offset >= insn->bytes.size()) 193 return true; 194 byte = insn->bytes[offset]; 195 return false; 196 } 197 198 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 199 auto r = insn->bytes; 200 uint64_t offset = insn->readerCursor - insn->startLocation; 201 if (offset + sizeof(T) > r.size()) 202 return true; 203 ptr = support::endian::read<T>(&r[offset], llvm::endianness::little); 204 insn->readerCursor += sizeof(T); 205 return false; 206 } 207 208 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 209 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 210 } 211 212 static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { 213 return insn->mode == MODE_64BIT && prefix == 0xd5; 214 } 215 216 // Consumes all of an instruction's prefix bytes, and marks the 217 // instruction as having them. Also sets the instruction's default operand, 218 // address, and other relevant data sizes to report operands correctly. 219 // 220 // insn must not be empty. 
221 static int readPrefixes(struct InternalInstruction *insn) { 222 bool isPrefix = true; 223 uint8_t byte = 0; 224 uint8_t nextByte; 225 226 LLVM_DEBUG(dbgs() << "readPrefixes()"); 227 228 while (isPrefix) { 229 // If we fail reading prefixes, just stop here and let the opcode reader 230 // deal with it. 231 if (consume(insn, byte)) 232 break; 233 234 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 235 // break and let it be disassembled as a normal "instruction". 236 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 237 break; 238 239 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { 240 // If the byte is 0xf2 or 0xf3, and any of the following conditions are 241 // met: 242 // - it is followed by a LOCK (0xf0) prefix 243 // - it is followed by an xchg instruction 244 // then it should be disassembled as a xacquire/xrelease not repne/rep. 245 if (((nextByte == 0xf0) || 246 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 247 insn->xAcquireRelease = true; 248 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 249 break; 250 } 251 // Also if the byte is 0xf3, and the following condition is met: 252 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 253 // "mov mem, imm" (opcode 0xc6/0xc7) instructions. 254 // then it should be disassembled as an xrelease not rep. 
255 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 256 nextByte == 0xc6 || nextByte == 0xc7)) { 257 insn->xAcquireRelease = true; 258 break; 259 } 260 if (isREX(insn, nextByte)) { 261 uint8_t nnextByte; 262 // Go to REX prefix after the current one 263 if (consume(insn, nnextByte)) 264 return -1; 265 // We should be able to read next byte after REX prefix 266 if (peek(insn, nnextByte)) 267 return -1; 268 --insn->readerCursor; 269 } 270 } 271 272 switch (byte) { 273 case 0xf0: // LOCK 274 insn->hasLockPrefix = true; 275 break; 276 case 0xf2: // REPNE/REPNZ 277 case 0xf3: { // REP or REPE/REPZ 278 uint8_t nextByte; 279 if (peek(insn, nextByte)) 280 break; 281 // TODO: 282 // 1. There could be several 0x66 283 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 284 // it's not mandatory prefix 285 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 286 // 0x0f exactly after it to be mandatory prefix 287 // 4. if (nextByte == 0xd5) it's REX2 and we need 288 // 0x0f exactly after it to be mandatory prefix 289 if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f || 290 nextByte == 0x66) 291 // The last of 0xf2 /0xf3 is mandatory prefix 292 insn->mandatoryPrefix = byte; 293 insn->repeatPrefix = byte; 294 break; 295 } 296 case 0x2e: // CS segment override -OR- Branch not taken 297 insn->segmentOverride = SEG_OVERRIDE_CS; 298 break; 299 case 0x36: // SS segment override -OR- Branch taken 300 insn->segmentOverride = SEG_OVERRIDE_SS; 301 break; 302 case 0x3e: // DS segment override 303 insn->segmentOverride = SEG_OVERRIDE_DS; 304 break; 305 case 0x26: // ES segment override 306 insn->segmentOverride = SEG_OVERRIDE_ES; 307 break; 308 case 0x64: // FS segment override 309 insn->segmentOverride = SEG_OVERRIDE_FS; 310 break; 311 case 0x65: // GS segment override 312 insn->segmentOverride = SEG_OVERRIDE_GS; 313 break; 314 case 0x66: { // Operand-size override { 315 uint8_t nextByte; 316 insn->hasOpSize = true; 317 if 
(peek(insn, nextByte)) 318 break; 319 // 0x66 can't overwrite existing mandatory prefix and should be ignored 320 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) 321 insn->mandatoryPrefix = byte; 322 break; 323 } 324 case 0x67: // Address-size override 325 insn->hasAdSize = true; 326 break; 327 default: // Not a prefix byte 328 isPrefix = false; 329 break; 330 } 331 332 if (isPrefix) 333 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); 334 } 335 336 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 337 338 if (byte == 0x62) { 339 uint8_t byte1, byte2; 340 if (consume(insn, byte1)) { 341 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix"); 342 return -1; 343 } 344 345 if (peek(insn, byte2)) { 346 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 347 return -1; 348 } 349 350 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) { 351 insn->vectorExtensionType = TYPE_EVEX; 352 } else { 353 --insn->readerCursor; // unconsume byte1 354 --insn->readerCursor; // unconsume byte 355 } 356 357 if (insn->vectorExtensionType == TYPE_EVEX) { 358 insn->vectorExtensionPrefix[0] = byte; 359 insn->vectorExtensionPrefix[1] = byte1; 360 if (consume(insn, insn->vectorExtensionPrefix[2])) { 361 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 362 return -1; 363 } 364 if (consume(insn, insn->vectorExtensionPrefix[3])) { 365 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix"); 366 return -1; 367 } 368 369 if (insn->mode == MODE_64BIT) { 370 // We simulate the REX prefix for simplicity's sake 371 insn->rexPrefix = 0x40 | 372 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | 373 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | 374 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | 375 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 376 377 // We simulate the REX2 prefix for simplicity's sake 378 insn->rex2ExtensionPrefix[1] = 379 (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) 
<< 6) | 380 (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) | 381 (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4); 382 } 383 384 LLVM_DEBUG( 385 dbgs() << format( 386 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 387 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 388 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3])); 389 } 390 } else if (byte == 0xc4) { 391 uint8_t byte1; 392 if (peek(insn, byte1)) { 393 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 394 return -1; 395 } 396 397 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 398 insn->vectorExtensionType = TYPE_VEX_3B; 399 else 400 --insn->readerCursor; 401 402 if (insn->vectorExtensionType == TYPE_VEX_3B) { 403 insn->vectorExtensionPrefix[0] = byte; 404 consume(insn, insn->vectorExtensionPrefix[1]); 405 consume(insn, insn->vectorExtensionPrefix[2]); 406 407 // We simulate the REX prefix for simplicity's sake 408 409 if (insn->mode == MODE_64BIT) 410 insn->rexPrefix = 0x40 | 411 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | 412 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | 413 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | 414 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 415 416 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 417 insn->vectorExtensionPrefix[0], 418 insn->vectorExtensionPrefix[1], 419 insn->vectorExtensionPrefix[2])); 420 } 421 } else if (byte == 0xc5) { 422 uint8_t byte1; 423 if (peek(insn, byte1)) { 424 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 425 return -1; 426 } 427 428 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 429 insn->vectorExtensionType = TYPE_VEX_2B; 430 else 431 --insn->readerCursor; 432 433 if (insn->vectorExtensionType == TYPE_VEX_2B) { 434 insn->vectorExtensionPrefix[0] = byte; 435 consume(insn, insn->vectorExtensionPrefix[1]); 436 437 if (insn->mode == MODE_64BIT) 438 insn->rexPrefix = 439 0x40 | 
(rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 440 441 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 442 default: 443 break; 444 case VEX_PREFIX_66: 445 insn->hasOpSize = true; 446 break; 447 } 448 449 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx", 450 insn->vectorExtensionPrefix[0], 451 insn->vectorExtensionPrefix[1])); 452 } 453 } else if (byte == 0x8f) { 454 uint8_t byte1; 455 if (peek(insn, byte1)) { 456 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP"); 457 return -1; 458 } 459 460 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction. 461 insn->vectorExtensionType = TYPE_XOP; 462 else 463 --insn->readerCursor; 464 465 if (insn->vectorExtensionType == TYPE_XOP) { 466 insn->vectorExtensionPrefix[0] = byte; 467 consume(insn, insn->vectorExtensionPrefix[1]); 468 consume(insn, insn->vectorExtensionPrefix[2]); 469 470 // We simulate the REX prefix for simplicity's sake 471 472 if (insn->mode == MODE_64BIT) 473 insn->rexPrefix = 0x40 | 474 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | 475 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | 476 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | 477 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 478 479 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 480 default: 481 break; 482 case VEX_PREFIX_66: 483 insn->hasOpSize = true; 484 break; 485 } 486 487 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 488 insn->vectorExtensionPrefix[0], 489 insn->vectorExtensionPrefix[1], 490 insn->vectorExtensionPrefix[2])); 491 } 492 } else if (isREX2(insn, byte)) { 493 uint8_t byte1; 494 if (peek(insn, byte1)) { 495 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2"); 496 return -1; 497 } 498 insn->rex2ExtensionPrefix[0] = byte; 499 consume(insn, insn->rex2ExtensionPrefix[1]); 500 501 // We simulate the REX prefix for simplicity's sake 502 insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) | 503 
(rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) | 504 (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) | 505 (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0); 506 LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", 507 insn->rex2ExtensionPrefix[0], 508 insn->rex2ExtensionPrefix[1])); 509 } else if (isREX(insn, byte)) { 510 if (peek(insn, nextByte)) 511 return -1; 512 insn->rexPrefix = byte; 513 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); 514 } else 515 --insn->readerCursor; 516 517 if (insn->mode == MODE_16BIT) { 518 insn->registerSize = (insn->hasOpSize ? 4 : 2); 519 insn->addressSize = (insn->hasAdSize ? 4 : 2); 520 insn->displacementSize = (insn->hasAdSize ? 4 : 2); 521 insn->immediateSize = (insn->hasOpSize ? 4 : 2); 522 } else if (insn->mode == MODE_32BIT) { 523 insn->registerSize = (insn->hasOpSize ? 2 : 4); 524 insn->addressSize = (insn->hasAdSize ? 2 : 4); 525 insn->displacementSize = (insn->hasAdSize ? 2 : 4); 526 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 527 } else if (insn->mode == MODE_64BIT) { 528 insn->displacementSize = 4; 529 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 530 insn->registerSize = 8; 531 insn->addressSize = (insn->hasAdSize ? 4 : 8); 532 insn->immediateSize = 4; 533 insn->hasOpSize = false; 534 } else { 535 insn->registerSize = (insn->hasOpSize ? 2 : 4); 536 insn->addressSize = (insn->hasAdSize ? 4 : 8); 537 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 538 } 539 } 540 541 return 0; 542 } 543 544 // Consumes the SIB byte to determine addressing information. 
static int readSIB(struct InternalInstruction *insn) {
  // Returns 0 on success, -1 if the SIB byte cannot be read. Must only be
  // reached with a 4- or 8-byte address size.
  SIBBase sibBaseBase = SIB_BASE_NONE;
  uint8_t index, base;

  LLVM_DEBUG(dbgs() << "readSIB()");
  // Pick the first register of the index/base enumerations matching the
  // address size; field values are added to these below.
  switch (insn->addressSize) {
  case 2:
  default:
    llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  case 4:
    insn->sibIndexBase = SIB_INDEX_EAX;
    sibBaseBase = SIB_BASE_EAX;
    break;
  case 8:
    insn->sibIndexBase = SIB_INDEX_RAX;
    sibBaseBase = SIB_BASE_RAX;
    break;
  }

  if (consume(insn, insn->sib))
    return -1;

  // Extend the 3-bit index field with REX.X (bit 3) and REX2.X4 (bit 4).
  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
          (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  // An extended index of exactly 4 means "no index register".
  if (index == 0x4) {
    insn->sibIndex = SIB_INDEX_NONE;
  } else {
    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  }

  insn->sibScale = 1 << scaleFromSIB(insn->sib);

  // Extend the 3-bit base field with REX.B (bit 3) and REX2.B4 (bit 4).
  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
         (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  switch (base) {
  case 0x5:
  case 0xd:
    // For these base values the ModR/M mod field decides both the
    // displacement width and whether a base register is used at all.
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = SIB_BASE_NONE;
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    default:
      llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
    }
    break;
  default:
    insn->sibBase = (SIBBase)(sibBaseBase + base);
    break;
  }

  return 0;
}

// Consumes the displacement selected by insn->eaDisplacement (none, 8, 16 or
// 32 bits, read as a signed value of that width) and records the offset of the
// displacement from the start of the instruction. Returns 0 on success, -1 if
// the bytes cannot be read.
static int readDisplacement(struct InternalInstruction *insn) {
  int8_t d8;
  int16_t d16;
  int32_t d32;
  LLVM_DEBUG(dbgs() << "readDisplacement()");

  // Recorded even when no displacement is present.
  insn->displacementOffset = insn->readerCursor - insn->startLocation;
  switch (insn->eaDisplacement) {
  case EA_DISP_NONE:
    break;
  case EA_DISP_8:
    if (consume(insn, d8))
      return -1;
    insn->displacement = d8;
    break;
  case EA_DISP_16:
    if (consume(insn, d16))
      return -1;
    insn->displacement = d16;
    break;
  case EA_DISP_32:
    if (consume(insn, d32))
      return -1;
    insn->displacement = d32;
    break;
  }

  return 0;
}

// Consumes all addressing information (ModR/M byte, SIB byte, and
// displacement). Does nothing if the ModR/M byte was already consumed.
// Returns 0 on success, -1 if any required byte cannot be read.
static int readModRM(struct InternalInstruction *insn) {
  uint8_t mod, rm, reg;
  LLVM_DEBUG(dbgs() << "readModRM()");

  if (insn->consumedModRM)
    return 0;

  if (consume(insn, insn->modRM))
    return -1;
  insn->consumedModRM = true;

  mod = modFromModRM(insn->modRM);
  rm = rmFromModRM(insn->modRM);
  reg = regFromModRM(insn->modRM);

  // This goes by insn->registerSize to pick the correct register, which messes
  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  // fixupReg().
  switch (insn->registerSize) {
  case 2:
    insn->regBase = MODRM_REG_AX;
    insn->eaRegBase = EA_REG_AX;
    break;
  case 4:
    insn->regBase = MODRM_REG_EAX;
    insn->eaRegBase = EA_REG_EAX;
    break;
  case 8:
    insn->regBase = MODRM_REG_RAX;
    insn->eaRegBase = EA_REG_RAX;
    break;
  }

  // Extend the 3-bit reg and rm fields with the REX (bit 3) and REX2 (bit 4)
  // extension bits.
  reg |= (rFromREX(insn->rexPrefix) << 3) |
         (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
  rm |= (bFromREX(insn->rexPrefix) << 3) |
        (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;

  insn->reg = (Reg)(insn->regBase + reg);

  switch (insn->addressSize) {
  case 2: {
    // 16-bit addressing: no SIB byte; rm selects an entry starting at
    // EA_BASE_BX_SI.
    EABase eaBaseBase = EA_BASE_BX_SI;

    switch (mod) {
    case 0x0:
      if (rm == 0x6) {
        // mod=0, rm=6: 16-bit displacement with no base register.
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_16;
        if (readDisplacement(insn))
          return -1;
      } else {
        insn->eaBase = (EABase)(eaBaseBase + rm);
        insn->eaDisplacement = EA_DISP_NONE;
      }
      break;
    case 0x1:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_8;
      insn->displacementSize = 1;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x2:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_16;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x3:
      // Register operand. NOTE(review): eaDisplacement is not assigned on
      // this path before readDisplacement() runs; presumably it still holds
      // EA_DISP_NONE from initialization - confirm against the caller.
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      if (readDisplacement(insn))
        return -1;
      break;
    }
    break;
  }
  case 4:
  case 8: {
    EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
      // In determining whether RIP-relative mode is used (rm=5),
      // or whether a SIB byte is present (rm=4),
      // the extension bits (REX.b and EVEX.x) are ignored.
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      case 0x5: // RIP-relative
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
      insn->displacementSize = 1;
      [[fallthrough]];
    case 0x2:
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        // Unlike the mod=0 case, EA_BASE_sib is used here for both address
        // sizes.
        insn->eaBase = EA_BASE_sib;
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      // Register operand: no memory access, hence no displacement.
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      break;
    }
    break;
  }
  } // switch (insn->addressSize)

  return 0;
}

// Defines a static function `name` that maps an operand type and a register
// index to a concrete register enumerator.
//
// @param name   - Name of the generated function.
// @param base   - Expression yielding the first register for TYPE_Rv.
// @param prefix - Enumerator prefix (e.g. MODRM_REG or EA_REG) pasted in front
//                 of the per-class first-register names (_AL, _AX, _XMM0, ...).
//
// The generated function sets *valid to 0 for an unhandled type or an index
// outside the range accepted for the class, and to 1 otherwise.
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      else                                                                     \
        return prefix##_AL + index;                                            \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_TMM_PAIR:                                                        \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0_TMM1 + (index / 2);                                 \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)

// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
//
// @param insn  - See fixup*Value().
// @param op    - The operand specifier.
// @return      - 0 if fixup was successful; -1 if the register returned was
//                invalid for its class.
865 static int fixupReg(struct InternalInstruction *insn, 866 const struct OperandSpecifier *op) { 867 uint8_t valid; 868 LLVM_DEBUG(dbgs() << "fixupReg()"); 869 870 switch ((OperandEncoding)op->encoding) { 871 default: 872 debug("Expected a REG or R/M encoding in fixupReg"); 873 return -1; 874 case ENCODING_VVVV: 875 insn->vvvv = 876 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 877 if (!valid) 878 return -1; 879 break; 880 case ENCODING_REG: 881 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 882 insn->reg - insn->regBase, &valid); 883 if (!valid) 884 return -1; 885 break; 886 CASE_ENCODING_RM: 887 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 888 modFromModRM(insn->modRM) == 3) { 889 // EVEX_X can extend the register id to 32 for a non-GPR register that is 890 // encoded in RM. 891 // mode : MODE_64_BIT 892 // Only 8 vector registers are available in 32 bit mode 893 // mod : 3 894 // RM encodes a register 895 switch (op->type) { 896 case TYPE_Rv: 897 case TYPE_R8: 898 case TYPE_R16: 899 case TYPE_R32: 900 case TYPE_R64: 901 break; 902 default: 903 insn->eaBase = 904 (EABase)(insn->eaBase + 905 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4)); 906 break; 907 } 908 } 909 [[fallthrough]]; 910 case ENCODING_SIB: 911 if (insn->eaBase >= insn->eaRegBase) { 912 insn->eaBase = (EABase)fixupRMValue( 913 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 914 if (!valid) 915 return -1; 916 } 917 break; 918 } 919 920 return 0; 921 } 922 923 // Read the opcode (except the ModR/M byte in the case of extended or escape 924 // opcodes). 
925 static bool readOpcode(struct InternalInstruction *insn) { 926 uint8_t current; 927 LLVM_DEBUG(dbgs() << "readOpcode()"); 928 929 insn->opcodeType = ONEBYTE; 930 if (insn->vectorExtensionType == TYPE_EVEX) { 931 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 932 default: 933 LLVM_DEBUG( 934 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 935 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 936 return true; 937 case VEX_LOB_0F: 938 insn->opcodeType = TWOBYTE; 939 return consume(insn, insn->opcode); 940 case VEX_LOB_0F38: 941 insn->opcodeType = THREEBYTE_38; 942 return consume(insn, insn->opcode); 943 case VEX_LOB_0F3A: 944 insn->opcodeType = THREEBYTE_3A; 945 return consume(insn, insn->opcode); 946 case VEX_LOB_MAP4: 947 insn->opcodeType = MAP4; 948 return consume(insn, insn->opcode); 949 case VEX_LOB_MAP5: 950 insn->opcodeType = MAP5; 951 return consume(insn, insn->opcode); 952 case VEX_LOB_MAP6: 953 insn->opcodeType = MAP6; 954 return consume(insn, insn->opcode); 955 case VEX_LOB_MAP7: 956 insn->opcodeType = MAP7; 957 return consume(insn, insn->opcode); 958 } 959 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 960 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 961 default: 962 LLVM_DEBUG( 963 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 964 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 965 return true; 966 case VEX_LOB_0F: 967 insn->opcodeType = TWOBYTE; 968 return consume(insn, insn->opcode); 969 case VEX_LOB_0F38: 970 insn->opcodeType = THREEBYTE_38; 971 return consume(insn, insn->opcode); 972 case VEX_LOB_0F3A: 973 insn->opcodeType = THREEBYTE_3A; 974 return consume(insn, insn->opcode); 975 case VEX_LOB_MAP5: 976 insn->opcodeType = MAP5; 977 return consume(insn, insn->opcode); 978 case VEX_LOB_MAP6: 979 insn->opcodeType = MAP6; 980 return consume(insn, insn->opcode); 981 case VEX_LOB_MAP7: 982 insn->opcodeType = MAP7; 983 return consume(insn, insn->opcode); 984 } 985 } else if 
(insn->vectorExtensionType == TYPE_VEX_2B) { 986 insn->opcodeType = TWOBYTE; 987 return consume(insn, insn->opcode); 988 } else if (insn->vectorExtensionType == TYPE_XOP) { 989 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 990 default: 991 LLVM_DEBUG( 992 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 993 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 994 return true; 995 case XOP_MAP_SELECT_8: 996 insn->opcodeType = XOP8_MAP; 997 return consume(insn, insn->opcode); 998 case XOP_MAP_SELECT_9: 999 insn->opcodeType = XOP9_MAP; 1000 return consume(insn, insn->opcode); 1001 case XOP_MAP_SELECT_A: 1002 insn->opcodeType = XOPA_MAP; 1003 return consume(insn, insn->opcode); 1004 } 1005 } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) { 1006 // m bit indicates opcode map 1 1007 insn->opcodeType = TWOBYTE; 1008 return consume(insn, insn->opcode); 1009 } 1010 1011 if (consume(insn, current)) 1012 return true; 1013 1014 if (current == 0x0f) { 1015 LLVM_DEBUG( 1016 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 1017 if (consume(insn, current)) 1018 return true; 1019 1020 if (current == 0x38) { 1021 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1022 current)); 1023 if (consume(insn, current)) 1024 return true; 1025 1026 insn->opcodeType = THREEBYTE_38; 1027 } else if (current == 0x3a) { 1028 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1029 current)); 1030 if (consume(insn, current)) 1031 return true; 1032 1033 insn->opcodeType = THREEBYTE_3A; 1034 } else if (current == 0x0f) { 1035 LLVM_DEBUG( 1036 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 1037 1038 // Consume operands before the opcode to comply with the 3DNow encoding 1039 if (readModRM(insn)) 1040 return true; 1041 1042 if (consume(insn, current)) 1043 return true; 1044 1045 insn->opcodeType = THREEDNOW_MAP; 1046 } else { 1047 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape 
prefix"); 1048 insn->opcodeType = TWOBYTE; 1049 } 1050 } else if (insn->mandatoryPrefix) 1051 // The opcode with mandatory prefix must start with opcode escape. 1052 // If not it's legacy repeat prefix 1053 insn->mandatoryPrefix = 0; 1054 1055 // At this point we have consumed the full opcode. 1056 // Anything we consume from here on must be unconsumed. 1057 insn->opcode = current; 1058 1059 return false; 1060 } 1061 1062 // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 1063 static bool is16BitEquivalent(const char *orig, const char *equiv) { 1064 for (int i = 0;; i++) { 1065 if (orig[i] == '\0' && equiv[i] == '\0') 1066 return true; 1067 if (orig[i] == '\0' || equiv[i] == '\0') 1068 return false; 1069 if (orig[i] != equiv[i]) { 1070 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 1071 continue; 1072 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 1073 continue; 1074 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 1075 continue; 1076 return false; 1077 } 1078 } 1079 } 1080 1081 // Determine whether this instruction is a 64-bit instruction. 1082 static bool is64Bit(const char *name) { 1083 for (int i = 0;; ++i) { 1084 if (name[i] == '\0') 1085 return false; 1086 if (name[i] == '6' && name[i + 1] == '4') 1087 return true; 1088 } 1089 } 1090 1091 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1092 // for extended and escape opcodes, and using a supplied attribute mask. 
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
                                        struct InternalInstruction *insn,
                                        uint16_t attrMask) {
  // Translate the attribute bitmask into an instruction context.
  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  // Select the decode table matching the opcode map found by readOpcode().
  const ContextDecision *decision;
  switch (insn->opcodeType) {
  case ONEBYTE:
    decision = &ONEBYTE_SYM;
    break;
  case TWOBYTE:
    decision = &TWOBYTE_SYM;
    break;
  case THREEBYTE_38:
    decision = &THREEBYTE38_SYM;
    break;
  case THREEBYTE_3A:
    decision = &THREEBYTE3A_SYM;
    break;
  case XOP8_MAP:
    decision = &XOP8_MAP_SYM;
    break;
  case XOP9_MAP:
    decision = &XOP9_MAP_SYM;
    break;
  case XOPA_MAP:
    decision = &XOPA_MAP_SYM;
    break;
  case THREEDNOW_MAP:
    decision = &THREEDNOW_MAP_SYM;
    break;
  case MAP4:
    decision = &MAP4_SYM;
    break;
  case MAP5:
    decision = &MAP5_SYM;
    break;
  case MAP6:
    decision = &MAP6_SYM;
    break;
  case MAP7:
    decision = &MAP7_SYM;
    break;
  }

  // Unless the table entry for this opcode is MODRM_ONEENTRY, the ModR/M byte
  // takes part in the lookup and has to be read first. A failure to read it
  // is reported as -1.
  if (decision->opcodeDecisions[insnCtx]
          .modRMDecisions[insn->opcode]
          .modrm_type != MODRM_ONEENTRY) {
    if (readModRM(insn))
      return -1;
    *instructionID =
        decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  } else {
    *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  }

  return 0;
}

// Returns true if the instruction is in MAP4 and its opcode (plus, for some
// opcodes, the reg field of the ModR/M byte) matches one of the encodings
// listed below. Going by the name, these are presumably the CCMP and CTEST
// encodings. Reads insn->modRM, so for the opcodes that test the reg field
// the ModR/M byte must already have been read.
static bool isCCMPOrCTEST(InternalInstruction *insn) {
  if (insn->opcodeType != MAP4)
    return false;
  if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)
    return true;
  // Ignore the lowest opcode bit so that each case covers an opcode pair.
  switch (insn->opcode & 0xfe) {
  default:
    return false;
  case 0x38:
  case 0x3a:
  case 0x84:
    return true;
  case 0x80:
    return regFromModRM(insn->modRM) == 7;
  case 0xf6:
    return regFromModRM(insn->modRM) == 0;
  }
}

// Returns true if the NF bit of the fourth EVEX prefix byte is set and the
// instruction is either in MAP4 or is one of the listed THREEBYTE_38 opcodes
// without a pp prefix.
static bool isNF(InternalInstruction *insn) {
  if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    return false;
  if (insn->opcodeType == MAP4)
    return true;
  // Below NF instructions are not in map4.
  if (insn->opcodeType == THREEBYTE_38 &&
      ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {
    switch (insn->opcode) {
    case 0xf2: // ANDN
    case 0xf3: // BLSI, BLSR, BLSMSK
    case 0xf5: // BZHI
    case 0xf7: // BEXTR
      return true;
    default:
      break;
    }
  }
  return false;
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// On success, stores the result in insn->instructionID and insn->spec and
// returns 0. Returns -1 if the vector extension type is not recognized or if
// a required ModR/M byte cannot be read during the main table lookups. The
// mii argument is used only to look up instruction names for the
// table-compensation heuristics below.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  // Derive the attribute bits from the VEX/EVEX/XOP prefix if there is one,
  // otherwise from the legacy or mandatory prefixes.
  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      // The ModR/M byte is read here (when isNF() holds) before
      // isCCMPOrCTEST() inspects insn->modRM; the order of these operands
      // matters.
      if (isNF(insn) && !readModRM(insn) &&
          !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
        attrMask |= ATTR_EVEXNF;
      // aaa is not used as an opmask in MAP4
      else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
               (insn->opcodeType != MAP4))
        attrMask |= ATTR_EVEXK;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) {
        attrMask |= ATTR_EVEXB;
        // ATTR_EVEXU is only set for the register form (mod == 3).
        if (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) && !readModRM(insn) &&
            modFromModRM(insn->modRM) == 3)
          attrMask |= ATTR_EVEXU;
      }
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // REX.W (bit 3 of the REX prefix) sets ATTR_REXW and clears ATTR_ADSIZE.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  // Absolute jump and pushp/popp need special handling
  if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
      (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
    attrMask |= ATTR_REX2;

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    // Same forcing for two-byte opcodes 0x80-0x8F.
    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }


  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      // If the lookup with ATTR_REXW fails, keep the ID found without it.
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
      (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    // Repeat the lookup with the adjusted attribute mask.
    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Take the OpSize variant only if its name is the 16-bit equivalent of
    // the plain one and exactly one of "16-bit mode" / "OpSize prefix
    // present" holds.
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup failed: restore the opcode and keep the original ID.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  // No compensation applied: use the ID from the first lookup.
  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}

// Read an operand from the opcode field of an instruction and interprets it
// appropriately given the operand width. Handles AddRegFrm instructions.
//
// @param insn  - the instruction whose opcode field is to be read.
// @param size  - The width (in bytes) of the register being specified.
1501 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1502 // RAX. 1503 // @return - 0 on success; nonzero otherwise. 1504 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1505 LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1506 1507 if (size == 0) 1508 size = insn->registerSize; 1509 1510 auto setOpcodeRegister = [&](unsigned base) { 1511 insn->opcodeRegister = 1512 (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) | 1513 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) | 1514 (insn->opcode & 7))); 1515 }; 1516 1517 switch (size) { 1518 case 1: 1519 setOpcodeRegister(MODRM_REG_AL); 1520 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1521 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1522 insn->opcodeRegister = 1523 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1524 } 1525 1526 break; 1527 case 2: 1528 setOpcodeRegister(MODRM_REG_AX); 1529 break; 1530 case 4: 1531 setOpcodeRegister(MODRM_REG_EAX); 1532 break; 1533 case 8: 1534 setOpcodeRegister(MODRM_REG_RAX); 1535 break; 1536 } 1537 1538 return 0; 1539 } 1540 1541 // Consume an immediate operand from an instruction, given the desired operand 1542 // size. 1543 // 1544 // @param insn - The instruction whose operand is to be read. 1545 // @param size - The width (in bytes) of the operand. 1546 // @return - 0 if the immediate was successfully consumed; nonzero 1547 // otherwise. 
static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
  uint8_t imm8;
  uint16_t imm16;
  uint32_t imm32;
  uint64_t imm64;

  LLVM_DEBUG(dbgs() << "readImmediate()");

  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");

  // Record the size and the offset (relative to the start of the instruction)
  // of this immediate before consuming it.
  insn->immediateSize = size;
  insn->immediateOffset = insn->readerCursor - insn->startLocation;

  // Each case reads a value of the matching width and stores it in the next
  // free slot of insn->immediates.
  switch (size) {
  case 1:
    if (consume(insn, imm8))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm8;
    break;
  case 2:
    if (consume(insn, imm16))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm16;
    break;
  case 4:
    if (consume(insn, imm32))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm32;
    break;
  case 8:
    if (consume(insn, imm64))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm64;
    break;
  default:
    llvm_unreachable("invalid size");
  }

  insn->numImmediatesConsumed++;

  return 0;
}

// Consume vvvv from an instruction if it has a VEX prefix.
//
// Stores the field in insn->vvvv and returns 0 for EVEX, VEX (2- and 3-byte)
// and XOP prefixes; returns -1 for any other vector extension type.
static int readVVVV(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readVVVV()");

  int vvvv;
  if (insn->vectorExtensionType == TYPE_EVEX)
    // EVEX supplies a fifth bit (V2) from the fourth prefix byte.
    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
  else if (insn->vectorExtensionType == TYPE_VEX_3B)
    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
  else if (insn->vectorExtensionType == TYPE_VEX_2B)
    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
  else if (insn->vectorExtensionType == TYPE_XOP)
    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
  else
    return -1;

  if (insn->mode != MODE_64BIT)
    vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.

  insn->vvvv = static_cast<Reg>(vvvv);
  return 0;
}

// Read a mask register from the aaa field of the fourth EVEX prefix byte and
// store it in insn->writemask.
//
// @param insn    - The instruction whose EVEX prefix is to be read.
// @return        - 0 on success; nonzero otherwise (i.e. when the instruction
//                  has no EVEX prefix).
static int readMaskRegister(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readMaskRegister()");

  if (insn->vectorExtensionType != TYPE_EVEX)
    return -1;

  insn->writemask =
      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
  return 0;
}

// Consults the specifier for an instruction and consumes all
// operands for that instruction, interpreting them as it goes.
//
// Returns 0 on success and -1 if any operand cannot be read or fixed up, if
// an operand has an unknown encoding, or if a non-zero vvvv field is present
// but no operand uses it.
static int readOperands(struct InternalInstruction *insn) {
  int hasVVVV, needVVVV;
  // Set once an ENCODING_IB operand of type XMM/YMM has been read, so that a
  // following ENCODING_IB operand reuses that byte instead of reading another.
  int sawRegImm = 0;

  LLVM_DEBUG(dbgs() << "readOperands()");

  // If non-zero vvvv specified, make sure one of the operands uses it.
  hasVVVV = !readVVVV(insn);
  needVVVV = hasVVVV && (insn->vvvv != 0);

  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
    switch (Op.encoding) {
    case ENCODING_NONE:
    case ENCODING_SI:
    case ENCODING_DI:
      break;
    CASE_ENCODING_VSIB:
      // VSIB can use the V2 bit so check only the other bits.
      if (needVVVV)
        needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
      if (readModRM(insn))
        return -1;

      // Reject if SIB wasn't used.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;

      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
      if (insn->sibIndex == SIB_INDEX_NONE)
        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);

      // If EVEX.v2 is set this is one of the 16-31 registers.
      if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
          v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);

      // Adjust the index register to the correct size.
      switch ((OperandType)Op.type) {
      default:
        debug("Unhandled VSIB index type");
        return -1;
      case TYPE_MVSIBX:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBY:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBZ:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      }

      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
      break;
    case ENCODING_SIB:
      // Reject if SIB wasn't used.
      // NOTE(review): unlike the VSIB case above, eaBase is tested before
      // readModRM() is called here. This presumably relies on the ModR/M byte
      // having been consumed earlier, during instruction ID lookup; confirm.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_REG:
    CASE_ENCODING_RM:
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
      break;
    case ENCODING_IB:
      if (sawRegImm) {
        // Saw a register immediate so don't read again and instead split the
        // previous immediate. FIXME: This is a hack.
        insn->immediates[insn->numImmediatesConsumed] =
            insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
        ++insn->numImmediatesConsumed;
        break;
      }
      if (readImmediate(insn, 1))
        return -1;
      if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
        sawRegImm = 1;
      break;
    case ENCODING_IW:
      if (readImmediate(insn, 2))
        return -1;
      break;
    case ENCODING_ID:
      if (readImmediate(insn, 4))
        return -1;
      break;
    case ENCODING_IO:
      if (readImmediate(insn, 8))
        return -1;
      break;
    case ENCODING_Iv:
      if (readImmediate(insn, insn->immediateSize))
        return -1;
      break;
    case ENCODING_Ia:
      if (readImmediate(insn, insn->addressSize))
        return -1;
      break;
    case ENCODING_IRC:
      // The two-bit RC value is formed from the L2 (high) and L (low) bits of
      // the fourth EVEX prefix byte.
      insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
                 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
      break;
    case ENCODING_RB:
      if (readOpcodeRegister(insn, 1))
        return -1;
      break;
    case ENCODING_RW:
      if (readOpcodeRegister(insn, 2))
        return -1;
      break;
    case ENCODING_RD:
      if (readOpcodeRegister(insn, 4))
        return -1;
      break;
    case ENCODING_RO:
      if (readOpcodeRegister(insn, 8))
        return -1;
      break;
    case ENCODING_Rv:
      if (readOpcodeRegister(insn, 0))
        return -1;
      break;
    case ENCODING_CF:
      insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);
      needVVVV = false; // oszc shares the same bits with VVVV
      break;
    case ENCODING_CC:
      // The condition code comes from the EVEX prefix when isCCMPOrCTEST()
      // holds, and from the low four opcode bits otherwise.
      if (isCCMPOrCTEST(insn))
        insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);
      else
        insn->immediates[1] = insn->opcode & 0xf;
      break;
    case ENCODING_FP:
      break;
    case ENCODING_VVVV:
      needVVVV = 0; // Mark that we have found a VVVV operand.
      if (!hasVVVV)
        return -1;
      // Outside 64-bit mode only the low three bits of vvvv are kept.
      if (insn->mode != MODE_64BIT)
        insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_WRITEMASK:
      if (readMaskRegister(insn))
        return -1;
      break;
    case ENCODING_DUP:
      break;
    default:
      LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
      return -1;
    }
  }

  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
  if (needVVVV)
    return -1;

  return 0;
}

namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
} // namespace X86

} // namespace llvm

// Forward declaration; called by X86GenericDisassembler::getInstruction().
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);

namespace {

/// Generic disassembler for all X86 platforms. All each platform class should
/// have to do is subclass the constructor, and provide a different
/// disassemblerMode value.
1835 class X86GenericDisassembler : public MCDisassembler { 1836 std::unique_ptr<const MCInstrInfo> MII; 1837 public: 1838 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 1839 std::unique_ptr<const MCInstrInfo> MII); 1840 public: 1841 DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 1842 ArrayRef<uint8_t> Bytes, uint64_t Address, 1843 raw_ostream &cStream) const override; 1844 1845 private: 1846 DisassemblerMode fMode; 1847 }; 1848 1849 } // namespace 1850 1851 X86GenericDisassembler::X86GenericDisassembler( 1852 const MCSubtargetInfo &STI, 1853 MCContext &Ctx, 1854 std::unique_ptr<const MCInstrInfo> MII) 1855 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 1856 const FeatureBitset &FB = STI.getFeatureBits(); 1857 if (FB[X86::Is16Bit]) { 1858 fMode = MODE_16BIT; 1859 return; 1860 } else if (FB[X86::Is32Bit]) { 1861 fMode = MODE_32BIT; 1862 return; 1863 } else if (FB[X86::Is64Bit]) { 1864 fMode = MODE_64BIT; 1865 return; 1866 } 1867 1868 llvm_unreachable("Invalid CPU mode"); 1869 } 1870 1871 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 1872 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1873 raw_ostream &CStream) const { 1874 CommentStream = &CStream; 1875 1876 InternalInstruction Insn; 1877 memset(&Insn, 0, sizeof(InternalInstruction)); 1878 Insn.bytes = Bytes; 1879 Insn.startLocation = Address; 1880 Insn.readerCursor = Address; 1881 Insn.mode = fMode; 1882 1883 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1884 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1885 readOperands(&Insn)) { 1886 Size = Insn.readerCursor - Address; 1887 return Fail; 1888 } 1889 1890 Insn.operands = x86OperandSets[Insn.spec->operands]; 1891 Insn.length = Insn.readerCursor - Insn.startLocation; 1892 Size = Insn.length; 1893 if (Size > 15) 1894 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1895 1896 bool Ret = translateInstruction(Instr, Insn, this); 1897 if 
(!Ret) { 1898 unsigned Flags = X86::IP_NO_PREFIX; 1899 if (Insn.hasAdSize) 1900 Flags |= X86::IP_HAS_AD_SIZE; 1901 if (!Insn.mandatoryPrefix) { 1902 if (Insn.hasOpSize) 1903 Flags |= X86::IP_HAS_OP_SIZE; 1904 if (Insn.repeatPrefix == 0xf2) 1905 Flags |= X86::IP_HAS_REPEAT_NE; 1906 else if (Insn.repeatPrefix == 0xf3 && 1907 // It should not be 'pause' f3 90 1908 Insn.opcode != 0x90) 1909 Flags |= X86::IP_HAS_REPEAT; 1910 if (Insn.hasLockPrefix) 1911 Flags |= X86::IP_HAS_LOCK; 1912 } 1913 Instr.setFlags(Flags); 1914 } 1915 return (!Ret) ? Success : Fail; 1916 } 1917 1918 // 1919 // Private code that translates from struct InternalInstructions to MCInsts. 1920 // 1921 1922 /// translateRegister - Translates an internal register to the appropriate LLVM 1923 /// register, and appends it as an operand to an MCInst. 1924 /// 1925 /// @param mcInst - The MCInst to append to. 1926 /// @param reg - The Reg to append. 1927 static void translateRegister(MCInst &mcInst, Reg reg) { 1928 #define ENTRY(x) X86::x, 1929 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 1930 #undef ENTRY 1931 1932 MCPhysReg llvmRegnum = llvmRegnums[reg]; 1933 mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 1934 } 1935 1936 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 1937 0, // SEG_OVERRIDE_NONE 1938 X86::CS, 1939 X86::SS, 1940 X86::DS, 1941 X86::ES, 1942 X86::FS, 1943 X86::GS 1944 }; 1945 1946 /// translateSrcIndex - Appends a source index operand to an MCInst. 1947 /// 1948 /// @param mcInst - The MCInst to append to. 1949 /// @param insn - The internal instruction. 1950 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 1951 unsigned baseRegNo; 1952 1953 if (insn.mode == MODE_64BIT) 1954 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 1955 else if (insn.mode == MODE_32BIT) 1956 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 1957 else { 1958 assert(insn.mode == MODE_16BIT); 1959 baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 1960 } 1961 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1962 mcInst.addOperand(baseReg); 1963 1964 MCOperand segmentReg; 1965 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1966 mcInst.addOperand(segmentReg); 1967 return false; 1968 } 1969 1970 /// translateDstIndex - Appends a destination index operand to an MCInst. 1971 /// 1972 /// @param mcInst - The MCInst to append to. 1973 /// @param insn - The internal instruction. 1974 1975 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 1976 unsigned baseRegNo; 1977 1978 if (insn.mode == MODE_64BIT) 1979 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 1980 else if (insn.mode == MODE_32BIT) 1981 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 1982 else { 1983 assert(insn.mode == MODE_16BIT); 1984 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 1985 } 1986 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1987 mcInst.addOperand(baseReg); 1988 return false; 1989 } 1990 1991 /// translateImmediate - Appends an immediate operand to an MCInst. 1992 /// 1993 /// @param mcInst - The MCInst to append to. 1994 /// @param immediate - The immediate value to append. 1995 /// @param operand - The operand, as stored in the descriptor table. 1996 /// @param insn - The internal instruction. 1997 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 1998 const OperandSpecifier &operand, 1999 InternalInstruction &insn, 2000 const MCDisassembler *Dis) { 2001 // Sign-extend the immediate if necessary. 
2002 2003 OperandType type = (OperandType)operand.type; 2004 2005 bool isBranch = false; 2006 uint64_t pcrel = 0; 2007 if (type == TYPE_REL) { 2008 isBranch = true; 2009 pcrel = insn.startLocation + insn.length; 2010 switch (operand.encoding) { 2011 default: 2012 break; 2013 case ENCODING_Iv: 2014 switch (insn.displacementSize) { 2015 default: 2016 break; 2017 case 1: 2018 if(immediate & 0x80) 2019 immediate |= ~(0xffull); 2020 break; 2021 case 2: 2022 if(immediate & 0x8000) 2023 immediate |= ~(0xffffull); 2024 break; 2025 case 4: 2026 if(immediate & 0x80000000) 2027 immediate |= ~(0xffffffffull); 2028 break; 2029 case 8: 2030 break; 2031 } 2032 break; 2033 case ENCODING_IB: 2034 if(immediate & 0x80) 2035 immediate |= ~(0xffull); 2036 break; 2037 case ENCODING_IW: 2038 if(immediate & 0x8000) 2039 immediate |= ~(0xffffull); 2040 break; 2041 case ENCODING_ID: 2042 if(immediate & 0x80000000) 2043 immediate |= ~(0xffffffffull); 2044 break; 2045 } 2046 } 2047 // By default sign-extend all X86 immediates based on their encoding. 2048 else if (type == TYPE_IMM) { 2049 switch (operand.encoding) { 2050 default: 2051 break; 2052 case ENCODING_IB: 2053 if(immediate & 0x80) 2054 immediate |= ~(0xffull); 2055 break; 2056 case ENCODING_IW: 2057 if(immediate & 0x8000) 2058 immediate |= ~(0xffffull); 2059 break; 2060 case ENCODING_ID: 2061 if(immediate & 0x80000000) 2062 immediate |= ~(0xffffffffull); 2063 break; 2064 case ENCODING_IO: 2065 break; 2066 } 2067 } 2068 2069 switch (type) { 2070 case TYPE_XMM: 2071 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 2072 return; 2073 case TYPE_YMM: 2074 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 2075 return; 2076 case TYPE_ZMM: 2077 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 2078 return; 2079 default: 2080 // operand is 64 bits wide. Do nothing. 
2081 break; 2082 } 2083 2084 if (!Dis->tryAddingSymbolicOperand( 2085 mcInst, immediate + pcrel, insn.startLocation, isBranch, 2086 insn.immediateOffset, insn.immediateSize, insn.length)) 2087 mcInst.addOperand(MCOperand::createImm(immediate)); 2088 2089 if (type == TYPE_MOFFS) { 2090 MCOperand segmentReg; 2091 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2092 mcInst.addOperand(segmentReg); 2093 } 2094 } 2095 2096 /// translateRMRegister - Translates a register stored in the R/M field of the 2097 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 2098 /// @param mcInst - The MCInst to append to. 2099 /// @param insn - The internal instruction to extract the R/M field 2100 /// from. 2101 /// @return - 0 on success; -1 otherwise 2102 static bool translateRMRegister(MCInst &mcInst, 2103 InternalInstruction &insn) { 2104 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2105 debug("A R/M register operand may not have a SIB byte"); 2106 return true; 2107 } 2108 2109 switch (insn.eaBase) { 2110 default: 2111 debug("Unexpected EA base register"); 2112 return true; 2113 case EA_BASE_NONE: 2114 debug("EA_BASE_NONE for ModR/M base"); 2115 return true; 2116 #define ENTRY(x) case EA_BASE_##x: 2117 ALL_EA_BASES 2118 #undef ENTRY 2119 debug("A R/M register operand may not have a base; " 2120 "the operand must be a register."); 2121 return true; 2122 #define ENTRY(x) \ 2123 case EA_REG_##x: \ 2124 mcInst.addOperand(MCOperand::createReg(X86::x)); break; 2125 ALL_REGS 2126 #undef ENTRY 2127 } 2128 2129 return false; 2130 } 2131 2132 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 2133 /// fields of an internal instruction (and possibly its SIB byte) to a memory 2134 /// operand in LLVM's format, and appends it to an MCInst. 2135 /// 2136 /// @param mcInst - The MCInst to append to. 2137 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2138 /// from. 
2139 /// @param ForceSIB - The instruction must use SIB. 2140 /// @return - 0 on success; nonzero otherwise 2141 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 2142 const MCDisassembler *Dis, 2143 bool ForceSIB = false) { 2144 // Addresses in an MCInst are represented as five operands: 2145 // 1. basereg (register) The R/M base, or (if there is a SIB) the 2146 // SIB base 2147 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 2148 // scale amount 2149 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 2150 // the index (which is multiplied by the 2151 // scale amount) 2152 // 4. displacement (immediate) 0, or the displacement if there is one 2153 // 5. segmentreg (register) x86_registerNONE for now, but could be set 2154 // if we have segment overrides 2155 2156 MCOperand baseReg; 2157 MCOperand scaleAmount; 2158 MCOperand indexReg; 2159 MCOperand displacement; 2160 MCOperand segmentReg; 2161 uint64_t pcrel = 0; 2162 2163 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2164 if (insn.sibBase != SIB_BASE_NONE) { 2165 switch (insn.sibBase) { 2166 default: 2167 debug("Unexpected sibBase"); 2168 return true; 2169 #define ENTRY(x) \ 2170 case SIB_BASE_##x: \ 2171 baseReg = MCOperand::createReg(X86::x); break; 2172 ALL_SIB_BASES 2173 #undef ENTRY 2174 } 2175 } else { 2176 baseReg = MCOperand::createReg(X86::NoRegister); 2177 } 2178 2179 if (insn.sibIndex != SIB_INDEX_NONE) { 2180 switch (insn.sibIndex) { 2181 default: 2182 debug("Unexpected sibIndex"); 2183 return true; 2184 #define ENTRY(x) \ 2185 case SIB_INDEX_##x: \ 2186 indexReg = MCOperand::createReg(X86::x); break; 2187 EA_BASES_32BIT 2188 EA_BASES_64BIT 2189 REGS_XMM 2190 REGS_YMM 2191 REGS_ZMM 2192 #undef ENTRY 2193 } 2194 } else { 2195 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 2196 // but no index is used and modrm alone should have been enough. 2197 // -No base register in 32-bit mode. 
In 64-bit mode this is used to 2198 // avoid rip-relative addressing. 2199 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 2200 // base always requires a SIB byte. 2201 // -A scale other than 1 is used. 2202 if (!ForceSIB && 2203 (insn.sibScale != 1 || 2204 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 2205 (insn.sibBase != SIB_BASE_NONE && 2206 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 2207 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 2208 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : 2209 X86::RIZ); 2210 } else 2211 indexReg = MCOperand::createReg(X86::NoRegister); 2212 } 2213 2214 scaleAmount = MCOperand::createImm(insn.sibScale); 2215 } else { 2216 switch (insn.eaBase) { 2217 case EA_BASE_NONE: 2218 if (insn.eaDisplacement == EA_DISP_NONE) { 2219 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 2220 return true; 2221 } 2222 if (insn.mode == MODE_64BIT){ 2223 pcrel = insn.startLocation + insn.length; 2224 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 2225 insn.startLocation + 2226 insn.displacementOffset); 2227 // Section 2.2.1.6 2228 baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 2229 X86::RIP); 2230 } 2231 else 2232 baseReg = MCOperand::createReg(X86::NoRegister); 2233 2234 indexReg = MCOperand::createReg(X86::NoRegister); 2235 break; 2236 case EA_BASE_BX_SI: 2237 baseReg = MCOperand::createReg(X86::BX); 2238 indexReg = MCOperand::createReg(X86::SI); 2239 break; 2240 case EA_BASE_BX_DI: 2241 baseReg = MCOperand::createReg(X86::BX); 2242 indexReg = MCOperand::createReg(X86::DI); 2243 break; 2244 case EA_BASE_BP_SI: 2245 baseReg = MCOperand::createReg(X86::BP); 2246 indexReg = MCOperand::createReg(X86::SI); 2247 break; 2248 case EA_BASE_BP_DI: 2249 baseReg = MCOperand::createReg(X86::BP); 2250 indexReg = MCOperand::createReg(X86::DI); 2251 break; 2252 default: 2253 indexReg = MCOperand::createReg(X86::NoRegister); 2254 switch (insn.eaBase) { 2255 default: 2256 debug("Unexpected eaBase"); 2257 return true; 2258 // Here, we will use the fill-ins defined above. However, 2259 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 2260 // sib and sib64 were handled in the top-level if, so they're only 2261 // placeholders to keep the compiler happy. 2262 #define ENTRY(x) \ 2263 case EA_BASE_##x: \ 2264 baseReg = MCOperand::createReg(X86::x); break; 2265 ALL_EA_BASES 2266 #undef ENTRY 2267 #define ENTRY(x) case EA_REG_##x: 2268 ALL_REGS 2269 #undef ENTRY 2270 debug("A R/M memory operand may not be a register; " 2271 "the base field must be a base."); 2272 return true; 2273 } 2274 } 2275 2276 scaleAmount = MCOperand::createImm(1); 2277 } 2278 2279 displacement = MCOperand::createImm(insn.displacement); 2280 2281 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2282 2283 mcInst.addOperand(baseReg); 2284 mcInst.addOperand(scaleAmount); 2285 mcInst.addOperand(indexReg); 2286 2287 const uint8_t dispSize = 2288 (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 2289 2290 if (!Dis->tryAddingSymbolicOperand( 2291 mcInst, insn.displacement + pcrel, insn.startLocation, false, 2292 insn.displacementOffset, dispSize, insn.length)) 2293 mcInst.addOperand(displacement); 2294 mcInst.addOperand(segmentReg); 2295 return false; 2296 } 2297 2298 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 2299 /// byte of an instruction to LLVM form, and appends it to an MCInst. 2300 /// 2301 /// @param mcInst - The MCInst to append to. 2302 /// @param operand - The operand, as stored in the descriptor table. 2303 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2304 /// from. 2305 /// @return - 0 on success; nonzero otherwise 2306 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 2307 InternalInstruction &insn, const MCDisassembler *Dis) { 2308 switch (operand.type) { 2309 default: 2310 debug("Unexpected type for a R/M operand"); 2311 return true; 2312 case TYPE_R8: 2313 case TYPE_R16: 2314 case TYPE_R32: 2315 case TYPE_R64: 2316 case TYPE_Rv: 2317 case TYPE_MM64: 2318 case TYPE_XMM: 2319 case TYPE_YMM: 2320 case TYPE_ZMM: 2321 case TYPE_TMM: 2322 case TYPE_TMM_PAIR: 2323 case TYPE_VK_PAIR: 2324 case TYPE_VK: 2325 case TYPE_DEBUGREG: 2326 case TYPE_CONTROLREG: 2327 case TYPE_BNDR: 2328 return translateRMRegister(mcInst, insn); 2329 case TYPE_M: 2330 case TYPE_MVSIBX: 2331 case TYPE_MVSIBY: 2332 case TYPE_MVSIBZ: 2333 return translateRMMemory(mcInst, insn, Dis); 2334 case TYPE_MSIB: 2335 return translateRMMemory(mcInst, insn, Dis, true); 2336 } 2337 } 2338 2339 /// translateFPRegister - Translates a stack position on the FPU stack to its 2340 /// LLVM form, and appends it to an MCInst. 2341 /// 2342 /// @param mcInst - The MCInst to append to. 2343 /// @param stackPos - The stack position to translate. 
2344 static void translateFPRegister(MCInst &mcInst, 2345 uint8_t stackPos) { 2346 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 2347 } 2348 2349 /// translateMaskRegister - Translates a 3-bit mask register number to 2350 /// LLVM form, and appends it to an MCInst. 2351 /// 2352 /// @param mcInst - The MCInst to append to. 2353 /// @param maskRegNum - Number of mask register from 0 to 7. 2354 /// @return - false on success; true otherwise. 2355 static bool translateMaskRegister(MCInst &mcInst, 2356 uint8_t maskRegNum) { 2357 if (maskRegNum >= 8) { 2358 debug("Invalid mask register number"); 2359 return true; 2360 } 2361 2362 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 2363 return false; 2364 } 2365 2366 /// translateOperand - Translates an operand stored in an internal instruction 2367 /// to LLVM's format and appends it to an MCInst. 2368 /// 2369 /// @param mcInst - The MCInst to append to. 2370 /// @param operand - The operand, as stored in the descriptor table. 2371 /// @param insn - The internal instruction. 2372 /// @return - false on success; true otherwise. 
2373 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 2374 InternalInstruction &insn, 2375 const MCDisassembler *Dis) { 2376 switch (operand.encoding) { 2377 default: 2378 debug("Unhandled operand encoding during translation"); 2379 return true; 2380 case ENCODING_REG: 2381 translateRegister(mcInst, insn.reg); 2382 return false; 2383 case ENCODING_WRITEMASK: 2384 return translateMaskRegister(mcInst, insn.writemask); 2385 case ENCODING_SIB: 2386 CASE_ENCODING_RM: 2387 CASE_ENCODING_VSIB: 2388 return translateRM(mcInst, operand, insn, Dis); 2389 case ENCODING_IB: 2390 case ENCODING_IW: 2391 case ENCODING_ID: 2392 case ENCODING_IO: 2393 case ENCODING_Iv: 2394 case ENCODING_Ia: 2395 translateImmediate(mcInst, 2396 insn.immediates[insn.numImmediatesTranslated++], 2397 operand, 2398 insn, 2399 Dis); 2400 return false; 2401 case ENCODING_IRC: 2402 mcInst.addOperand(MCOperand::createImm(insn.RC)); 2403 return false; 2404 case ENCODING_SI: 2405 return translateSrcIndex(mcInst, insn); 2406 case ENCODING_DI: 2407 return translateDstIndex(mcInst, insn); 2408 case ENCODING_RB: 2409 case ENCODING_RW: 2410 case ENCODING_RD: 2411 case ENCODING_RO: 2412 case ENCODING_Rv: 2413 translateRegister(mcInst, insn.opcodeRegister); 2414 return false; 2415 case ENCODING_CF: 2416 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2417 return false; 2418 case ENCODING_CC: 2419 if (isCCMPOrCTEST(&insn)) 2420 mcInst.addOperand(MCOperand::createImm(insn.immediates[2])); 2421 else 2422 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2423 return false; 2424 case ENCODING_FP: 2425 translateFPRegister(mcInst, insn.modRM & 7); 2426 return false; 2427 case ENCODING_VVVV: 2428 translateRegister(mcInst, insn.vvvv); 2429 return false; 2430 case ENCODING_DUP: 2431 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 2432 insn, Dis); 2433 } 2434 } 2435 2436 /// translateInstruction - Translates an internal instruction and all its 2437 
/// operands to an MCInst. 2438 /// 2439 /// @param mcInst - The MCInst to populate with the instruction's data. 2440 /// @param insn - The internal instruction. 2441 /// @return - false on success; true otherwise. 2442 static bool translateInstruction(MCInst &mcInst, 2443 InternalInstruction &insn, 2444 const MCDisassembler *Dis) { 2445 if (!insn.spec) { 2446 debug("Instruction has no specification"); 2447 return true; 2448 } 2449 2450 mcInst.clear(); 2451 mcInst.setOpcode(insn.instructionID); 2452 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 2453 // prefix bytes should be disassembled as xrelease and xacquire then set the 2454 // opcode to those instead of the rep and repne opcodes. 2455 if (insn.xAcquireRelease) { 2456 if(mcInst.getOpcode() == X86::REP_PREFIX) 2457 mcInst.setOpcode(X86::XRELEASE_PREFIX); 2458 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 2459 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 2460 } 2461 2462 insn.numImmediatesTranslated = 0; 2463 2464 for (const auto &Op : insn.operands) { 2465 if (Op.encoding != ENCODING_NONE) { 2466 if (translateOperand(mcInst, Op, insn, Dis)) { 2467 return true; 2468 } 2469 } 2470 } 2471 2472 return false; 2473 } 2474 2475 static MCDisassembler *createX86Disassembler(const Target &T, 2476 const MCSubtargetInfo &STI, 2477 MCContext &Ctx) { 2478 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 2479 return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 2480 } 2481 2482 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { 2483 // Register the disassembler. 2484 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 2485 createX86Disassembler); 2486 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 2487 createX86Disassembler); 2488 } 2489