/*	$NetBSD: libnvmm_x86.c,v 1.31 2019/06/08 07:27:44 maxv Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmparam.h>
#include <machine/pte.h>
#include <machine/psl.h>

#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define __cacheline_aligned __attribute__((__aligned__(64)))

#include <x86/specialreg.h>

/* -------------------------------------------------------------------------- */
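
/*
 * Overview: this file contains the x86-specific helpers of libnvmm:
 * dumping VCPU state, translating guest virtual addresses to guest
 * physical addresses by walking the guest page tables (32bit, 32bit PAE
 * and 64bit formats), and assisting the emulation of the port I/O and
 * memory accesses that caused a VMEXIT, including a small x86 instruction
 * decoder.
 */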

/*
 * Undocumented debugging function. Helpful.
 */
int
nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
	struct nvmm_x64_state *state = vcpu->state;
	uint16_t *attr;
	size_t i;
	int ret;

	const char *segnames[] = {
		"ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
	};

	ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
	if (ret == -1)
		return -1;

	printf("+ VCPU id=%d\n", (int)vcpu->cpuid);
	printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
	printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
	printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
	printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
	printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
	printf("| -> RFLAGS=%p\n", (void *)state->gprs[NVMM_X64_GPR_RFLAGS]);
	for (i = 0; i < NVMM_X64_NSEG; i++) {
		attr = (uint16_t *)&state->segs[i].attrib;
		printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
		    segnames[i],
		    state->segs[i].selector,
		    state->segs[i].base,
		    state->segs[i].limit,
		    *attr);
	}
	printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
	printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
	printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
	printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
	printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);

	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE32_L1_SHIFT	12
#define PTE32_L2_SHIFT	22

#define PTE32_L2_MASK	0xffc00000
#define PTE32_L1_MASK	0x003ff000

#define PTE32_L2_FRAME	(PTE32_L2_MASK)
#define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)

#define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)

#define CR3_FRAME_32BIT	PG_FRAME

typedef uint32_t pte_32bit_t;

static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L2gpa, L1gpa;
	uintptr_t L2hva, L1hva;
	pte_32bit_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L2. */
	L2gpa = (cr3 & CR3_FRAME_32BIT);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_t *)L2hva;
	pte = pdir[pte32_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_t *)L1hva;
	pte = pdir[pte32_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
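
/*
 * Worked example of the non-PAE split (illustrative only): for
 * gva 0x00403015 with 4KB pages, pte32_l2idx() = 0x001 (bits 31:22) and
 * pte32_l1idx() = 0x003 (bits 21:12); the page offset 0x015 is re-added
 * by the caller. With a 4MB PG_PS mapping at L2, bits 21:12 of the gva
 * are kept here instead, and the low 12 bits are still added by the caller.
 */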

/* -------------------------------------------------------------------------- */

#define PTE32_PAE_L1_SHIFT	12
#define PTE32_PAE_L2_SHIFT	21
#define PTE32_PAE_L3_SHIFT	30

#define PTE32_PAE_L3_MASK	0xc0000000
#define PTE32_PAE_L2_MASK	0x3fe00000
#define PTE32_PAE_L1_MASK	0x001ff000

#define PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)

#define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)

#define CR3_FRAME_32BIT_PAE	__BITS(31, 5)

typedef uint64_t pte_32bit_pae_t;

static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	gpaddr_t L3gpa, L2gpa, L1gpa;
	uintptr_t L3hva, L2hva, L1hva;
	pte_32bit_pae_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L3. */
	L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L3hva;
	pte = pdir[pte32_pae_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L2hva;
	pte = pdir[pte32_pae_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_PAE_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L1hva;
	pte = pdir[pte32_pae_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
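
/*
 * Worked example of the PAE split (illustrative only): for gva 0xC0403015,
 * pte32_pae_l3idx() = 3 (bits 31:30), pte32_pae_l2idx() = 0x002
 * (bits 29:21) and pte32_pae_l1idx() = 0x003 (bits 20:12). A PG_PS entry
 * at L2 maps a 2MB page, in which case bits 20:12 of the gva are kept as
 * part of the offset.
 */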

/* -------------------------------------------------------------------------- */

#define PTE64_L1_SHIFT	12
#define PTE64_L2_SHIFT	21
#define PTE64_L3_SHIFT	30
#define PTE64_L4_SHIFT	39

#define PTE64_L4_MASK	0x0000ff8000000000
#define PTE64_L3_MASK	0x0000007fc0000000
#define PTE64_L2_MASK	0x000000003fe00000
#define PTE64_L1_MASK	0x00000000001ff000

#define PTE64_L4_FRAME	PTE64_L4_MASK
#define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)

#define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)

#define CR3_FRAME_64BIT	PG_FRAME

typedef uint64_t pte_64bit_t;

static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
	/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND	0xffff800000000000ULL
	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
}

static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
	uintptr_t L4hva, L3hva, L2hva, L1hva;
	pte_64bit_t *pdir, pte;
	nvmm_prot_t pageprot;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	if (!x86_gva_64bit_canonical(gva))
		return -1;

	/* Parse L4. */
	L4gpa = (cr3 & CR3_FRAME_64BIT);
	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L4hva;
	pte = pdir[pte64_l4idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L3. */
	L3gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L3hva;
	pte = pdir[pte64_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L3_FRAME);
		*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
		return 0;
	}

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L2hva;
	pte = pdir[pte64_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L2_FRAME);
		*gpa = *gpa + (gva & PTE64_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
		return -1;
	pdir = (pte_64bit_t *)L1hva;
	pte = pdir[pte64_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
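
/*
 * Worked example of the 4-level split (illustrative only): for the
 * canonical gva 0x00007f8012345678, pte64_l4idx() = 0x0ff,
 * pte64_l3idx() = 0x000, pte64_l2idx() = 0x091, pte64_l1idx() = 0x145,
 * page offset 0x678. PG_PS at L3 or L2 maps a 1GB or 2MB page
 * respectively, and the corresponding low gva bits are kept as the offset.
 */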

static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	bool is_pae, is_lng, has_pse;
	uint64_t cr3;
	size_t off;
	int ret;

	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
		/* No paging. */
		*prot = NVMM_PROT_ALL;
		*gpa = gva;
		return 0;
	}

	off = (gva & PAGE_MASK);
	gva &= ~PAGE_MASK;

	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
	cr3 = state->crs[NVMM_X64_CR_CR3];

	if (is_pae && is_lng) {
		/* 64bit */
		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
	} else if (is_pae && !is_lng) {
		/* 32bit PAE */
		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
	} else if (!is_pae && !is_lng) {
		/* 32bit */
		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
	} else {
		ret = -1;
	}

	if (ret == -1) {
		errno = EFAULT;
	}

	*gpa = *gpa + off;

	return ret;
}

int
nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	struct nvmm_x64_state *state = vcpu->state;
	int ret;

	ret = nvmm_vcpu_getstate(mach, vcpu,
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	return x86_gva_to_gpa(mach, state, gva, gpa, prot);
}
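
/*
 * Typical use of nvmm_gva_to_gpa() from an emulator loop (illustrative
 * sketch only; error handling reduced to the minimum, and "mach", "vcpu"
 * and "gva" are assumed to exist in the caller):
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(mach, vcpu, gva, &gpa, &prot) == -1)
 *		err(EXIT_FAILURE, "nvmm_gva_to_gpa");
 *	if (prot & NVMM_PROT_READ) {
 *		// the guest itself can read through this mapping
 *	}
 */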
478 */ 479 if (__predict_false(!seg->attrib.p)) { 480 goto error; 481 } 482 483 limit = (uint64_t)seg->limit + 1; 484 if (__predict_true(seg->attrib.g)) { 485 limit *= PAGE_SIZE; 486 } 487 488 if (__predict_false(gva + size > limit)) { 489 goto error; 490 } 491 492 return 0; 493 494 error: 495 errno = EFAULT; 496 return -1; 497 } 498 499 static inline void 500 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva) 501 { 502 *gva += seg->base; 503 } 504 505 static inline uint64_t 506 size_to_mask(size_t size) 507 { 508 switch (size) { 509 case 1: 510 return 0x00000000000000FF; 511 case 2: 512 return 0x000000000000FFFF; 513 case 4: 514 return 0x00000000FFFFFFFF; 515 case 8: 516 default: 517 return 0xFFFFFFFFFFFFFFFF; 518 } 519 } 520 521 static uint64_t 522 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize) 523 { 524 uint64_t mask, cnt; 525 526 mask = size_to_mask(adsize); 527 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask; 528 529 return cnt; 530 } 531 532 static void 533 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt) 534 { 535 uint64_t mask; 536 537 /* XXX: should we zero-extend? */ 538 mask = size_to_mask(adsize); 539 state->gprs[NVMM_X64_GPR_RCX] &= ~mask; 540 state->gprs[NVMM_X64_GPR_RCX] |= cnt; 541 } 542 543 static int 544 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state, 545 gvaddr_t gva, uint8_t *data, size_t size) 546 { 547 struct nvmm_mem mem; 548 nvmm_prot_t prot; 549 gpaddr_t gpa; 550 uintptr_t hva; 551 bool is_mmio; 552 int ret, remain; 553 554 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot); 555 if (__predict_false(ret == -1)) { 556 return -1; 557 } 558 if (__predict_false(!(prot & NVMM_PROT_READ))) { 559 errno = EFAULT; 560 return -1; 561 } 562 563 if ((gva & PAGE_MASK) + size > PAGE_SIZE) { 564 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE); 565 } else { 566 remain = 0; 567 } 568 size -= remain; 569 570 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot); 571 is_mmio = (ret == -1); 572 573 if (is_mmio) { 574 mem.data = data; 575 mem.gpa = gpa; 576 mem.write = false; 577 mem.size = size; 578 (*mach->cbs.mem)(&mem); 579 } else { 580 if (__predict_false(!(prot & NVMM_PROT_READ))) { 581 errno = EFAULT; 582 return -1; 583 } 584 memcpy(data, (uint8_t *)hva, size); 585 } 586 587 if (remain > 0) { 588 ret = read_guest_memory(mach, state, gva + size, 589 data + size, remain); 590 } else { 591 ret = 0; 592 } 593 594 return ret; 595 } 596 597 static int 598 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state, 599 gvaddr_t gva, uint8_t *data, size_t size) 600 { 601 struct nvmm_mem mem; 602 nvmm_prot_t prot; 603 gpaddr_t gpa; 604 uintptr_t hva; 605 bool is_mmio; 606 int ret, remain; 607 608 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot); 609 if (__predict_false(ret == -1)) { 610 return -1; 611 } 612 if (__predict_false(!(prot & NVMM_PROT_WRITE))) { 613 errno = EFAULT; 614 return -1; 615 } 616 617 if ((gva & PAGE_MASK) + size > PAGE_SIZE) { 618 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE); 619 } else { 620 remain = 0; 621 } 622 size -= remain; 623 624 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot); 625 is_mmio = (ret == -1); 626 627 if (is_mmio) { 628 mem.data = data; 629 mem.gpa = gpa; 630 mem.write = true; 631 mem.size = size; 632 (*mach->cbs.mem)(&mem); 633 } else { 634 if (__predict_false(!(prot & NVMM_PROT_WRITE))) { 635 errno = EFAULT; 636 return -1; 637 } 638 memcpy((uint8_t *)hva, data, size); 639 } 640 641 if (remain > 0) { 642 ret = write_guest_memory(mach, state, gva + size, 643 data + size, 

/* -------------------------------------------------------------------------- */

static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);

#define NVMM_IO_BATCH_SIZE	32

static int
assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
{
	uint8_t iobuf[NVMM_IO_BATCH_SIZE];
	size_t i, iosize, iocnt;
	int ret;

	cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
	iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
	iocnt = iosize / io->size;

	io->data = iobuf;

	if (!io->in) {
		ret = read_guest_memory(mach, state, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	for (i = 0; i < iocnt; i++) {
		(*mach->cbs.io)(io);
		io->data += io->size;
	}

	if (io->in) {
		ret = write_guest_memory(mach, state, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	return iocnt;
}

int
nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
	struct nvmm_x64_state *state = vcpu->state;
	struct nvmm_exit *exit = vcpu->exit;
	struct nvmm_io io;
	uint64_t cnt = 0; /* GCC */
	uint8_t iobuf[8];
	int iocnt = 1;
	gvaddr_t gva = 0; /* GCC */
	int reg = 0; /* GCC */
	int ret, seg;
	bool psld = false;

	if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
		errno = EINVAL;
		return -1;
	}

	io.port = exit->u.io.port;
	io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
	io.size = exit->u.io.operand_size;
	io.data = iobuf;

	ret = nvmm_vcpu_getstate(mach, vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	if (exit->u.io.rep) {
		cnt = rep_get_cnt(state, exit->u.io.address_size);
		if (__predict_false(cnt == 0)) {
			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
			goto out;
		}
	}

	if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
		psld = true;
	}

	/*
	 * Determine GVA.
	 */
	if (exit->u.io.str) {
		if (io.in) {
			reg = NVMM_X64_GPR_RDI;
		} else {
			reg = NVMM_X64_GPR_RSI;
		}

		gva = state->gprs[reg];
		gva &= size_to_mask(exit->u.io.address_size);

		if (exit->u.io.seg != -1) {
			seg = exit->u.io.seg;
		} else {
			if (io.in) {
				seg = NVMM_X64_SEG_ES;
			} else {
				seg = fetch_segment(mach, state);
				if (seg == -1)
					return -1;
			}
		}

		if (__predict_true(is_long_mode(state))) {
			if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
				segment_apply(&state->segs[seg], &gva);
			}
		} else {
			ret = segment_check(&state->segs[seg], gva, io.size);
			if (ret == -1)
				return -1;
			segment_apply(&state->segs[seg], &gva);
		}

		if (exit->u.io.rep && !psld) {
			iocnt = assist_io_batch(mach, state, &io, gva, cnt);
			if (iocnt == -1)
				return -1;
			goto done;
		}
	}

	if (!io.in) {
		if (!exit->u.io.str) {
			memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
		} else {
			ret = read_guest_memory(mach, state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	(*mach->cbs.io)(&io);

	if (io.in) {
		if (!exit->u.io.str) {
			memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
			if (io.size == 4) {
				/* Zero-extend to 64 bits. */
				state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
			}
		} else {
			ret = write_guest_memory(mach, state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

done:
	if (exit->u.io.str) {
		if (__predict_false(psld)) {
			state->gprs[reg] -= iocnt * io.size;
		} else {
			state->gprs[reg] += iocnt * io.size;
		}
	}

	if (exit->u.io.rep) {
		cnt -= iocnt;
		rep_set_cnt(state, exit->u.io.address_size, cnt);
		if (cnt == 0) {
			state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
		}
	} else {
		state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
	}

out:
	ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
	if (ret == -1)
		return -1;

	return 0;
}
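
/*
 * Sketch of a VMM-side I/O callback as consumed by nvmm_assist_io() and
 * assist_io_batch() above (illustrative only; the handler bodies are
 * hypothetical):
 *
 *	static void
 *	io_callback(struct nvmm_io *io)
 *	{
 *		if (io->in) {
 *			// fill io->data with io->size bytes read from
 *			// the device behind io->port
 *		} else {
 *			// write the io->size bytes of io->data to the
 *			// device behind io->port
 *		}
 *	}
 *
 * The callback lives in mach->cbs.io and is invoked once per emulated
 * access, or once per element of a batched REP string operation.
 */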

/* -------------------------------------------------------------------------- */

struct x86_emul {
	bool read;
	bool notouch;
	void (*func)(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
};

static void x86_func_or(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_and(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_sub(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_xor(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_cmp(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_test(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_mov(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_stos(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_lods(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
static void x86_func_movs(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);

static const struct x86_emul x86_emul_or = {
	.read = true,
	.func = x86_func_or
};

static const struct x86_emul x86_emul_and = {
	.read = true,
	.func = x86_func_and
};

static const struct x86_emul x86_emul_sub = {
	.read = true,
	.func = x86_func_sub
};

static const struct x86_emul x86_emul_xor = {
	.read = true,
	.func = x86_func_xor
};

static const struct x86_emul x86_emul_cmp = {
	.notouch = true,
	.func = x86_func_cmp
};

static const struct x86_emul x86_emul_test = {
	.notouch = true,
	.func = x86_func_test
};

static const struct x86_emul x86_emul_mov = {
	.func = x86_func_mov
};

static const struct x86_emul x86_emul_stos = {
	.func = x86_func_stos
};

static const struct x86_emul x86_emul_lods = {
	.func = x86_func_lods
};

static const struct x86_emul x86_emul_movs = {
	.func = x86_func_movs
};

/* Legacy prefixes.
*/ 898 #define LEG_LOCK 0xF0 899 #define LEG_REPN 0xF2 900 #define LEG_REP 0xF3 901 #define LEG_OVR_CS 0x2E 902 #define LEG_OVR_SS 0x36 903 #define LEG_OVR_DS 0x3E 904 #define LEG_OVR_ES 0x26 905 #define LEG_OVR_FS 0x64 906 #define LEG_OVR_GS 0x65 907 #define LEG_OPR_OVR 0x66 908 #define LEG_ADR_OVR 0x67 909 910 struct x86_legpref { 911 bool opr_ovr:1; 912 bool adr_ovr:1; 913 bool rep:1; 914 bool repn:1; 915 int8_t seg; 916 }; 917 918 struct x86_rexpref { 919 bool b:1; 920 bool x:1; 921 bool r:1; 922 bool w:1; 923 bool present:1; 924 }; 925 926 struct x86_reg { 927 int num; /* NVMM GPR state index */ 928 uint64_t mask; 929 }; 930 931 enum x86_disp_type { 932 DISP_NONE, 933 DISP_0, 934 DISP_1, 935 DISP_4 936 }; 937 938 struct x86_disp { 939 enum x86_disp_type type; 940 uint64_t data; /* 4 bytes, but can be sign-extended */ 941 }; 942 943 enum REGMODRM__Mod { 944 MOD_DIS0, /* also, register indirect */ 945 MOD_DIS1, 946 MOD_DIS4, 947 MOD_REG 948 }; 949 950 enum REGMODRM__Reg { 951 REG_000, /* these fields are indexes to the register map */ 952 REG_001, 953 REG_010, 954 REG_011, 955 REG_100, 956 REG_101, 957 REG_110, 958 REG_111 959 }; 960 961 enum REGMODRM__Rm { 962 RM_000, /* reg */ 963 RM_001, /* reg */ 964 RM_010, /* reg */ 965 RM_011, /* reg */ 966 RM_RSP_SIB, /* reg or SIB, depending on the MOD */ 967 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */ 968 RM_110, 969 RM_111 970 }; 971 972 struct x86_regmodrm { 973 uint8_t mod:2; 974 uint8_t reg:3; 975 uint8_t rm:3; 976 }; 977 978 struct x86_immediate { 979 uint64_t data; 980 }; 981 982 struct x86_sib { 983 uint8_t scale; 984 const struct x86_reg *idx; 985 const struct x86_reg *bas; 986 }; 987 988 enum x86_store_type { 989 STORE_NONE, 990 STORE_REG, 991 STORE_IMM, 992 STORE_SIB, 993 STORE_DMO 994 }; 995 996 struct x86_store { 997 enum x86_store_type type; 998 union { 999 const struct x86_reg *reg; 1000 struct x86_immediate imm; 1001 struct x86_sib sib; 1002 uint64_t dmo; 1003 } u; 1004 struct x86_disp disp; 1005 int hardseg; 1006 }; 1007 1008 struct x86_instr { 1009 uint8_t len; 1010 struct x86_legpref legpref; 1011 struct x86_rexpref rexpref; 1012 struct x86_regmodrm regmodrm; 1013 uint8_t operand_size; 1014 uint8_t address_size; 1015 uint64_t zeroextend_mask; 1016 1017 const struct x86_opcode *opcode; 1018 const struct x86_emul *emul; 1019 1020 struct x86_store src; 1021 struct x86_store dst; 1022 struct x86_store *strm; 1023 }; 1024 1025 struct x86_decode_fsm { 1026 /* vcpu */ 1027 bool is64bit; 1028 bool is32bit; 1029 bool is16bit; 1030 1031 /* fsm */ 1032 int (*fn)(struct x86_decode_fsm *, struct x86_instr *); 1033 uint8_t *buf; 1034 uint8_t *end; 1035 }; 1036 1037 struct x86_opcode { 1038 bool valid:1; 1039 bool regmodrm:1; 1040 bool regtorm:1; 1041 bool dmo:1; 1042 bool todmo:1; 1043 bool movs:1; 1044 bool stos:1; 1045 bool lods:1; 1046 bool szoverride:1; 1047 bool group1:1; 1048 bool group3:1; 1049 bool group11:1; 1050 bool immediate:1; 1051 uint8_t defsize; 1052 uint8_t flags; 1053 const struct x86_emul *emul; 1054 }; 1055 1056 struct x86_group_entry { 1057 const struct x86_emul *emul; 1058 }; 1059 1060 #define OPSIZE_BYTE 0x01 1061 #define OPSIZE_WORD 0x02 /* 2 bytes */ 1062 #define OPSIZE_DOUB 0x04 /* 4 bytes */ 1063 #define OPSIZE_QUAD 0x08 /* 8 bytes */ 1064 1065 #define FLAG_imm8 0x01 1066 #define FLAG_immz 0x02 1067 #define FLAG_ze 0x04 1068 1069 static const struct x86_group_entry group1[8] __cacheline_aligned = { 1070 [1] = { .emul = &x86_emul_or }, 1071 [4] = { .emul = &x86_emul_and }, 1072 [6] = { 
.emul = &x86_emul_xor }, 1073 [7] = { .emul = &x86_emul_cmp } 1074 }; 1075 1076 static const struct x86_group_entry group3[8] __cacheline_aligned = { 1077 [0] = { .emul = &x86_emul_test }, 1078 [1] = { .emul = &x86_emul_test } 1079 }; 1080 1081 static const struct x86_group_entry group11[8] __cacheline_aligned = { 1082 [0] = { .emul = &x86_emul_mov } 1083 }; 1084 1085 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = { 1086 /* 1087 * Group1 1088 */ 1089 [0x80] = { 1090 /* Eb, Ib */ 1091 .valid = true, 1092 .regmodrm = true, 1093 .regtorm = true, 1094 .szoverride = false, 1095 .defsize = OPSIZE_BYTE, 1096 .group1 = true, 1097 .immediate = true, 1098 .emul = NULL /* group1 */ 1099 }, 1100 [0x81] = { 1101 /* Ev, Iz */ 1102 .valid = true, 1103 .regmodrm = true, 1104 .regtorm = true, 1105 .szoverride = true, 1106 .defsize = -1, 1107 .group1 = true, 1108 .immediate = true, 1109 .flags = FLAG_immz, 1110 .emul = NULL /* group1 */ 1111 }, 1112 [0x83] = { 1113 /* Ev, Ib */ 1114 .valid = true, 1115 .regmodrm = true, 1116 .regtorm = true, 1117 .szoverride = true, 1118 .defsize = -1, 1119 .group1 = true, 1120 .immediate = true, 1121 .flags = FLAG_imm8, 1122 .emul = NULL /* group1 */ 1123 }, 1124 1125 /* 1126 * Group3 1127 */ 1128 [0xF6] = { 1129 /* Eb, Ib */ 1130 .valid = true, 1131 .regmodrm = true, 1132 .regtorm = true, 1133 .szoverride = false, 1134 .defsize = OPSIZE_BYTE, 1135 .group3 = true, 1136 .immediate = true, 1137 .emul = NULL /* group3 */ 1138 }, 1139 [0xF7] = { 1140 /* Ev, Iz */ 1141 .valid = true, 1142 .regmodrm = true, 1143 .regtorm = true, 1144 .szoverride = true, 1145 .defsize = -1, 1146 .group3 = true, 1147 .immediate = true, 1148 .flags = FLAG_immz, 1149 .emul = NULL /* group3 */ 1150 }, 1151 1152 /* 1153 * Group11 1154 */ 1155 [0xC6] = { 1156 /* Eb, Ib */ 1157 .valid = true, 1158 .regmodrm = true, 1159 .regtorm = true, 1160 .szoverride = false, 1161 .defsize = OPSIZE_BYTE, 1162 .group11 = true, 1163 .immediate = true, 1164 .emul = NULL /* group11 */ 1165 }, 1166 [0xC7] = { 1167 /* Ev, Iz */ 1168 .valid = true, 1169 .regmodrm = true, 1170 .regtorm = true, 1171 .szoverride = true, 1172 .defsize = -1, 1173 .group11 = true, 1174 .immediate = true, 1175 .flags = FLAG_immz, 1176 .emul = NULL /* group11 */ 1177 }, 1178 1179 /* 1180 * OR 1181 */ 1182 [0x08] = { 1183 /* Eb, Gb */ 1184 .valid = true, 1185 .regmodrm = true, 1186 .regtorm = true, 1187 .szoverride = false, 1188 .defsize = OPSIZE_BYTE, 1189 .emul = &x86_emul_or 1190 }, 1191 [0x09] = { 1192 /* Ev, Gv */ 1193 .valid = true, 1194 .regmodrm = true, 1195 .regtorm = true, 1196 .szoverride = true, 1197 .defsize = -1, 1198 .emul = &x86_emul_or 1199 }, 1200 [0x0A] = { 1201 /* Gb, Eb */ 1202 .valid = true, 1203 .regmodrm = true, 1204 .regtorm = false, 1205 .szoverride = false, 1206 .defsize = OPSIZE_BYTE, 1207 .emul = &x86_emul_or 1208 }, 1209 [0x0B] = { 1210 /* Gv, Ev */ 1211 .valid = true, 1212 .regmodrm = true, 1213 .regtorm = false, 1214 .szoverride = true, 1215 .defsize = -1, 1216 .emul = &x86_emul_or 1217 }, 1218 1219 /* 1220 * AND 1221 */ 1222 [0x20] = { 1223 /* Eb, Gb */ 1224 .valid = true, 1225 .regmodrm = true, 1226 .regtorm = true, 1227 .szoverride = false, 1228 .defsize = OPSIZE_BYTE, 1229 .emul = &x86_emul_and 1230 }, 1231 [0x21] = { 1232 /* Ev, Gv */ 1233 .valid = true, 1234 .regmodrm = true, 1235 .regtorm = true, 1236 .szoverride = true, 1237 .defsize = -1, 1238 .emul = &x86_emul_and 1239 }, 1240 [0x22] = { 1241 /* Gb, Eb */ 1242 .valid = true, 1243 .regmodrm = true, 1244 .regtorm = false, 1245 
.szoverride = false, 1246 .defsize = OPSIZE_BYTE, 1247 .emul = &x86_emul_and 1248 }, 1249 [0x23] = { 1250 /* Gv, Ev */ 1251 .valid = true, 1252 .regmodrm = true, 1253 .regtorm = false, 1254 .szoverride = true, 1255 .defsize = -1, 1256 .emul = &x86_emul_and 1257 }, 1258 1259 /* 1260 * SUB 1261 */ 1262 [0x28] = { 1263 /* Eb, Gb */ 1264 .valid = true, 1265 .regmodrm = true, 1266 .regtorm = true, 1267 .szoverride = false, 1268 .defsize = OPSIZE_BYTE, 1269 .emul = &x86_emul_sub 1270 }, 1271 [0x29] = { 1272 /* Ev, Gv */ 1273 .valid = true, 1274 .regmodrm = true, 1275 .regtorm = true, 1276 .szoverride = true, 1277 .defsize = -1, 1278 .emul = &x86_emul_sub 1279 }, 1280 [0x2A] = { 1281 /* Gb, Eb */ 1282 .valid = true, 1283 .regmodrm = true, 1284 .regtorm = false, 1285 .szoverride = false, 1286 .defsize = OPSIZE_BYTE, 1287 .emul = &x86_emul_sub 1288 }, 1289 [0x2B] = { 1290 /* Gv, Ev */ 1291 .valid = true, 1292 .regmodrm = true, 1293 .regtorm = false, 1294 .szoverride = true, 1295 .defsize = -1, 1296 .emul = &x86_emul_sub 1297 }, 1298 1299 /* 1300 * XOR 1301 */ 1302 [0x30] = { 1303 /* Eb, Gb */ 1304 .valid = true, 1305 .regmodrm = true, 1306 .regtorm = true, 1307 .szoverride = false, 1308 .defsize = OPSIZE_BYTE, 1309 .emul = &x86_emul_xor 1310 }, 1311 [0x31] = { 1312 /* Ev, Gv */ 1313 .valid = true, 1314 .regmodrm = true, 1315 .regtorm = true, 1316 .szoverride = true, 1317 .defsize = -1, 1318 .emul = &x86_emul_xor 1319 }, 1320 [0x32] = { 1321 /* Gb, Eb */ 1322 .valid = true, 1323 .regmodrm = true, 1324 .regtorm = false, 1325 .szoverride = false, 1326 .defsize = OPSIZE_BYTE, 1327 .emul = &x86_emul_xor 1328 }, 1329 [0x33] = { 1330 /* Gv, Ev */ 1331 .valid = true, 1332 .regmodrm = true, 1333 .regtorm = false, 1334 .szoverride = true, 1335 .defsize = -1, 1336 .emul = &x86_emul_xor 1337 }, 1338 1339 /* 1340 * MOV 1341 */ 1342 [0x88] = { 1343 /* Eb, Gb */ 1344 .valid = true, 1345 .regmodrm = true, 1346 .regtorm = true, 1347 .szoverride = false, 1348 .defsize = OPSIZE_BYTE, 1349 .emul = &x86_emul_mov 1350 }, 1351 [0x89] = { 1352 /* Ev, Gv */ 1353 .valid = true, 1354 .regmodrm = true, 1355 .regtorm = true, 1356 .szoverride = true, 1357 .defsize = -1, 1358 .emul = &x86_emul_mov 1359 }, 1360 [0x8A] = { 1361 /* Gb, Eb */ 1362 .valid = true, 1363 .regmodrm = true, 1364 .regtorm = false, 1365 .szoverride = false, 1366 .defsize = OPSIZE_BYTE, 1367 .emul = &x86_emul_mov 1368 }, 1369 [0x8B] = { 1370 /* Gv, Ev */ 1371 .valid = true, 1372 .regmodrm = true, 1373 .regtorm = false, 1374 .szoverride = true, 1375 .defsize = -1, 1376 .emul = &x86_emul_mov 1377 }, 1378 [0xA0] = { 1379 /* AL, Ob */ 1380 .valid = true, 1381 .dmo = true, 1382 .todmo = false, 1383 .szoverride = false, 1384 .defsize = OPSIZE_BYTE, 1385 .emul = &x86_emul_mov 1386 }, 1387 [0xA1] = { 1388 /* rAX, Ov */ 1389 .valid = true, 1390 .dmo = true, 1391 .todmo = false, 1392 .szoverride = true, 1393 .defsize = -1, 1394 .emul = &x86_emul_mov 1395 }, 1396 [0xA2] = { 1397 /* Ob, AL */ 1398 .valid = true, 1399 .dmo = true, 1400 .todmo = true, 1401 .szoverride = false, 1402 .defsize = OPSIZE_BYTE, 1403 .emul = &x86_emul_mov 1404 }, 1405 [0xA3] = { 1406 /* Ov, rAX */ 1407 .valid = true, 1408 .dmo = true, 1409 .todmo = true, 1410 .szoverride = true, 1411 .defsize = -1, 1412 .emul = &x86_emul_mov 1413 }, 1414 1415 /* 1416 * MOVS 1417 */ 1418 [0xA4] = { 1419 /* Yb, Xb */ 1420 .valid = true, 1421 .movs = true, 1422 .szoverride = false, 1423 .defsize = OPSIZE_BYTE, 1424 .emul = &x86_emul_movs 1425 }, 1426 [0xA5] = { 1427 /* Yv, Xv */ 1428 .valid = true, 1429 .movs = 
true, 1430 .szoverride = true, 1431 .defsize = -1, 1432 .emul = &x86_emul_movs 1433 }, 1434 1435 /* 1436 * STOS 1437 */ 1438 [0xAA] = { 1439 /* Yb, AL */ 1440 .valid = true, 1441 .stos = true, 1442 .szoverride = false, 1443 .defsize = OPSIZE_BYTE, 1444 .emul = &x86_emul_stos 1445 }, 1446 [0xAB] = { 1447 /* Yv, rAX */ 1448 .valid = true, 1449 .stos = true, 1450 .szoverride = true, 1451 .defsize = -1, 1452 .emul = &x86_emul_stos 1453 }, 1454 1455 /* 1456 * LODS 1457 */ 1458 [0xAC] = { 1459 /* AL, Xb */ 1460 .valid = true, 1461 .lods = true, 1462 .szoverride = false, 1463 .defsize = OPSIZE_BYTE, 1464 .emul = &x86_emul_lods 1465 }, 1466 [0xAD] = { 1467 /* rAX, Xv */ 1468 .valid = true, 1469 .lods = true, 1470 .szoverride = true, 1471 .defsize = -1, 1472 .emul = &x86_emul_lods 1473 }, 1474 }; 1475 1476 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = { 1477 /* 1478 * MOVZX 1479 */ 1480 [0xB6] = { 1481 /* Gv, Eb */ 1482 .valid = true, 1483 .regmodrm = true, 1484 .regtorm = false, 1485 .szoverride = true, 1486 .defsize = OPSIZE_BYTE, 1487 .flags = FLAG_ze, 1488 .emul = &x86_emul_mov 1489 }, 1490 [0xB7] = { 1491 /* Gv, Ew */ 1492 .valid = true, 1493 .regmodrm = true, 1494 .regtorm = false, 1495 .szoverride = true, 1496 .defsize = OPSIZE_WORD, 1497 .flags = FLAG_ze, 1498 .emul = &x86_emul_mov 1499 }, 1500 }; 1501 1502 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF }; 1503 1504 /* [REX-present][enc][opsize] */ 1505 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = { 1506 [false] = { 1507 /* No REX prefix. */ 1508 [0b00] = { 1509 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */ 1510 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */ 1511 [2] = { -1, 0 }, 1512 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */ 1513 [4] = { -1, 0 }, 1514 [5] = { -1, 0 }, 1515 [6] = { -1, 0 }, 1516 [7] = { -1, 0 }, 1517 }, 1518 [0b01] = { 1519 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */ 1520 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */ 1521 [2] = { -1, 0 }, 1522 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */ 1523 [4] = { -1, 0 }, 1524 [5] = { -1, 0 }, 1525 [6] = { -1, 0 }, 1526 [7] = { -1, 0 }, 1527 }, 1528 [0b10] = { 1529 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */ 1530 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */ 1531 [2] = { -1, 0 }, 1532 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */ 1533 [4] = { -1, 0 }, 1534 [5] = { -1, 0 }, 1535 [6] = { -1, 0 }, 1536 [7] = { -1, 0 }, 1537 }, 1538 [0b11] = { 1539 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */ 1540 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */ 1541 [2] = { -1, 0 }, 1542 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */ 1543 [4] = { -1, 0 }, 1544 [5] = { -1, 0 }, 1545 [6] = { -1, 0 }, 1546 [7] = { -1, 0 }, 1547 } 1548 }, 1549 [true] = { 1550 /* Has REX prefix. 
*/ 1551 [0b00] = { 1552 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */ 1553 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */ 1554 [2] = { -1, 0 }, 1555 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */ 1556 [4] = { -1, 0 }, 1557 [5] = { -1, 0 }, 1558 [6] = { -1, 0 }, 1559 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */ 1560 }, 1561 [0b01] = { 1562 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */ 1563 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */ 1564 [2] = { -1, 0 }, 1565 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */ 1566 [4] = { -1, 0 }, 1567 [5] = { -1, 0 }, 1568 [6] = { -1, 0 }, 1569 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */ 1570 }, 1571 [0b10] = { 1572 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */ 1573 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */ 1574 [2] = { -1, 0 }, 1575 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */ 1576 [4] = { -1, 0 }, 1577 [5] = { -1, 0 }, 1578 [6] = { -1, 0 }, 1579 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */ 1580 }, 1581 [0b11] = { 1582 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */ 1583 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */ 1584 [2] = { -1, 0 }, 1585 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */ 1586 [4] = { -1, 0 }, 1587 [5] = { -1, 0 }, 1588 [6] = { -1, 0 }, 1589 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */ 1590 } 1591 } 1592 }; 1593 1594 /* [depends][enc][size] */ 1595 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = { 1596 [false] = { 1597 /* Not extended. */ 1598 [0b000] = { 1599 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */ 1600 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */ 1601 [2] = { -1, 0 }, 1602 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */ 1603 [4] = { -1, 0 }, 1604 [5] = { -1, 0 }, 1605 [6] = { -1, 0 }, 1606 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */ 1607 }, 1608 [0b001] = { 1609 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */ 1610 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */ 1611 [2] = { -1, 0 }, 1612 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */ 1613 [4] = { -1, 0 }, 1614 [5] = { -1, 0 }, 1615 [6] = { -1, 0 }, 1616 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */ 1617 }, 1618 [0b010] = { 1619 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */ 1620 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */ 1621 [2] = { -1, 0 }, 1622 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */ 1623 [4] = { -1, 0 }, 1624 [5] = { -1, 0 }, 1625 [6] = { -1, 0 }, 1626 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */ 1627 }, 1628 [0b011] = { 1629 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */ 1630 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */ 1631 [2] = { -1, 0 }, 1632 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */ 1633 [4] = { -1, 0 }, 1634 [5] = { -1, 0 }, 1635 [6] = { -1, 0 }, 1636 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */ 1637 }, 1638 [0b100] = { 1639 [0] = { -1, 0 }, /* SPECIAL */ 1640 [1] = { -1, 0 }, /* SPECIAL */ 1641 [2] = { -1, 0 }, 1642 [3] = { -1, 0 }, /* SPECIAL */ 1643 [4] = { -1, 0 }, 1644 [5] = { -1, 0 }, 1645 [6] = { -1, 0 }, 1646 [7] = { -1, 0 }, /* SPECIAL */ 1647 }, 1648 [0b101] = { 1649 [0] = { -1, 0 }, /* SPECIAL */ 1650 [1] = { -1, 0 }, /* SPECIAL */ 1651 [2] = { -1, 0 }, 1652 [3] = { -1, 0 }, /* SPECIAL */ 1653 [4] = { -1, 0 }, 1654 [5] = { -1, 0 }, 1655 [6] = { -1, 0 }, 1656 [7] = 
{ -1, 0 }, /* SPECIAL */ 1657 }, 1658 [0b110] = { 1659 [0] = { -1, 0 }, /* SPECIAL */ 1660 [1] = { -1, 0 }, /* SPECIAL */ 1661 [2] = { -1, 0 }, 1662 [3] = { -1, 0 }, /* SPECIAL */ 1663 [4] = { -1, 0 }, 1664 [5] = { -1, 0 }, 1665 [6] = { -1, 0 }, 1666 [7] = { -1, 0 }, /* SPECIAL */ 1667 }, 1668 [0b111] = { 1669 [0] = { -1, 0 }, /* SPECIAL */ 1670 [1] = { -1, 0 }, /* SPECIAL */ 1671 [2] = { -1, 0 }, 1672 [3] = { -1, 0 }, /* SPECIAL */ 1673 [4] = { -1, 0 }, 1674 [5] = { -1, 0 }, 1675 [6] = { -1, 0 }, 1676 [7] = { -1, 0 }, /* SPECIAL */ 1677 }, 1678 }, 1679 [true] = { 1680 /* Extended. */ 1681 [0b000] = { 1682 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */ 1683 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */ 1684 [2] = { -1, 0 }, 1685 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */ 1686 [4] = { -1, 0 }, 1687 [5] = { -1, 0 }, 1688 [6] = { -1, 0 }, 1689 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */ 1690 }, 1691 [0b001] = { 1692 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */ 1693 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */ 1694 [2] = { -1, 0 }, 1695 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */ 1696 [4] = { -1, 0 }, 1697 [5] = { -1, 0 }, 1698 [6] = { -1, 0 }, 1699 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */ 1700 }, 1701 [0b010] = { 1702 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */ 1703 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */ 1704 [2] = { -1, 0 }, 1705 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */ 1706 [4] = { -1, 0 }, 1707 [5] = { -1, 0 }, 1708 [6] = { -1, 0 }, 1709 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */ 1710 }, 1711 [0b011] = { 1712 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */ 1713 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */ 1714 [2] = { -1, 0 }, 1715 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */ 1716 [4] = { -1, 0 }, 1717 [5] = { -1, 0 }, 1718 [6] = { -1, 0 }, 1719 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */ 1720 }, 1721 [0b100] = { 1722 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */ 1723 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */ 1724 [2] = { -1, 0 }, 1725 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */ 1726 [4] = { -1, 0 }, 1727 [5] = { -1, 0 }, 1728 [6] = { -1, 0 }, 1729 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */ 1730 }, 1731 [0b101] = { 1732 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */ 1733 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */ 1734 [2] = { -1, 0 }, 1735 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */ 1736 [4] = { -1, 0 }, 1737 [5] = { -1, 0 }, 1738 [6] = { -1, 0 }, 1739 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */ 1740 }, 1741 [0b110] = { 1742 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */ 1743 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */ 1744 [2] = { -1, 0 }, 1745 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */ 1746 [4] = { -1, 0 }, 1747 [5] = { -1, 0 }, 1748 [6] = { -1, 0 }, 1749 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */ 1750 }, 1751 [0b111] = { 1752 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */ 1753 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */ 1754 [2] = { -1, 0 }, 1755 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */ 1756 [4] = { -1, 0 }, 1757 [5] = { -1, 0 }, 1758 [6] = { -1, 0 }, 1759 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */ 1760 }, 1761 } 1762 }; 1763 1764 static 
int 1765 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1766 { 1767 fsm->fn = NULL; 1768 return -1; 1769 } 1770 1771 static int 1772 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n) 1773 { 1774 if (fsm->buf + n > fsm->end) { 1775 return -1; 1776 } 1777 memcpy(bytes, fsm->buf, n); 1778 return 0; 1779 } 1780 1781 static inline void 1782 fsm_advance(struct x86_decode_fsm *fsm, size_t n, 1783 int (*fn)(struct x86_decode_fsm *, struct x86_instr *)) 1784 { 1785 fsm->buf += n; 1786 if (fsm->buf > fsm->end) { 1787 fsm->fn = node_overflow; 1788 } else { 1789 fsm->fn = fn; 1790 } 1791 } 1792 1793 static const struct x86_reg * 1794 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize) 1795 { 1796 enc &= 0b11; 1797 if (regsize == 8) { 1798 /* May be 64bit without REX */ 1799 return &gpr_map__special[1][enc][regsize-1]; 1800 } 1801 return &gpr_map__special[instr->rexpref.present][enc][regsize-1]; 1802 } 1803 1804 /* 1805 * Special node, for MOVS. Fake two displacements of zero on the source and 1806 * destination registers. 1807 */ 1808 static int 1809 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1810 { 1811 size_t adrsize; 1812 1813 adrsize = instr->address_size; 1814 1815 /* DS:RSI */ 1816 instr->src.type = STORE_REG; 1817 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1]; 1818 instr->src.disp.type = DISP_0; 1819 1820 /* ES:RDI, force ES */ 1821 instr->dst.type = STORE_REG; 1822 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1]; 1823 instr->dst.disp.type = DISP_0; 1824 instr->dst.hardseg = NVMM_X64_SEG_ES; 1825 1826 fsm_advance(fsm, 0, NULL); 1827 1828 return 0; 1829 } 1830 1831 /* 1832 * Special node, for STOS and LODS. Fake a displacement of zero on the 1833 * destination register. 
1834 */ 1835 static int 1836 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1837 { 1838 const struct x86_opcode *opcode = instr->opcode; 1839 struct x86_store *stlo, *streg; 1840 size_t adrsize, regsize; 1841 1842 adrsize = instr->address_size; 1843 regsize = instr->operand_size; 1844 1845 if (opcode->stos) { 1846 streg = &instr->src; 1847 stlo = &instr->dst; 1848 } else { 1849 streg = &instr->dst; 1850 stlo = &instr->src; 1851 } 1852 1853 streg->type = STORE_REG; 1854 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */ 1855 1856 stlo->type = STORE_REG; 1857 if (opcode->stos) { 1858 /* ES:RDI, force ES */ 1859 stlo->u.reg = &gpr_map__special[1][3][adrsize-1]; 1860 stlo->hardseg = NVMM_X64_SEG_ES; 1861 } else { 1862 /* DS:RSI */ 1863 stlo->u.reg = &gpr_map__special[1][2][adrsize-1]; 1864 } 1865 stlo->disp.type = DISP_0; 1866 1867 fsm_advance(fsm, 0, NULL); 1868 1869 return 0; 1870 } 1871 1872 static int 1873 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1874 { 1875 const struct x86_opcode *opcode = instr->opcode; 1876 struct x86_store *stdmo, *streg; 1877 size_t adrsize, regsize; 1878 1879 adrsize = instr->address_size; 1880 regsize = instr->operand_size; 1881 1882 if (opcode->todmo) { 1883 streg = &instr->src; 1884 stdmo = &instr->dst; 1885 } else { 1886 streg = &instr->dst; 1887 stdmo = &instr->src; 1888 } 1889 1890 streg->type = STORE_REG; 1891 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */ 1892 1893 stdmo->type = STORE_DMO; 1894 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) { 1895 return -1; 1896 } 1897 fsm_advance(fsm, adrsize, NULL); 1898 1899 return 0; 1900 } 1901 1902 static inline uint64_t 1903 sign_extend(uint64_t val, int size) 1904 { 1905 if (size == 1) { 1906 if (val & __BIT(7)) 1907 val |= 0xFFFFFFFFFFFFFF00; 1908 } else if (size == 2) { 1909 if (val & __BIT(15)) 1910 val |= 0xFFFFFFFFFFFF0000; 1911 } else if (size == 4) { 1912 if (val & __BIT(31)) 1913 val |= 0xFFFFFFFF00000000; 1914 } 1915 return val; 1916 } 1917 1918 static int 1919 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1920 { 1921 const struct x86_opcode *opcode = instr->opcode; 1922 struct x86_store *store; 1923 uint8_t immsize; 1924 size_t sesize = 0; 1925 1926 /* The immediate is the source */ 1927 store = &instr->src; 1928 immsize = instr->operand_size; 1929 1930 if (opcode->flags & FLAG_imm8) { 1931 sesize = immsize; 1932 immsize = 1; 1933 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) { 1934 sesize = immsize; 1935 immsize = 4; 1936 } 1937 1938 store->type = STORE_IMM; 1939 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) { 1940 return -1; 1941 } 1942 fsm_advance(fsm, immsize, NULL); 1943 1944 if (sesize != 0) { 1945 store->u.imm.data = sign_extend(store->u.imm.data, sesize); 1946 } 1947 1948 return 0; 1949 } 1950 1951 static int 1952 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr) 1953 { 1954 const struct x86_opcode *opcode = instr->opcode; 1955 uint64_t data = 0; 1956 size_t n; 1957 1958 if (instr->strm->disp.type == DISP_1) { 1959 n = 1; 1960 } else { /* DISP4 */ 1961 n = 4; 1962 } 1963 1964 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) { 1965 return -1; 1966 } 1967 1968 if (__predict_true(fsm->is64bit)) { 1969 data = sign_extend(data, n); 1970 } 1971 1972 instr->strm->disp.data = data; 1973 1974 if (opcode->immediate) { 1975 fsm_advance(fsm, n, node_immediate); 1976 } else { 1977 fsm_advance(fsm, n, NULL); 1978 } 1979 1980 return 0; 1981 } 1982 1983 static const struct x86_reg * 1984 
get_register_idx(struct x86_instr *instr, uint8_t index) 1985 { 1986 uint8_t enc = index; 1987 const struct x86_reg *reg; 1988 size_t regsize; 1989 1990 regsize = instr->address_size; 1991 reg = &gpr_map[instr->rexpref.x][enc][regsize-1]; 1992 1993 if (reg->num == -1) { 1994 reg = resolve_special_register(instr, enc, regsize); 1995 } 1996 1997 return reg; 1998 } 1999 2000 static const struct x86_reg * 2001 get_register_bas(struct x86_instr *instr, uint8_t base) 2002 { 2003 uint8_t enc = base; 2004 const struct x86_reg *reg; 2005 size_t regsize; 2006 2007 regsize = instr->address_size; 2008 reg = &gpr_map[instr->rexpref.b][enc][regsize-1]; 2009 if (reg->num == -1) { 2010 reg = resolve_special_register(instr, enc, regsize); 2011 } 2012 2013 return reg; 2014 } 2015 2016 static int 2017 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2018 { 2019 const struct x86_opcode *opcode; 2020 uint8_t scale, index, base; 2021 bool noindex, nobase; 2022 uint8_t byte; 2023 2024 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2025 return -1; 2026 } 2027 2028 scale = ((byte & 0b11000000) >> 6); 2029 index = ((byte & 0b00111000) >> 3); 2030 base = ((byte & 0b00000111) >> 0); 2031 2032 opcode = instr->opcode; 2033 2034 noindex = false; 2035 nobase = false; 2036 2037 if (index == 0b100 && !instr->rexpref.x) { 2038 /* Special case: the index is null */ 2039 noindex = true; 2040 } 2041 2042 if (instr->regmodrm.mod == 0b00 && base == 0b101) { 2043 /* Special case: the base is null + disp32 */ 2044 instr->strm->disp.type = DISP_4; 2045 nobase = true; 2046 } 2047 2048 instr->strm->type = STORE_SIB; 2049 instr->strm->u.sib.scale = (1 << scale); 2050 if (!noindex) 2051 instr->strm->u.sib.idx = get_register_idx(instr, index); 2052 if (!nobase) 2053 instr->strm->u.sib.bas = get_register_bas(instr, base); 2054 2055 /* May have a displacement, or an immediate */ 2056 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) { 2057 fsm_advance(fsm, 1, node_disp); 2058 } else if (opcode->immediate) { 2059 fsm_advance(fsm, 1, node_immediate); 2060 } else { 2061 fsm_advance(fsm, 1, NULL); 2062 } 2063 2064 return 0; 2065 } 2066 2067 static const struct x86_reg * 2068 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode) 2069 { 2070 uint8_t enc = instr->regmodrm.reg; 2071 const struct x86_reg *reg; 2072 size_t regsize; 2073 2074 regsize = instr->operand_size; 2075 2076 reg = &gpr_map[instr->rexpref.r][enc][regsize-1]; 2077 if (reg->num == -1) { 2078 reg = resolve_special_register(instr, enc, regsize); 2079 } 2080 2081 return reg; 2082 } 2083 2084 static const struct x86_reg * 2085 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode) 2086 { 2087 uint8_t enc = instr->regmodrm.rm; 2088 const struct x86_reg *reg; 2089 size_t regsize; 2090 2091 if (instr->strm->disp.type == DISP_NONE) { 2092 regsize = instr->operand_size; 2093 } else { 2094 /* Indirect access, the size is that of the address. 
*/ 2095 regsize = instr->address_size; 2096 } 2097 2098 reg = &gpr_map[instr->rexpref.b][enc][regsize-1]; 2099 if (reg->num == -1) { 2100 reg = resolve_special_register(instr, enc, regsize); 2101 } 2102 2103 return reg; 2104 } 2105 2106 static inline bool 2107 has_sib(struct x86_instr *instr) 2108 { 2109 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4); 2110 } 2111 2112 static inline bool 2113 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2114 { 2115 return (fsm->is64bit && instr->strm->disp.type == DISP_0 && 2116 instr->regmodrm.rm == RM_RBP_DISP32); 2117 } 2118 2119 static inline bool 2120 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2121 { 2122 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 && 2123 instr->regmodrm.rm == RM_RBP_DISP32); 2124 } 2125 2126 static enum x86_disp_type 2127 get_disp_type(struct x86_instr *instr) 2128 { 2129 switch (instr->regmodrm.mod) { 2130 case MOD_DIS0: /* indirect */ 2131 return DISP_0; 2132 case MOD_DIS1: /* indirect+1 */ 2133 return DISP_1; 2134 case MOD_DIS4: /* indirect+4 */ 2135 return DISP_4; 2136 case MOD_REG: /* direct */ 2137 default: /* gcc */ 2138 return DISP_NONE; 2139 } 2140 } 2141 2142 static int 2143 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2144 { 2145 struct x86_store *strg, *strm; 2146 const struct x86_opcode *opcode; 2147 const struct x86_reg *reg; 2148 uint8_t byte; 2149 2150 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2151 return -1; 2152 } 2153 2154 opcode = instr->opcode; 2155 2156 instr->regmodrm.rm = ((byte & 0b00000111) >> 0); 2157 instr->regmodrm.reg = ((byte & 0b00111000) >> 3); 2158 instr->regmodrm.mod = ((byte & 0b11000000) >> 6); 2159 2160 if (opcode->regtorm) { 2161 strg = &instr->src; 2162 strm = &instr->dst; 2163 } else { /* RM to REG */ 2164 strm = &instr->src; 2165 strg = &instr->dst; 2166 } 2167 2168 /* Save for later use. */ 2169 instr->strm = strm; 2170 2171 /* 2172 * Special cases: Groups. The REG field of REGMODRM is the index in 2173 * the group. op1 gets overwritten in the Immediate node, if any. 2174 */ 2175 if (opcode->group1) { 2176 if (group1[instr->regmodrm.reg].emul == NULL) { 2177 return -1; 2178 } 2179 instr->emul = group1[instr->regmodrm.reg].emul; 2180 } else if (opcode->group3) { 2181 if (group3[instr->regmodrm.reg].emul == NULL) { 2182 return -1; 2183 } 2184 instr->emul = group3[instr->regmodrm.reg].emul; 2185 } else if (opcode->group11) { 2186 if (group11[instr->regmodrm.reg].emul == NULL) { 2187 return -1; 2188 } 2189 instr->emul = group11[instr->regmodrm.reg].emul; 2190 } 2191 2192 if (!opcode->immediate) { 2193 reg = get_register_reg(instr, opcode); 2194 if (reg == NULL) { 2195 return -1; 2196 } 2197 strg->type = STORE_REG; 2198 strg->u.reg = reg; 2199 } 2200 2201 /* The displacement applies to RM. 
*/ 2202 strm->disp.type = get_disp_type(instr); 2203 2204 if (has_sib(instr)) { 2205 /* Overwrites RM */ 2206 fsm_advance(fsm, 1, node_sib); 2207 return 0; 2208 } 2209 2210 if (is_rip_relative(fsm, instr)) { 2211 /* Overwrites RM */ 2212 strm->type = STORE_REG; 2213 strm->u.reg = &gpr_map__rip; 2214 strm->disp.type = DISP_4; 2215 fsm_advance(fsm, 1, node_disp); 2216 return 0; 2217 } 2218 2219 if (is_disp32_only(fsm, instr)) { 2220 /* Overwrites RM */ 2221 strm->type = STORE_REG; 2222 strm->u.reg = NULL; 2223 strm->disp.type = DISP_4; 2224 fsm_advance(fsm, 1, node_disp); 2225 return 0; 2226 } 2227 2228 reg = get_register_rm(instr, opcode); 2229 if (reg == NULL) { 2230 return -1; 2231 } 2232 strm->type = STORE_REG; 2233 strm->u.reg = reg; 2234 2235 if (strm->disp.type == DISP_NONE) { 2236 /* Direct register addressing mode */ 2237 if (opcode->immediate) { 2238 fsm_advance(fsm, 1, node_immediate); 2239 } else { 2240 fsm_advance(fsm, 1, NULL); 2241 } 2242 } else if (strm->disp.type == DISP_0) { 2243 /* Indirect register addressing mode */ 2244 if (opcode->immediate) { 2245 fsm_advance(fsm, 1, node_immediate); 2246 } else { 2247 fsm_advance(fsm, 1, NULL); 2248 } 2249 } else { 2250 fsm_advance(fsm, 1, node_disp); 2251 } 2252 2253 return 0; 2254 } 2255 2256 static size_t 2257 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2258 { 2259 const struct x86_opcode *opcode = instr->opcode; 2260 int opsize; 2261 2262 /* Get the opsize */ 2263 if (!opcode->szoverride) { 2264 opsize = opcode->defsize; 2265 } else if (instr->rexpref.present && instr->rexpref.w) { 2266 opsize = 8; 2267 } else { 2268 if (!fsm->is16bit) { 2269 if (instr->legpref.opr_ovr) { 2270 opsize = 2; 2271 } else { 2272 opsize = 4; 2273 } 2274 } else { /* 16bit */ 2275 if (instr->legpref.opr_ovr) { 2276 opsize = 4; 2277 } else { 2278 opsize = 2; 2279 } 2280 } 2281 } 2282 2283 return opsize; 2284 } 2285 2286 static size_t 2287 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2288 { 2289 if (fsm->is64bit) { 2290 if (__predict_false(instr->legpref.adr_ovr)) { 2291 return 4; 2292 } 2293 return 8; 2294 } 2295 2296 if (fsm->is32bit) { 2297 if (__predict_false(instr->legpref.adr_ovr)) { 2298 return 2; 2299 } 2300 return 4; 2301 } 2302 2303 /* 16bit. */ 2304 if (__predict_false(instr->legpref.adr_ovr)) { 2305 return 4; 2306 } 2307 return 2; 2308 } 2309 2310 static int 2311 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2312 { 2313 const struct x86_opcode *opcode; 2314 uint8_t byte; 2315 2316 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2317 return -1; 2318 } 2319 2320 opcode = &primary_opcode_table[byte]; 2321 if (__predict_false(!opcode->valid)) { 2322 return -1; 2323 } 2324 2325 instr->opcode = opcode; 2326 instr->emul = opcode->emul; 2327 instr->operand_size = get_operand_size(fsm, instr); 2328 instr->address_size = get_address_size(fsm, instr); 2329 2330 if (fsm->is64bit && (instr->operand_size == 4)) { 2331 /* Zero-extend to 64 bits. 
*/ 2332 instr->zeroextend_mask = ~size_to_mask(4); 2333 } 2334 2335 if (opcode->regmodrm) { 2336 fsm_advance(fsm, 1, node_regmodrm); 2337 } else if (opcode->dmo) { 2338 /* Direct-Memory Offsets */ 2339 fsm_advance(fsm, 1, node_dmo); 2340 } else if (opcode->stos || opcode->lods) { 2341 fsm_advance(fsm, 1, node_stlo); 2342 } else if (opcode->movs) { 2343 fsm_advance(fsm, 1, node_movs); 2344 } else { 2345 return -1; 2346 } 2347 2348 return 0; 2349 } 2350 2351 static int 2352 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2353 { 2354 const struct x86_opcode *opcode; 2355 uint8_t byte; 2356 2357 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2358 return -1; 2359 } 2360 2361 opcode = &secondary_opcode_table[byte]; 2362 if (__predict_false(!opcode->valid)) { 2363 return -1; 2364 } 2365 2366 instr->opcode = opcode; 2367 instr->emul = opcode->emul; 2368 instr->operand_size = get_operand_size(fsm, instr); 2369 instr->address_size = get_address_size(fsm, instr); 2370 2371 if (fsm->is64bit && (instr->operand_size == 4)) { 2372 /* Zero-extend to 64 bits. */ 2373 instr->zeroextend_mask = ~size_to_mask(4); 2374 } 2375 2376 if (opcode->flags & FLAG_ze) { 2377 /* 2378 * Compute the mask for zero-extend. Update the operand size, 2379 * we move fewer bytes. 2380 */ 2381 instr->zeroextend_mask |= size_to_mask(instr->operand_size); 2382 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize); 2383 instr->operand_size = opcode->defsize; 2384 } 2385 2386 if (opcode->regmodrm) { 2387 fsm_advance(fsm, 1, node_regmodrm); 2388 } else { 2389 return -1; 2390 } 2391 2392 return 0; 2393 } 2394 2395 static int 2396 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2397 { 2398 uint8_t byte; 2399 2400 #define ESCAPE 0x0F 2401 #define VEX_1 0xC5 2402 #define VEX_2 0xC4 2403 #define XOP 0x8F 2404 2405 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2406 return -1; 2407 } 2408 2409 /* 2410 * We don't take XOP. It is AMD-specific, and it was removed shortly 2411 * after being introduced. 
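 * VEX-encoded instructions (the 0xC4/0xC5 escapes) are not handled either;
 * the decoder below rejects them, and only legacy-encoded opcodes go
 * through the primary and secondary tables.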
2412 */ 2413 if (byte == ESCAPE) { 2414 fsm_advance(fsm, 1, node_secondary_opcode); 2415 } else if (!instr->rexpref.present) { 2416 if (byte == VEX_1) { 2417 return -1; 2418 } else if (byte == VEX_2) { 2419 return -1; 2420 } else { 2421 fsm->fn = node_primary_opcode; 2422 } 2423 } else { 2424 fsm->fn = node_primary_opcode; 2425 } 2426 2427 return 0; 2428 } 2429 2430 static int 2431 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2432 { 2433 struct x86_rexpref *rexpref = &instr->rexpref; 2434 uint8_t byte; 2435 size_t n = 0; 2436 2437 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2438 return -1; 2439 } 2440 2441 if (byte >= 0x40 && byte <= 0x4F) { 2442 if (__predict_false(!fsm->is64bit)) { 2443 return -1; 2444 } 2445 rexpref->b = ((byte & 0x1) != 0); 2446 rexpref->x = ((byte & 0x2) != 0); 2447 rexpref->r = ((byte & 0x4) != 0); 2448 rexpref->w = ((byte & 0x8) != 0); 2449 rexpref->present = true; 2450 n = 1; 2451 } 2452 2453 fsm_advance(fsm, n, node_main); 2454 return 0; 2455 } 2456 2457 static int 2458 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr) 2459 { 2460 uint8_t byte; 2461 2462 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) { 2463 return -1; 2464 } 2465 2466 if (byte == LEG_OPR_OVR) { 2467 instr->legpref.opr_ovr = 1; 2468 } else if (byte == LEG_OVR_DS) { 2469 instr->legpref.seg = NVMM_X64_SEG_DS; 2470 } else if (byte == LEG_OVR_ES) { 2471 instr->legpref.seg = NVMM_X64_SEG_ES; 2472 } else if (byte == LEG_REP) { 2473 instr->legpref.rep = 1; 2474 } else if (byte == LEG_OVR_GS) { 2475 instr->legpref.seg = NVMM_X64_SEG_GS; 2476 } else if (byte == LEG_OVR_FS) { 2477 instr->legpref.seg = NVMM_X64_SEG_FS; 2478 } else if (byte == LEG_ADR_OVR) { 2479 instr->legpref.adr_ovr = 1; 2480 } else if (byte == LEG_OVR_CS) { 2481 instr->legpref.seg = NVMM_X64_SEG_CS; 2482 } else if (byte == LEG_OVR_SS) { 2483 instr->legpref.seg = NVMM_X64_SEG_SS; 2484 } else if (byte == LEG_REPN) { 2485 instr->legpref.repn = 1; 2486 } else if (byte == LEG_LOCK) { 2487 /* ignore */ 2488 } else { 2489 /* not a legacy prefix */ 2490 fsm_advance(fsm, 0, node_rex_prefix); 2491 return 0; 2492 } 2493 2494 fsm_advance(fsm, 1, node_legacy_prefix); 2495 return 0; 2496 } 2497 2498 static int 2499 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr, 2500 struct nvmm_x64_state *state) 2501 { 2502 struct x86_decode_fsm fsm; 2503 int ret; 2504 2505 memset(instr, 0, sizeof(*instr)); 2506 instr->legpref.seg = -1; 2507 instr->src.hardseg = -1; 2508 instr->dst.hardseg = -1; 2509 2510 fsm.is64bit = is_64bit(state); 2511 fsm.is32bit = is_32bit(state); 2512 fsm.is16bit = is_16bit(state); 2513 2514 fsm.fn = node_legacy_prefix; 2515 fsm.buf = inst_bytes; 2516 fsm.end = inst_bytes + inst_len; 2517 2518 while (fsm.fn != NULL) { 2519 ret = (*fsm.fn)(&fsm, instr); 2520 if (ret == -1) 2521 return -1; 2522 } 2523 2524 instr->len = fsm.buf - inst_bytes; 2525 2526 return 0; 2527 } 2528 2529 /* -------------------------------------------------------------------------- */ 2530 2531 #define EXEC_INSTR(sz, instr) \ 2532 static uint##sz##_t \ 2533 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \ 2534 { \ 2535 uint##sz##_t res; \ 2536 __asm __volatile ( \ 2537 #instr " %2, %3;" \ 2538 "mov %3, %1;" \ 2539 "pushfq;" \ 2540 "popq %0" \ 2541 : "=r" (*rflags), "=r" (res) \ 2542 : "r" (op1), "r" (op2)); \ 2543 return res; \ 2544 } 2545 2546 #define EXEC_DISPATCHER(instr) \ 2547 static uint64_t \ 2548 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) 
\ 2549 { \
2550 switch (opsize) { \
2551 case 1: \
2552 return exec_##instr##8(op1, op2, rflags); \
2553 case 2: \
2554 return exec_##instr##16(op1, op2, rflags); \
2555 case 4: \
2556 return exec_##instr##32(op1, op2, rflags); \
2557 default: \
2558 return exec_##instr##64(op1, op2, rflags); \
2559 } \
2560 }
2561
2562 /* SUB: ret = op1 - op2 */
2563 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2564 EXEC_INSTR(8, sub)
2565 EXEC_INSTR(16, sub)
2566 EXEC_INSTR(32, sub)
2567 EXEC_INSTR(64, sub)
2568 EXEC_DISPATCHER(sub)
2569
2570 /* OR: ret = op1 | op2 */
2571 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2572 EXEC_INSTR(8, or)
2573 EXEC_INSTR(16, or)
2574 EXEC_INSTR(32, or)
2575 EXEC_INSTR(64, or)
2576 EXEC_DISPATCHER(or)
2577
2578 /* AND: ret = op1 & op2 */
2579 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2580 EXEC_INSTR(8, and)
2581 EXEC_INSTR(16, and)
2582 EXEC_INSTR(32, and)
2583 EXEC_INSTR(64, and)
2584 EXEC_DISPATCHER(and)
2585
2586 /* XOR: ret = op1 ^ op2 */
2587 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2588 EXEC_INSTR(8, xor)
2589 EXEC_INSTR(16, xor)
2590 EXEC_INSTR(32, xor)
2591 EXEC_INSTR(64, xor)
2592 EXEC_DISPATCHER(xor)
2593
2594 /* -------------------------------------------------------------------------- */
2595
2596 /*
2597 * Emulation functions. We don't care about the order of the operands, except
2598 * for SUB, CMP and TEST. For these we look at mem->write to determine which
2599 * is op1 and which is op2.
2600 */
2601
2602 static void
2603 x86_func_or(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2604 {
2605 uint64_t *retval = (uint64_t *)mem->data;
2606 const bool write = mem->write;
2607 uint64_t *op1, op2, fl, ret;
2608
2609 op1 = (uint64_t *)mem->data;
2610 op2 = 0;
2611
2612 /* Fetch the value to be OR'ed (op2). */
2613 mem->data = (uint8_t *)&op2;
2614 mem->write = false;
2615 (*mach->cbs.mem)(mem);
2616
2617 /* Perform the OR. */
2618 ret = exec_or(*op1, op2, &fl, mem->size);
2619
2620 if (write) {
2621 /* Write back the result. */
2622 mem->data = (uint8_t *)&ret;
2623 mem->write = true;
2624 (*mach->cbs.mem)(mem);
2625 } else {
2626 /* Return data to the caller. */
2627 *retval = ret;
2628 }
2629
2630 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2631 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2632 }
2633
2634 static void
2635 x86_func_and(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2636 {
2637 uint64_t *retval = (uint64_t *)mem->data;
2638 const bool write = mem->write;
2639 uint64_t *op1, op2, fl, ret;
2640
2641 op1 = (uint64_t *)mem->data;
2642 op2 = 0;
2643
2644 /* Fetch the value to be AND'ed (op2). */
2645 mem->data = (uint8_t *)&op2;
2646 mem->write = false;
2647 (*mach->cbs.mem)(mem);
2648
2649 /* Perform the AND. */
2650 ret = exec_and(*op1, op2, &fl, mem->size);
2651
2652 if (write) {
2653 /* Write back the result. */
2654 mem->data = (uint8_t *)&ret;
2655 mem->write = true;
2656 (*mach->cbs.mem)(mem);
2657 } else {
2658 /* Return data to the caller. */
2659 *retval = ret;
2660 }
2661
2662 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2663 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2664 }
2665
2666 static void
2667 x86_func_sub(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2668 {
2669 uint64_t *retval = (uint64_t *)mem->data;
2670 const bool write = mem->write;
2671 uint64_t *op1, *op2, fl, ret;
2672 uint64_t tmp;
2673 bool memop1;
2674
2675 memop1 = !mem->write;
2676 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2677 op2 = memop1 ?
(uint64_t *)mem->data : &tmp; 2678 2679 /* Fetch the value to be SUB'ed (op1 or op2). */ 2680 mem->data = (uint8_t *)&tmp; 2681 mem->write = false; 2682 (*mach->cbs.mem)(mem); 2683 2684 /* Perform the SUB. */ 2685 ret = exec_sub(*op1, *op2, &fl, mem->size); 2686 2687 if (write) { 2688 /* Write back the result. */ 2689 mem->data = (uint8_t *)&ret; 2690 mem->write = true; 2691 (*mach->cbs.mem)(mem); 2692 } else { 2693 /* Return data to the caller. */ 2694 *retval = ret; 2695 } 2696 2697 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; 2698 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); 2699 } 2700 2701 static void 2702 x86_func_xor(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2703 { 2704 uint64_t *retval = (uint64_t *)mem->data; 2705 const bool write = mem->write; 2706 uint64_t *op1, op2, fl, ret; 2707 2708 op1 = (uint64_t *)mem->data; 2709 op2 = 0; 2710 2711 /* Fetch the value to be XOR'ed (op2). */ 2712 mem->data = (uint8_t *)&op2; 2713 mem->write = false; 2714 (*mach->cbs.mem)(mem); 2715 2716 /* Perform the XOR. */ 2717 ret = exec_xor(*op1, op2, &fl, mem->size); 2718 2719 if (write) { 2720 /* Write back the result. */ 2721 mem->data = (uint8_t *)&ret; 2722 mem->write = true; 2723 (*mach->cbs.mem)(mem); 2724 } else { 2725 /* Return data to the caller. */ 2726 *retval = ret; 2727 } 2728 2729 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK; 2730 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK); 2731 } 2732 2733 static void 2734 x86_func_cmp(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2735 { 2736 uint64_t *op1, *op2, fl; 2737 uint64_t tmp; 2738 bool memop1; 2739 2740 memop1 = !mem->write; 2741 op1 = memop1 ? &tmp : (uint64_t *)mem->data; 2742 op2 = memop1 ? (uint64_t *)mem->data : &tmp; 2743 2744 /* Fetch the value to be CMP'ed (op1 or op2). */ 2745 mem->data = (uint8_t *)&tmp; 2746 mem->write = false; 2747 (*mach->cbs.mem)(mem); 2748 2749 /* Perform the CMP. */ 2750 exec_sub(*op1, *op2, &fl, mem->size); 2751 2752 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; 2753 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); 2754 } 2755 2756 static void 2757 x86_func_test(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2758 { 2759 uint64_t *op1, *op2, fl; 2760 uint64_t tmp; 2761 bool memop1; 2762 2763 memop1 = !mem->write; 2764 op1 = memop1 ? &tmp : (uint64_t *)mem->data; 2765 op2 = memop1 ? (uint64_t *)mem->data : &tmp; 2766 2767 /* Fetch the value to be TEST'ed (op1 or op2). */ 2768 mem->data = (uint8_t *)&tmp; 2769 mem->write = false; 2770 (*mach->cbs.mem)(mem); 2771 2772 /* Perform the TEST. */ 2773 exec_and(*op1, *op2, &fl, mem->size); 2774 2775 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK; 2776 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK); 2777 } 2778 2779 static void 2780 x86_func_mov(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2781 { 2782 /* 2783 * Nothing special, just move without emulation. 2784 */ 2785 (*mach->cbs.mem)(mem); 2786 } 2787 2788 static void 2789 x86_func_stos(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2790 { 2791 /* 2792 * Just move, and update RDI. 2793 */ 2794 (*mach->cbs.mem)(mem); 2795 2796 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2797 gprs[NVMM_X64_GPR_RDI] -= mem->size; 2798 } else { 2799 gprs[NVMM_X64_GPR_RDI] += mem->size; 2800 } 2801 } 2802 2803 static void 2804 x86_func_lods(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2805 { 2806 /* 2807 * Just move, and update RSI. 
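 * (As with STOS and RDI above, RSI moves backwards by the access size when
 * the direction flag PSL_D is set, forwards otherwise.)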
2808 */ 2809 (*mach->cbs.mem)(mem); 2810 2811 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2812 gprs[NVMM_X64_GPR_RSI] -= mem->size; 2813 } else { 2814 gprs[NVMM_X64_GPR_RSI] += mem->size; 2815 } 2816 } 2817 2818 static void 2819 x86_func_movs(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs) 2820 { 2821 /* 2822 * Special instruction: double memory operand. Don't call the cb, 2823 * because the storage has already been performed earlier. 2824 */ 2825 2826 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { 2827 gprs[NVMM_X64_GPR_RSI] -= mem->size; 2828 gprs[NVMM_X64_GPR_RDI] -= mem->size; 2829 } else { 2830 gprs[NVMM_X64_GPR_RSI] += mem->size; 2831 gprs[NVMM_X64_GPR_RDI] += mem->size; 2832 } 2833 } 2834 2835 /* -------------------------------------------------------------------------- */ 2836 2837 static inline uint64_t 2838 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr) 2839 { 2840 uint64_t val; 2841 2842 val = state->gprs[gpr]; 2843 val &= size_to_mask(instr->address_size); 2844 2845 return val; 2846 } 2847 2848 static int 2849 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr, 2850 struct x86_store *store, gvaddr_t *gvap, size_t size) 2851 { 2852 struct x86_sib *sib; 2853 gvaddr_t gva = 0; 2854 uint64_t reg; 2855 int ret, seg; 2856 2857 if (store->type == STORE_SIB) { 2858 sib = &store->u.sib; 2859 if (sib->bas != NULL) 2860 gva += gpr_read_address(instr, state, sib->bas->num); 2861 if (sib->idx != NULL) { 2862 reg = gpr_read_address(instr, state, sib->idx->num); 2863 gva += sib->scale * reg; 2864 } 2865 } else if (store->type == STORE_REG) { 2866 if (store->u.reg == NULL) { 2867 /* The base is null. Happens with disp32-only. */ 2868 } else { 2869 gva = gpr_read_address(instr, state, store->u.reg->num); 2870 } 2871 } else { 2872 gva = store->u.dmo; 2873 } 2874 2875 if (store->disp.type != DISP_NONE) { 2876 gva += store->disp.data; 2877 } 2878 2879 if (store->hardseg != -1) { 2880 seg = store->hardseg; 2881 } else { 2882 if (__predict_false(instr->legpref.seg != -1)) { 2883 seg = instr->legpref.seg; 2884 } else { 2885 seg = NVMM_X64_SEG_DS; 2886 } 2887 } 2888 2889 if (__predict_true(is_long_mode(state))) { 2890 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) { 2891 segment_apply(&state->segs[seg], &gva); 2892 } 2893 } else { 2894 ret = segment_check(&state->segs[seg], gva, size); 2895 if (ret == -1) 2896 return -1; 2897 segment_apply(&state->segs[seg], &gva); 2898 } 2899 2900 *gvap = gva; 2901 return 0; 2902 } 2903 2904 static int 2905 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state) 2906 { 2907 uint8_t inst_bytes[5], byte; 2908 size_t i, fetchsize; 2909 gvaddr_t gva; 2910 int ret, seg; 2911 2912 fetchsize = sizeof(inst_bytes); 2913 2914 gva = state->gprs[NVMM_X64_GPR_RIP]; 2915 if (__predict_false(!is_long_mode(state))) { 2916 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva, 2917 fetchsize); 2918 if (ret == -1) 2919 return -1; 2920 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva); 2921 } 2922 2923 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize); 2924 if (ret == -1) 2925 return -1; 2926 2927 seg = NVMM_X64_SEG_DS; 2928 for (i = 0; i < fetchsize; i++) { 2929 byte = inst_bytes[i]; 2930 2931 if (byte == LEG_OVR_DS) { 2932 seg = NVMM_X64_SEG_DS; 2933 } else if (byte == LEG_OVR_ES) { 2934 seg = NVMM_X64_SEG_ES; 2935 } else if (byte == LEG_OVR_GS) { 2936 seg = NVMM_X64_SEG_GS; 2937 } else if (byte == LEG_OVR_FS) { 2938 seg = NVMM_X64_SEG_FS; 2939 } else if (byte == LEG_OVR_CS) { 2940 seg = 
NVMM_X64_SEG_CS; 2941 } else if (byte == LEG_OVR_SS) { 2942 seg = NVMM_X64_SEG_SS; 2943 } else if (byte == LEG_OPR_OVR) { 2944 /* nothing */ 2945 } else if (byte == LEG_ADR_OVR) { 2946 /* nothing */ 2947 } else if (byte == LEG_REP) { 2948 /* nothing */ 2949 } else if (byte == LEG_REPN) { 2950 /* nothing */ 2951 } else if (byte == LEG_LOCK) { 2952 /* nothing */ 2953 } else { 2954 return seg; 2955 } 2956 } 2957 2958 return seg; 2959 } 2960 2961 static int 2962 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state, 2963 struct nvmm_exit *exit) 2964 { 2965 size_t fetchsize; 2966 gvaddr_t gva; 2967 int ret; 2968 2969 fetchsize = sizeof(exit->u.mem.inst_bytes); 2970 2971 gva = state->gprs[NVMM_X64_GPR_RIP]; 2972 if (__predict_false(!is_long_mode(state))) { 2973 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva, 2974 fetchsize); 2975 if (ret == -1) 2976 return -1; 2977 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva); 2978 } 2979 2980 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes, 2981 fetchsize); 2982 if (ret == -1) 2983 return -1; 2984 2985 exit->u.mem.inst_len = fetchsize; 2986 2987 return 0; 2988 } 2989 2990 static int 2991 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state, 2992 struct x86_instr *instr) 2993 { 2994 struct nvmm_mem mem; 2995 uint8_t data[8]; 2996 gvaddr_t gva; 2997 size_t size; 2998 int ret; 2999 3000 size = instr->operand_size; 3001 3002 /* Source. */ 3003 ret = store_to_gva(state, instr, &instr->src, &gva, size); 3004 if (ret == -1) 3005 return -1; 3006 ret = read_guest_memory(mach, state, gva, data, size); 3007 if (ret == -1) 3008 return -1; 3009 3010 /* Destination. */ 3011 ret = store_to_gva(state, instr, &instr->dst, &gva, size); 3012 if (ret == -1) 3013 return -1; 3014 ret = write_guest_memory(mach, state, gva, data, size); 3015 if (ret == -1) 3016 return -1; 3017 3018 mem.size = size; 3019 (*instr->emul->func)(mach, &mem, state->gprs); 3020 3021 return 0; 3022 } 3023 3024 #define DISASSEMBLER_BUG() \ 3025 do { \ 3026 errno = EINVAL; \ 3027 return -1; \ 3028 } while (0); 3029 3030 static int 3031 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state, 3032 struct x86_instr *instr, struct nvmm_exit *exit) 3033 { 3034 struct nvmm_mem mem; 3035 uint8_t membuf[8]; 3036 uint64_t val; 3037 3038 memset(membuf, 0, sizeof(membuf)); 3039 3040 mem.gpa = exit->u.mem.gpa; 3041 mem.size = instr->operand_size; 3042 mem.data = membuf; 3043 3044 /* Determine the direction. */ 3045 switch (instr->src.type) { 3046 case STORE_REG: 3047 if (instr->src.disp.type != DISP_NONE) { 3048 /* Indirect access. */ 3049 mem.write = false; 3050 } else { 3051 /* Direct access. 
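 * The source is a plain register operand with no displacement, so the
 * memory operand must be the destination: the guest access being
 * emulated here is a write.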
*/ 3052 mem.write = true; 3053 } 3054 break; 3055 case STORE_IMM: 3056 mem.write = true; 3057 break; 3058 case STORE_SIB: 3059 mem.write = false; 3060 break; 3061 case STORE_DMO: 3062 mem.write = false; 3063 break; 3064 default: 3065 DISASSEMBLER_BUG(); 3066 } 3067 3068 if (mem.write) { 3069 switch (instr->src.type) { 3070 case STORE_REG: 3071 if (instr->src.disp.type != DISP_NONE) { 3072 DISASSEMBLER_BUG(); 3073 } 3074 val = state->gprs[instr->src.u.reg->num]; 3075 val = __SHIFTOUT(val, instr->src.u.reg->mask); 3076 memcpy(mem.data, &val, mem.size); 3077 break; 3078 case STORE_IMM: 3079 memcpy(mem.data, &instr->src.u.imm.data, mem.size); 3080 break; 3081 default: 3082 DISASSEMBLER_BUG(); 3083 } 3084 } else if (instr->emul->read) { 3085 if (instr->dst.type != STORE_REG) { 3086 DISASSEMBLER_BUG(); 3087 } 3088 if (instr->dst.disp.type != DISP_NONE) { 3089 DISASSEMBLER_BUG(); 3090 } 3091 val = state->gprs[instr->dst.u.reg->num]; 3092 val = __SHIFTOUT(val, instr->dst.u.reg->mask); 3093 memcpy(mem.data, &val, mem.size); 3094 } 3095 3096 (*instr->emul->func)(mach, &mem, state->gprs); 3097 3098 if (!instr->emul->notouch && !mem.write) { 3099 if (instr->dst.type != STORE_REG) { 3100 DISASSEMBLER_BUG(); 3101 } 3102 memcpy(&val, membuf, sizeof(uint64_t)); 3103 val = __SHIFTIN(val, instr->dst.u.reg->mask); 3104 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask; 3105 state->gprs[instr->dst.u.reg->num] |= val; 3106 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask; 3107 } 3108 3109 return 0; 3110 } 3111 3112 int 3113 nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) 3114 { 3115 struct nvmm_x64_state *state = vcpu->state; 3116 struct nvmm_exit *exit = vcpu->exit; 3117 struct x86_instr instr; 3118 uint64_t cnt = 0; /* GCC */ 3119 int ret; 3120 3121 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) { 3122 errno = EINVAL; 3123 return -1; 3124 } 3125 3126 ret = nvmm_vcpu_getstate(mach, vcpu, 3127 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 3128 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 3129 if (ret == -1) 3130 return -1; 3131 3132 if (exit->u.mem.inst_len == 0) { 3133 /* 3134 * The instruction was not fetched from the kernel. Fetch 3135 * it ourselves. 3136 */ 3137 ret = fetch_instruction(mach, state, exit); 3138 if (ret == -1) 3139 return -1; 3140 } 3141 3142 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len, 3143 &instr, state); 3144 if (ret == -1) { 3145 errno = ENODEV; 3146 return -1; 3147 } 3148 3149 if (instr.legpref.rep || instr.legpref.repn) { 3150 cnt = rep_get_cnt(state, instr.address_size); 3151 if (__predict_false(cnt == 0)) { 3152 state->gprs[NVMM_X64_GPR_RIP] += instr.len; 3153 goto out; 3154 } 3155 } 3156 3157 if (instr.opcode->movs) { 3158 ret = assist_mem_double(mach, state, &instr); 3159 } else { 3160 ret = assist_mem_single(mach, state, &instr, exit); 3161 } 3162 if (ret == -1) { 3163 errno = ENODEV; 3164 return -1; 3165 } 3166 3167 if (instr.legpref.rep || instr.legpref.repn) { 3168 cnt -= 1; 3169 rep_set_cnt(state, instr.address_size, cnt); 3170 if (cnt == 0) { 3171 state->gprs[NVMM_X64_GPR_RIP] += instr.len; 3172 } else if (__predict_false(instr.legpref.repn)) { 3173 if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) { 3174 state->gprs[NVMM_X64_GPR_RIP] += instr.len; 3175 } 3176 } 3177 } else { 3178 state->gprs[NVMM_X64_GPR_RIP] += instr.len; 3179 } 3180 3181 out: 3182 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS); 3183 if (ret == -1) 3184 return -1; 3185 3186 return 0; 3187 } 3188
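
/*
 * Example usage (illustrative sketch only, not part of this library): a VMM
 * run loop would typically hand NVMM_EXIT_MEMORY exits to nvmm_assist_mem().
 * The loop below assumes "mach" and "vcpu" were already set up (machine and
 * vcpu created, guest memory mapped, and a mem callback registered); the
 * surrounding error handling is hypothetical.
 *
 *	while (nvmm_vcpu_run(&mach, &vcpu) == 0) {
 *		switch (vcpu.exit->reason) {
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(&mach, &vcpu) == -1)
 *				errx(EXIT_FAILURE, "MMIO emulation failed");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */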