/*	$OpenBSD: x86_vm.c,v 1.5 2024/10/02 17:05:56 dv Exp $	*/
/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/stat.h>
#include <sys/types.h>

#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>

#include <machine/pte.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <zlib.h>

#include "atomicio.h"
#include "fw_cfg.h"
#include "i8253.h"
#include "i8259.h"
#include "loadfile.h"
#include "mc146818.h"
#include "ns8250.h"
#include "pci.h"
#include "virtio.h"

typedef uint8_t (*io_fn_t)(struct vm_run_params *);

#define MAX_PORTS 65536

io_fn_t	 ioports_map[MAX_PORTS];
extern char *__progname;

void	 create_memory_map(struct vm_create_params *);
int	 translate_gva(struct vm_exit *, uint64_t, uint64_t *, int);

static int	loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
static int	vcpu_exit_eptviolation(struct vm_run_params *);
static void	vcpu_exit_inout(struct vm_run_params *);

extern struct vmd_vm	*current_vm;
extern int		 con_fd;

/*
 * Represents a standard register set for an OS to be booted
 * as a flat 64 bit address space.
 *
 * NOT set here are:
 *  RIP
 *  RSP
 *  GDTR BASE
 *
 * Specific bootloaders should clone this structure and override
 * those fields as needed.
 *
 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
 * features of the CPU in use.
 */
static const struct vcpu_reg_state vcpu_init_flat64 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE | CR0_PG,
	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
	.vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
	.vrs_crs[VCPU_REGS_PDPTE0] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_msrs[VCPU_REGS_MISC_ENABLE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};
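
/*
 * Illustrative sketch of how the template above is meant to be used (this
 * is editorial commentary, not code from a particular loader): a
 * direct-kernel boot copies vcpu_init_flat64 and then fills in the entry
 * point, stack and GDTR base before the vcpu is started, e.g.
 *
 *	struct vcpu_reg_state vrs;
 *
 *	memcpy(&vrs, &vcpu_init_flat64, sizeof(vrs));
 *	vrs.vrs_gprs[VCPU_REGS_RIP] = elf_entry;
 *	vrs.vrs_gprs[VCPU_REGS_RSP] = stack_gpa;
 *	vrs.vrs_gdtr.vsi_base = gdt_gpa;
 *
 * "elf_entry", "stack_gpa" and "gdt_gpa" are placeholder names; the real
 * setup is performed by loadfile_elf() (see load_firmware() below).
 */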

/*
 * Represents a standard register set for a BIOS to be booted
 * as a flat 16 bit address space.
 */
static const struct vcpu_reg_state vcpu_init_flat16 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
	.vrs_crs[VCPU_REGS_CR3] = 0,
	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};

/*
 * create_memory_map
 *
 * Sets up the guest physical memory ranges that the VM can access.
 *
 * Parameters:
 *  vcp: VM create parameters describing the VM whose memory map
 *       is being created
 *
 * Return values:
 *  nothing
 */
void
create_memory_map(struct vm_create_params *vcp)
{
	size_t len, mem_bytes;
	size_t above_1m = 0, above_4g = 0;

	mem_bytes = vcp->vcp_memranges[0].vmr_size;
	vcp->vcp_nmemranges = 0;
	if (mem_bytes == 0 || mem_bytes > VMM_MAX_VM_MEM_SIZE)
		return;

	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
	len = LOWMEM_KB * 1024;
	vcp->vcp_memranges[0].vmr_gpa = 0x0;
	vcp->vcp_memranges[0].vmr_size = len;
	vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
	mem_bytes -= len;

	/*
	 * Second memory region: LOWMEM_KB - 1MB.
	 *
	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
	 * We have to add this region, because some systems
	 * unconditionally write to 0xb8000 (VGA RAM), and
	 * we need to make sure that vmm(4) permits accesses
	 * to it. So allocate guest memory for it.
	 */
	len = MB(1) - (LOWMEM_KB * 1024);
	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_size = len;
	vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
	mem_bytes -= len;

	/* If we have less than 2MB remaining, still create a 2nd BIOS area. */
	if (mem_bytes <= MB(2)) {
		vcp->vcp_memranges[2].vmr_gpa = PCI_MMIO_BAR_END;
		vcp->vcp_memranges[2].vmr_size = MB(2);
		vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
		vcp->vcp_nmemranges = 3;
		return;
	}

	/*
	 * Calculate how to split any remaining memory across the 4GB
	 * boundary while making sure we do not place physical memory into
	 * MMIO ranges.
	 */
	if (mem_bytes > PCI_MMIO_BAR_BASE - MB(1)) {
		above_1m = PCI_MMIO_BAR_BASE - MB(1);
		above_4g = mem_bytes - above_1m;
	} else {
		above_1m = mem_bytes;
		above_4g = 0;
	}

	/* Third memory region: area above 1MB to MMIO region */
	vcp->vcp_memranges[2].vmr_gpa = MB(1);
	vcp->vcp_memranges[2].vmr_size = above_1m;
	vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;

	/* Fourth region: PCI MMIO range */
	vcp->vcp_memranges[3].vmr_gpa = PCI_MMIO_BAR_BASE;
	vcp->vcp_memranges[3].vmr_size = PCI_MMIO_BAR_END -
	    PCI_MMIO_BAR_BASE + 1;
	vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO;

	/* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
	vcp->vcp_memranges[4].vmr_gpa = PCI_MMIO_BAR_END + 1;
	vcp->vcp_memranges[4].vmr_size = MB(2);
	vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;

	/* Sixth region: any remainder above 4GB */
	if (above_4g > 0) {
		vcp->vcp_memranges[5].vmr_gpa = GB(4);
		vcp->vcp_memranges[5].vmr_size = above_4g;
		vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
		vcp->vcp_nmemranges = 6;
	} else
		vcp->vcp_nmemranges = 5;
}
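
/*
 * Illustrative summary of the layout built above (editorial note, not
 * additional code): for a guest large enough to need all six ranges,
 * the resulting guest physical map is
 *
 *	[0, LOWMEM_KB * 1024)			RAM	 (DOS low memory)
 *	[LOWMEM_KB * 1024, 1MB)			reserved (VGA/ROM hole)
 *	[1MB, PCI_MMIO_BAR_BASE)		RAM
 *	[PCI_MMIO_BAR_BASE, PCI_MMIO_BAR_END]	MMIO	 (PCI BARs)
 *	[PCI_MMIO_BAR_END + 1, +2MB)		reserved (2nd BIOS copy)
 *	[4GB, 4GB + above_4g)			RAM	 (remainder)
 *
 * The constants are defined elsewhere in vmd.  Guests too small to reach
 * the MMIO hole take the early-return path above and end up with three
 * ranges; guests that fit entirely below 4GB get five.
 */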

int
load_firmware(struct vmd_vm *vm, struct vcpu_reg_state *vrs)
{
	int ret;
	gzFile fp;
	struct stat sb;

	/*
	 * Set up default "flat 64 bit" register state - RIP, RSP, and
	 * GDT info will be set in bootloader
	 */
	memcpy(vrs, &vcpu_init_flat64, sizeof(*vrs));

	/* Find and open kernel image */
	if ((fp = gzdopen(vm->vm_kernel, "r")) == NULL)
		fatalx("failed to open kernel - exiting");

	/* Load kernel image */
	ret = loadfile_elf(fp, vm, vrs, vm->vm_params.vmc_bootdevice);

	/*
	 * Try BIOS as a fallback (only if it was provided as an image
	 * with vm->vm_kernel and the file is not compressed)
	 */
	if (ret && errno == ENOEXEC && vm->vm_kernel != -1 &&
	    gzdirect(fp) && (ret = fstat(vm->vm_kernel, &sb)) == 0)
		ret = loadfile_bios(fp, sb.st_size, vrs);

	gzclose(fp);

	return (ret);
}


/*
 * loadfile_bios
 *
 * As an alternative to loadfile_elf, this function loads a non-ELF BIOS
 * image directly into memory.
 *
 * Parameters:
 *  fp: gzFile handle to the BIOS image to load
 *  size: uncompressed size of the image
 *  (out) vrs: register state to set on init for this kernel
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from read(2) or loadelf functions
 */
int
loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_state *vrs)
{
	off_t off;

	/* Set up a "flat 16 bit" register state for BIOS */
	memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));

	/* Seek to the beginning of the BIOS image */
	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* The BIOS image must end at 1MB */
	if ((off = MB(1) - size) < 0)
		return (-1);

	/* Read BIOS image into memory */
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* Read a second BIOS copy into memory ending at 4GB */
	off = GB(4) - size;
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	log_debug("%s: loaded BIOS image", __func__);

	return (0);
}
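
/*
 * Worked example of the placement above (illustrative numbers, not from
 * the original sources): for a hypothetical 256KB (0x40000 byte) BIOS
 * image, the first copy is read in at 0xC0000 so that it ends exactly at
 * 1MB, and the second copy at 0xFFFC0000 so that it ends exactly at 4GB.
 * With the vcpu_init_flat16 state (CS base 0xF0000, RIP 0xFFF0), the
 * first instruction fetch happens at 0xFFFF0 inside the low copy, while
 * the high copy mirrors the image just below 4GB, where real hardware
 * decodes the firmware ROM.
 */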

/*
 * init_emulated_hw
 *
 * Initializes the userspace hardware emulation
 */
void
init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	size_t i;
	uint64_t memlo, memhi;

	/* Calculate memory size for NVRAM registers */
	memlo = memhi = 0;
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		if (vcp->vcp_memranges[i].vmr_gpa == MB(1) &&
		    vcp->vcp_memranges[i].vmr_size > (15 * MB(1)))
			memlo = vcp->vcp_memranges[i].vmr_size - (15 * MB(1));
		else if (vcp->vcp_memranges[i].vmr_gpa == GB(4))
			memhi = vcp->vcp_memranges[i].vmr_size;
	}

	/* Reset the IO port map */
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_init(vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
	ioports_map[PCKBC_AUX] = vcpu_exit_i8253_misc;

	/* Init mc146818 RTC */
	mc146818_init(vcp->vcp_id, memlo, memhi);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init master and slave PICs */
	i8259_init();
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
	ioports_map[ELCR0] = vcpu_exit_elcr;
	ioports_map[ELCR1] = vcpu_exit_elcr;

	/* Init ns8250 UART */
	ns8250_init(con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_init();

	/* Initialize virtio devices */
	virtio_init(current_vm, child_cdrom, child_disks, child_taps);

	/*
	 * Init QEMU fw_cfg interface. Must be done last for pci hardware
	 * detection.
	 */
	fw_cfg_init(vmc);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;
}
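
/*
 * Sketch of the dispatch pattern used above (editorial illustration):
 * every emulated device registers an io_fn_t handler for its ports, and
 * vcpu_exit_inout() below invokes ioports_map[port] on each in/out exit.
 * A hypothetical handler therefore has the shape
 *
 *	static uint8_t
 *	vcpu_exit_mydev(struct vm_run_params *vrp)
 *	{
 *		struct vm_exit *vei = vrp->vrp_exit;
 *
 *		if (vei->vei.vei_dir == VEI_DIR_IN)
 *			set_return_data(vei, 0xFFFFFFFF);
 *		return (0xFF);
 *	}
 *
 * "vcpu_exit_mydev" is a placeholder name; the real handlers are the
 * vcpu_exit_* functions registered above.  Returning 0xFF means "no
 * interrupt to assert"; returning a valid IRQ number makes
 * vcpu_exit_inout() call vcpu_assert_irq() for it.
 */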

/*
 * restore_emulated_hw
 *
 * Restores the userspace hardware emulation from fd
 */
void
restore_emulated_hw(struct vm_create_params *vcp, int fd,
    int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK], int child_cdrom)
{
	/* struct vm_create_params *vcp = &vmc->vmc_params; */
	int i;
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_restore(fd, vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;

	/* Init master and slave PICs */
	i8259_restore(fd);
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;

	/* Init ns8250 UART */
	ns8250_restore(fd, con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Init mc146818 RTC */
	mc146818_restore(fd, vcp->vcp_id);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init QEMU fw_cfg interface */
	fw_cfg_restore(fd);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_restore(fd);
	virtio_restore(fd, current_vm, child_cdrom, child_disks, child_taps);
}

void
pause_vm_md(struct vmd_vm *vm)
{
	i8253_stop();
	mc146818_stop();
	ns8250_stop();
	virtio_stop(vm);
}

void
unpause_vm_md(struct vmd_vm *vm)
{
	i8253_start();
	mc146818_start();
	ns8250_start();
	virtio_start(vm);
}

int
dump_devs(int fd)
{
	int ret = 0;

	if ((ret = i8253_dump(fd)))
		return ret;
	if ((ret = i8259_dump(fd)))
		return ret;
	if ((ret = ns8250_dump(fd)))
		return ret;
	if ((ret = mc146818_dump(fd)))
		return ret;
	ret = fw_cfg_dump(fd);

	return ret;
}

int
dump_send_header(int fd) {
	struct vm_dump_header vmh;
	int i;

	memcpy(&vmh.vmh_signature, VM_DUMP_SIGNATURE,
	    sizeof(vmh.vmh_signature));

	vmh.vmh_cpuids[0].code = 0x00;
	vmh.vmh_cpuids[0].leaf = 0x00;

	vmh.vmh_cpuids[1].code = 0x01;
	vmh.vmh_cpuids[1].leaf = 0x00;

	vmh.vmh_cpuids[2].code = 0x07;
	vmh.vmh_cpuids[2].leaf = 0x00;

	vmh.vmh_cpuids[3].code = 0x0d;
	vmh.vmh_cpuids[3].leaf = 0x00;

	vmh.vmh_cpuids[4].code = 0x80000001;
	vmh.vmh_cpuids[4].leaf = 0x00;

	vmh.vmh_version = VM_DUMP_VERSION;

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		CPUID_LEAF(vmh.vmh_cpuids[i].code,
		    vmh.vmh_cpuids[i].leaf,
		    vmh.vmh_cpuids[i].a,
		    vmh.vmh_cpuids[i].b,
		    vmh.vmh_cpuids[i].c,
		    vmh.vmh_cpuids[i].d);
	}

	if (atomicio(vwrite, fd, &vmh, sizeof(vmh)) != sizeof(vmh))
		return (-1);

	return (0);
}


/*
 * vcpu_exit_inout
 *
 * Handle all I/O exits that need to be emulated in vmd. This includes the
 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr = 0xFF;

	if (vei->vei.vei_rep || vei->vei.vei_string) {
#ifdef MMIO_DEBUG
		log_info("%s: %s%s%s %d-byte, enc=%d, data=0x%08x, port=0x%04x",
		    __func__,
		    vei->vei.vei_rep == 0 ? "" : "REP ",
		    vei->vei.vei_dir == VEI_DIR_IN ? "IN" : "OUT",
		    vei->vei.vei_string == 0 ? "" : "S",
		    vei->vei.vei_size, vei->vei.vei_encoding,
		    vei->vei.vei_data, vei->vei.vei_port);
		log_info("%s: ECX = 0x%llx, RDX = 0x%llx, RSI = 0x%llx",
		    __func__,
		    vei->vrs.vrs_gprs[VCPU_REGS_RCX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RDX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RSI]);
#endif /* MMIO_DEBUG */
		fatalx("%s: can't emulate REP prefixed IN(S)/OUT(S)",
		    __func__);
	}

	if (ioports_map[vei->vei.vei_port] != NULL)
		intr = ioports_map[vei->vei.vei_port](vrp);
	else if (vei->vei.vei_dir == VEI_DIR_IN)
		set_return_data(vei, 0xFFFFFFFF);

	vei->vrs.vrs_gprs[VCPU_REGS_RIP] += vei->vei.vei_insn_len;

	if (intr != 0xFF)
		vcpu_assert_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}

/*
 * vcpu_exit
 *
 * Handle a vcpu exit. This function is called when it is determined that
 * vmm(4) requires the assistance of vmd to support a particular guest
 * exit type (eg, accessing an I/O port or device). Guest state is contained
 * in 'vrp', and will be resent to vmm(4) on exit completion.
 *
 * Upon conclusion of handling the exit, the function determines if any
 * interrupts should be injected into the guest, and asserts the proper
 * IRQ line whose interrupt should be vectored.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: the exit was handled successfully
 *  1: an error occurred (eg, unknown exit reason passed in 'vrp')
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
	int ret;

	switch (vrp->vrp_exit_reason) {
	case VMX_EXIT_INT_WINDOW:
	case SVM_VMEXIT_VINTR:
	case VMX_EXIT_CPUID:
	case VMX_EXIT_EXTINT:
	case SVM_VMEXIT_INTR:
	case SVM_VMEXIT_MSR:
	case SVM_VMEXIT_CPUID:
		/*
		 * We may be exiting to vmd to handle a pending interrupt but
		 * at the same time the last exit type may have been one of
		 * these. In this case, there's nothing extra to be done
		 * here (and falling through to the default case below results
		 * in more vmd log spam).
		 */
		break;
	case SVM_VMEXIT_NPF:
	case VMX_EXIT_EPT_VIOLATION:
		ret = vcpu_exit_eptviolation(vrp);
		if (ret)
			return (ret);
		break;
	case VMX_EXIT_IO:
	case SVM_VMEXIT_IOIO:
		vcpu_exit_inout(vrp);
		break;
	case VMX_EXIT_HLT:
	case SVM_VMEXIT_HLT:
		vcpu_halt(vrp->vrp_vcpu_id);
		break;
	case VMX_EXIT_TRIPLE_FAULT:
	case SVM_VMEXIT_SHUTDOWN:
		/* reset VM */
		return (EAGAIN);
	default:
		log_debug("%s: unknown exit reason 0x%x",
		    __progname, vrp->vrp_exit_reason);
	}

	return (0);
}

/*
 * vcpu_exit_eptviolation
 *
 * handle an EPT Violation
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: no action required
 *  EFAULT: a protection fault occurred, kill the vm.
 */
static int
vcpu_exit_eptviolation(struct vm_run_params *vrp)
{
	struct vm_exit *ve = vrp->vrp_exit;
	int ret = 0;
#if MMIO_NOTYET
	struct x86_insn insn;
	uint64_t va, pa;
	size_t len = 15;	/* Max instruction length in x86. */
#endif /* MMIO_NOTYET */
	switch (ve->vee.vee_fault_type) {
	case VEE_FAULT_HANDLED:
		break;

#if MMIO_NOTYET
	case VEE_FAULT_MMIO_ASSIST:
		/* Intel VMX might give us the length of the instruction. */
		if (ve->vee.vee_insn_info & VEE_LEN_VALID)
			len = ve->vee.vee_insn_len;

		if (len > 15)
			fatalx("%s: invalid instruction length %lu", __func__,
			    len);

		/* If we weren't given instruction bytes, we need to fetch. */
		if (!(ve->vee.vee_insn_info & VEE_BYTES_VALID)) {
			memset(ve->vee.vee_insn_bytes, 0,
			    sizeof(ve->vee.vee_insn_bytes));
			va = ve->vrs.vrs_gprs[VCPU_REGS_RIP];

			/* XXX Only support instructions that fit on 1 page. */
			if ((va & PAGE_MASK) + len > PAGE_SIZE) {
				log_warnx("%s: instruction might cross page "
				    "boundary", __func__);
				ret = EINVAL;
				break;
			}

			ret = translate_gva(ve, va, &pa, PROT_EXEC);
			if (ret != 0) {
				log_warnx("%s: failed gva translation",
				    __func__);
				break;
			}

			ret = read_mem(pa, ve->vee.vee_insn_bytes, len);
			if (ret != 0) {
				log_warnx("%s: failed to fetch instruction "
				    "bytes from 0x%llx", __func__, pa);
				break;
			}
		}

		ret = insn_decode(ve, &insn);
		if (ret == 0)
			ret = insn_emulate(ve, &insn);
		break;
#endif /* MMIO_NOTYET */

	case VEE_FAULT_PROTECT:
		log_debug("%s: EPT Violation: rip=0x%llx", __progname,
		    ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
		ret = EFAULT;
		break;

	default:
		fatalx("%s: invalid fault_type %d", __progname,
		    ve->vee.vee_fault_type);
		/* UNREACHED */
	}

	return (ret);
}

/*
 * vcpu_exit_pci
 *
 * Handle all I/O to the emulated PCI subsystem.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return value:
 *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
 *  be injected.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr;

	intr = 0xFF;

	switch (vei->vei.vei_port) {
	case PCI_MODE1_ADDRESS_REG:
		pci_handle_address_reg(vrp);
		break;
	case PCI_MODE1_DATA_REG:
	case PCI_MODE1_DATA_REG + 1:
	case PCI_MODE1_DATA_REG + 2:
	case PCI_MODE1_DATA_REG + 3:
		pci_handle_data_reg(vrp);
		break;
	case VM_PCI_IO_BAR_BASE ... VM_PCI_IO_BAR_END:
		intr = pci_handle_io(vrp);
		break;
	default:
		log_warnx("%s: unknown PCI register 0x%llx",
		    __progname, (uint64_t)vei->vei.vei_port);
		break;
	}

	return (intr);
}

/*
 * find_gpa_range
 *
 * Search for a contiguous guest physical mem range.
 *
 * Parameters:
 *  vcp: VM create parameters that contain the memory map to search in
 *  gpa: the starting guest physical address
 *  len: the length of the memory range
 *
 * Return values:
 *  NULL: on failure if there is no memory range as described by the parameters
 *  Pointer to vm_mem_range that contains the start of the range otherwise.
 */
struct vm_mem_range *
find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
{
	size_t i, n;
	struct vm_mem_range *vmr;

	/* Find the first vm_mem_range that contains gpa */
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa < vmr->vmr_gpa + vmr->vmr_size)
			break;
	}

	/* No range found. */
	if (i == vcp->vcp_nmemranges)
		return (NULL);

	/*
	 * vmr may cover the range [gpa, gpa + len) only partly. Make
	 * sure that the following vm_mem_ranges are contiguous and
	 * cover the rest.
	 */
	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
	if (len < n)
		len = 0;
	else
		len -= n;
	gpa = vmr->vmr_gpa + vmr->vmr_size;
	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa != vmr->vmr_gpa)
			return (NULL);
		if (len <= vmr->vmr_size)
			len = 0;
		else
			len -= vmr->vmr_size;

		gpa = vmr->vmr_gpa + vmr->vmr_size;
	}

	if (len != 0)
		return (NULL);

	return (vmr);
}
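
/*
 * Worked example of the check above (illustrative numbers only): with
 * two contiguous ranges [0x0, 0x9f000) and [0x9f000, 0x100000), a
 * request for gpa = 0x9e000, len = 0x2000 succeeds -- the first loop
 * finds the range holding 0x9e000, and the second loop verifies that
 * the following range starts exactly at 0x9f000 and covers the
 * remaining 0x1000 bytes.  If there were a hole between the two ranges,
 * or the map ended early, NULL would be returned instead.
 */
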
/*
 * write_mem
 *
 * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
 *
 * Parameters:
 *  dst: the destination paddr_t in the guest VM
 *  buf: data to copy (or NULL to zero the data)
 *  len: number of bytes to copy
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [dst, dst + len) does not
 *      exist in the guest.
 */
int
write_mem(paddr_t dst, const void *buf, size_t len)
{
	const char *from = buf;
	char *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		if (buf == NULL)
			memset(to, 0, n);
		else {
			memcpy(to, from, n);
			from += n;
		}
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * read_mem
 *
 * Reads memory at guest paddr 'src' into 'buf'.
 *
 * Parameters:
 *  src: the source paddr_t in the guest VM to read from.
 *  buf: destination (local) buffer
 *  len: number of bytes to read
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [src, src + len) does not
 *      exist in the guest.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		to += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * hvaddr_mem
 *
 * Translate a guest physical address to a host virtual address, checking the
 * provided memory range length to confirm it's contiguous within the same
 * guest memory range (vm_mem_range).
 *
 * Parameters:
 *  gpa: guest physical address to translate
 *  len: number of bytes in the intended range
 *
 * Return values:
 *  void* to host virtual memory on success
 *  NULL on error, setting errno to:
 *   EFAULT: gpa falls outside guest memory ranges
 *   EINVAL: requested len extends beyond memory range
 */
void *
hvaddr_mem(paddr_t gpa, size_t len)
{
	struct vm_mem_range *vmr;
	size_t off;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, gpa, len);
	if (vmr == NULL) {
		log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa);
		errno = EFAULT;
		return (NULL);
	}

	off = gpa - vmr->vmr_gpa;
	if (len > (vmr->vmr_size - off)) {
		log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
		    "len=%zu", __func__, gpa, len);
		errno = EINVAL;
		return (NULL);
	}

	return ((char *)vmr->vmr_va + off);
}

/*
 * vcpu_assert_irq
 *
 * Injects the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to inject to
 *  vcpu_id: VCPU ID to inject to
 *  irq: IRQ to inject
 */
void
vcpu_assert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_assert_irq(irq);

	if (i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 1))
			fatalx("%s: can't assert INTR", __func__);

		vcpu_unhalt(vcpu_id);
		vcpu_signal_run(vcpu_id);
	}
}

/*
 * vcpu_deassert_irq
 *
 * Clears the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to clear in
 *  vcpu_id: VCPU ID to clear in
 *  irq: IRQ to clear
 */
void
vcpu_deassert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_deassert_irq(irq);

	if (!i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 0))
			fatalx("%s: can't deassert INTR for vm_id %d, "
			    "vcpu_id %d", __func__, vm_id, vcpu_id);
	}
}
/*
 * set_return_data
 *
 * Utility function for manipulating register data in vm exit info structs.
 * This function ensures that the data is copied to the vei->vei.vei_data
 * field with the proper size for the operation being performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: return data
 */
void
set_return_data(struct vm_exit *vei, uint32_t data)
{
	switch (vei->vei.vei_size) {
	case 1:
		vei->vei.vei_data &= ~0xFF;
		vei->vei.vei_data |= (uint8_t)data;
		break;
	case 2:
		vei->vei.vei_data &= ~0xFFFF;
		vei->vei.vei_data |= (uint16_t)data;
		break;
	case 4:
		vei->vei.vei_data = data;
		break;
	}
}

/*
 * get_input_data
 *
 * Utility function for manipulating register data in vm exit info
 * structs. This function ensures that the data is copied from the
 * vei->vei.vei_data field with the proper size for the operation being
 * performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: location to store the result
 */
void
get_input_data(struct vm_exit *vei, uint32_t *data)
{
	switch (vei->vei.vei_size) {
	case 1:
		*data &= 0xFFFFFF00;
		*data |= (uint8_t)vei->vei.vei_data;
		break;
	case 2:
		*data &= 0xFFFF0000;
		*data |= (uint16_t)vei->vei.vei_data;
		break;
	case 4:
		*data = vei->vei.vei_data;
		break;
	default:
		log_warnx("%s: invalid i/o size %d", __func__,
		    vei->vei.vei_size);
	}

}
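
/*
 * Example of the size handling above (illustrative values): for a 1-byte
 * IN that should return 0x34 while vei_data already holds 0x12345678,
 * set_return_data() leaves the upper bytes intact and stores 0x12345634;
 * get_input_data() performs the mirror-image merge when reading a 1- or
 * 2-byte OUT value from vei_data.  This matches the partial-register
 * semantics of 8- and 16-bit port I/O.
 */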

/*
 * translate_gva
 *
 * Translates a guest virtual address to a guest physical address by walking
 * the currently active page table (if needed).
 *
 * XXX ensure translate_gva updates the A bit in the PTE
 * XXX ensure translate_gva respects segment base and limits in i386 mode
 * XXX ensure translate_gva respects segment wraparound in i8086 mode
 * XXX ensure translate_gva updates the A bit in the segment selector
 * XXX ensure translate_gva respects CR4.LMSLE if available
 *
 * Parameters:
 *  exit: The VCPU this translation should be performed for (guest MMU
 *      settings are gathered from this VCPU)
 *  va: virtual address to translate
 *  pa: pointer to paddr_t variable that will receive the translated physical
 *      address. 'pa' is unchanged on error.
 *  mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
 *      the address should be translated
 *
 * Return values:
 *  0: the address was successfully translated - 'pa' contains the physical
 *     address currently mapped by 'va'.
 *  EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
 *     and %cr2 set in the vcpu structure.
 *  EINVAL: an error occurred reading paging table structures
 */
int
translate_gva(struct vm_exit *exit, uint64_t va, uint64_t *pa, int mode)
{
	int level, shift, pdidx;
	uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
	uint64_t shift_width, pte_size;
	struct vcpu_reg_state *vrs;

	vrs = &exit->vrs;

	if (!pa)
		return (EINVAL);

	if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
		log_debug("%s: unpaged, va=pa=0x%llx", __func__, va);
		*pa = va;
		return (0);
	}

	pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3];

	log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__,
	    vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]);

	if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
		if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
			pte_size = sizeof(uint64_t);
			shift_width = 9;

			if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
				/* 4 level paging */
				level = 4;
				mask = L4_MASK;
				shift = L4_SHIFT;
			} else {
				/* 32 bit with PAE paging */
				level = 3;
				mask = L3_MASK;
				shift = L3_SHIFT;
			}
		} else {
			/* 32 bit paging */
			level = 2;
			shift_width = 10;
			mask = 0xFFC00000;
			shift = 22;
			pte_size = sizeof(uint32_t);
		}
	} else
		return (EINVAL);

	/* XXX: Check for R bit in segment selector and set A bit */

	for (; level > 0; level--) {
		pdidx = (va & mask) >> shift;
		pte_paddr = (pt_paddr) + (pdidx * pte_size);

		log_debug("%s: read pte level %d @ GPA 0x%llx", __func__,
		    level, pte_paddr);
		if (read_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to read pte", __func__);
			return (EFAULT);
		}

		log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr,
		    pte);

		/* XXX: Set CR2 */
		if (!(pte & PG_V))
			return (EFAULT);

		/* XXX: Check for SMAP */
		if ((mode == PROT_WRITE) && !(pte & PG_RW))
			return (EPERM);

		if ((exit->cpl > 0) && !(pte & PG_u))
			return (EPERM);

		pte = pte | PG_U;
		if (mode == PROT_WRITE)
			pte = pte | PG_M;
		if (write_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to write back flags to pte",
			    __func__);
			return (EIO);
		}

		/* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
		if (pte & PG_PS)
			break;

		if (level > 1) {
			pt_paddr = pte & PG_FRAME;
			shift -= shift_width;
			mask = mask >> shift_width;
		}
	}

	low_mask = (1 << shift) - 1;
	high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
	*pa = (pte & high_mask) | (va & low_mask);

	log_debug("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, va, *pa);

	return (0);
}
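
/*
 * Illustrative breakdown of the walk above for 4-level (long mode)
 * paging: the loop starts with mask/shift selecting bits 47:39 of the
 * virtual address (the PML4 index) and narrows by shift_width = 9 bits
 * per level, so successive iterations use bits 38:30, 29:21 and 20:12.
 * A large mapping (PG_PS) stops the walk early, leaving 'shift' big
 * enough that low_mask keeps the 21-bit (2MB) or 30-bit (1GB) page
 * offset.  The same structure handles PAE (3 levels) and classic 32-bit
 * paging (2 levels, shift_width = 10).
 */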

int
intr_pending(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_is_pending();
}

int
intr_ack(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_ack();
}

void
intr_toggle_el(struct vmd_vm *vm, int irq, int val)
{
	/* XXX select active interrupt controller */
	pic_set_elcr(irq, val);
}

int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE,
	    strlen(VM_DUMP_SIGNATURE)) != 0) {
		log_warnx("%s: incompatible dump signature", __func__);
		return (-1);
	}

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: b", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

		case 0x80000001:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}