/*	$OpenBSD: vmm_machdep.c,v 1.41 2024/11/27 10:09:51 mpi Exp $	*/
/*
 * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/refcnt.h>
#include <sys/rwlock.h>
#include <sys/pledge.h>
#include <sys/memrange.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

#include <machine/fpu.h>
#include <machine/pmap.h>
#include <machine/biosvar.h>
#include <machine/segments.h>
#include <machine/cpufunc.h>
#include <machine/vmmvar.h>

#include <dev/isa/isareg.h>
#include <dev/pv/pvreg.h>

#include <dev/vmm/vmm.h>

#ifdef MP_LOCKDEBUG
#include <ddb/db_output.h>
extern int __mp_lock_spinout;
#endif /* MP_LOCKDEBUG */

void *l1tf_flush_region;

#define DEVNAME(s)	((s)->sc_dev.dv_xname)

#define CTRL_DUMP(x,y,z) printf("     %s: Can set:%s Can clear:%s\n", #z , \
				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
				IA32_VMX_##z, 1) ? "Yes" : "No", \
				vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
				IA32_VMX_##z, 0) ?
"Yes" : "No"); 61 62 #define VMX_EXIT_INFO_HAVE_RIP 0x1 63 #define VMX_EXIT_INFO_HAVE_REASON 0x2 64 #define VMX_EXIT_INFO_COMPLETE \ 65 (VMX_EXIT_INFO_HAVE_RIP | VMX_EXIT_INFO_HAVE_REASON) 66 67 void vmx_dump_vmcs_field(uint16_t, const char *); 68 int vmm_enabled(void); 69 void vmm_activate_machdep(struct device *, int); 70 int vmmioctl_machdep(dev_t, u_long, caddr_t, int, struct proc *); 71 int vmm_quiesce_vmx(void); 72 int vm_run(struct vm_run_params *); 73 int vm_intr_pending(struct vm_intr_params *); 74 int vm_rwregs(struct vm_rwregs_params *, int); 75 int vm_rwvmparams(struct vm_rwvmparams_params *, int); 76 int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *); 77 int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); 78 int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *); 79 int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *); 80 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); 81 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *); 82 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *); 83 int vcpu_reload_vmcs_vmx(struct vcpu *); 84 int vcpu_init(struct vcpu *, struct vm_create_params *); 85 int vcpu_init_vmx(struct vcpu *); 86 int vcpu_init_svm(struct vcpu *, struct vm_create_params *); 87 int vcpu_run_vmx(struct vcpu *, struct vm_run_params *); 88 int vcpu_run_svm(struct vcpu *, struct vm_run_params *); 89 void vcpu_deinit(struct vcpu *); 90 void vcpu_deinit_svm(struct vcpu *); 91 void vcpu_deinit_vmx(struct vcpu *); 92 int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int); 93 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *); 94 int vmx_get_exit_info(uint64_t *, uint64_t *); 95 int vmx_load_pdptes(struct vcpu *); 96 int vmx_handle_exit(struct vcpu *); 97 int svm_handle_exit(struct vcpu *); 98 int svm_handle_msr(struct vcpu *); 99 int vmm_handle_xsetbv(struct vcpu *, uint64_t *); 100 int vmx_handle_xsetbv(struct vcpu *); 101 int svm_handle_xsetbv(struct vcpu *); 102 int vmm_handle_cpuid(struct vcpu *); 103 int vmx_handle_rdmsr(struct vcpu *); 104 int vmx_handle_wrmsr(struct vcpu *); 105 int vmx_handle_cr0_write(struct vcpu *, uint64_t); 106 int vmx_handle_cr4_write(struct vcpu *, uint64_t); 107 int vmx_handle_cr(struct vcpu *); 108 int svm_handle_inout(struct vcpu *); 109 int vmx_handle_inout(struct vcpu *); 110 int svm_handle_hlt(struct vcpu *); 111 int vmx_handle_hlt(struct vcpu *); 112 int vmm_inject_ud(struct vcpu *); 113 int vmm_inject_gp(struct vcpu *); 114 int vmm_inject_db(struct vcpu *); 115 void vmx_handle_intr(struct vcpu *); 116 void vmx_handle_misc_enable_msr(struct vcpu *); 117 int vmm_get_guest_memtype(struct vm *, paddr_t); 118 int vmx_get_guest_faulttype(void); 119 int svm_get_guest_faulttype(struct vmcb *); 120 int vmx_get_exit_qualification(uint64_t *); 121 int vmm_get_guest_cpu_cpl(struct vcpu *); 122 int vmm_get_guest_cpu_mode(struct vcpu *); 123 int svm_fault_page(struct vcpu *, paddr_t); 124 int vmx_fault_page(struct vcpu *, paddr_t); 125 int vmx_handle_np_fault(struct vcpu *); 126 int svm_handle_np_fault(struct vcpu *); 127 int vmm_alloc_vpid(uint16_t *); 128 void vmm_free_vpid(uint16_t); 129 const char *vcpu_state_decode(u_int); 130 const char *vmx_exit_reason_decode(uint32_t); 131 const char *svm_exit_reason_decode(uint32_t); 132 const char *vmx_instruction_error_decode(uint32_t); 133 void svm_setmsrbr(struct vcpu *, uint32_t); 134 void svm_setmsrbw(struct vcpu *, uint32_t); 135 void 
svm_setmsrbrw(struct vcpu *, uint32_t); 136 void vmx_setmsrbr(struct vcpu *, uint32_t); 137 void vmx_setmsrbw(struct vcpu *, uint32_t); 138 void vmx_setmsrbrw(struct vcpu *, uint32_t); 139 void svm_set_clean(struct vcpu *, uint32_t); 140 void svm_set_dirty(struct vcpu *, uint32_t); 141 142 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size); 143 void vmm_init_pvclock(struct vcpu *, paddr_t); 144 int vmm_update_pvclock(struct vcpu *); 145 int vmm_pat_is_valid(uint64_t); 146 147 #ifdef MULTIPROCESSOR 148 static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *); 149 #endif 150 151 #ifdef VMM_DEBUG 152 void vmx_vcpu_dump_regs(struct vcpu *); 153 void vmx_dump_vmcs(struct vcpu *); 154 const char *msr_name_decode(uint32_t); 155 void vmm_segment_desc_decode(uint64_t); 156 void vmm_decode_cr0(uint64_t); 157 void vmm_decode_cr3(uint64_t); 158 void vmm_decode_cr4(uint64_t); 159 void vmm_decode_msr_value(uint64_t, uint64_t); 160 void vmm_decode_apicbase_msr_value(uint64_t); 161 void vmm_decode_ia32_fc_value(uint64_t); 162 void vmm_decode_mtrrcap_value(uint64_t); 163 void vmm_decode_perf_status_value(uint64_t); 164 void vmm_decode_perf_ctl_value(uint64_t); 165 void vmm_decode_mtrrdeftype_value(uint64_t); 166 void vmm_decode_efer_value(uint64_t); 167 void vmm_decode_rflags(uint64_t); 168 void vmm_decode_misc_enable_value(uint64_t); 169 const char *vmm_decode_cpu_mode(struct vcpu *); 170 171 extern int mtrr2mrt(int); 172 173 struct vmm_reg_debug_info { 174 uint64_t vrdi_bit; 175 const char *vrdi_present; 176 const char *vrdi_absent; 177 }; 178 #endif /* VMM_DEBUG */ 179 180 extern uint64_t tsc_frequency; 181 extern int tsc_is_invariant; 182 183 const char *vmm_hv_signature = VMM_HV_SIGNATURE; 184 185 const struct kmem_pa_mode vmm_kp_contig = { 186 .kp_constraint = &no_constraint, 187 .kp_maxseg = 1, 188 .kp_align = 4096, 189 .kp_zero = 1, 190 }; 191 192 extern struct cfdriver vmm_cd; 193 extern const struct cfattach vmm_ca; 194 195 /* 196 * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite 197 * to access the individual fields of the guest segment registers. This 198 * struct is indexed by VCPU_REGS_* id. 
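/*
 * Illustrative sketch (not part of this driver): how the
 * vmm_vmx_sreg_vmcs_fields[] table defined just below is typically
 * consumed.  Given a VCPU_REGS_* segment index (eg, VCPU_REGS_CS), the
 * four VMCS field encodings for that guest segment are read with
 * vmread() and packed into a struct vcpu_segment_info, exactly as
 * vcpu_readregs_vmx() does later in this file.  The helper name
 * example_read_guest_sreg() is hypothetical and assumes the vcpu's VMCS
 * is already loaded on the current cpu.
 */
#if 0	/* example only */
static int
example_read_guest_sreg(int sreg_idx, struct vcpu_segment_info *vsi)
{
	uint64_t sel, limit, ar;

	if (vmread(vmm_vmx_sreg_vmcs_fields[sreg_idx].selid, &sel))
		return (EINVAL);
	if (vmread(vmm_vmx_sreg_vmcs_fields[sreg_idx].limitid, &limit))
		return (EINVAL);
	if (vmread(vmm_vmx_sreg_vmcs_fields[sreg_idx].arid, &ar))
		return (EINVAL);
	if (vmread(vmm_vmx_sreg_vmcs_fields[sreg_idx].baseid,
	    &vsi->vsi_base))
		return (EINVAL);

	vsi->vsi_sel = sel;
	vsi->vsi_limit = limit;
	vsi->vsi_ar = ar;
	return (0);
}
#endif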
199 */ 200 const struct { 201 uint64_t selid; 202 uint64_t limitid; 203 uint64_t arid; 204 uint64_t baseid; 205 } vmm_vmx_sreg_vmcs_fields[] = { 206 { VMCS_GUEST_IA32_ES_SEL, VMCS_GUEST_IA32_ES_LIMIT, 207 VMCS_GUEST_IA32_ES_AR, VMCS_GUEST_IA32_ES_BASE }, 208 { VMCS_GUEST_IA32_CS_SEL, VMCS_GUEST_IA32_CS_LIMIT, 209 VMCS_GUEST_IA32_CS_AR, VMCS_GUEST_IA32_CS_BASE }, 210 { VMCS_GUEST_IA32_SS_SEL, VMCS_GUEST_IA32_SS_LIMIT, 211 VMCS_GUEST_IA32_SS_AR, VMCS_GUEST_IA32_SS_BASE }, 212 { VMCS_GUEST_IA32_DS_SEL, VMCS_GUEST_IA32_DS_LIMIT, 213 VMCS_GUEST_IA32_DS_AR, VMCS_GUEST_IA32_DS_BASE }, 214 { VMCS_GUEST_IA32_FS_SEL, VMCS_GUEST_IA32_FS_LIMIT, 215 VMCS_GUEST_IA32_FS_AR, VMCS_GUEST_IA32_FS_BASE }, 216 { VMCS_GUEST_IA32_GS_SEL, VMCS_GUEST_IA32_GS_LIMIT, 217 VMCS_GUEST_IA32_GS_AR, VMCS_GUEST_IA32_GS_BASE }, 218 { VMCS_GUEST_IA32_LDTR_SEL, VMCS_GUEST_IA32_LDTR_LIMIT, 219 VMCS_GUEST_IA32_LDTR_AR, VMCS_GUEST_IA32_LDTR_BASE }, 220 { VMCS_GUEST_IA32_TR_SEL, VMCS_GUEST_IA32_TR_LIMIT, 221 VMCS_GUEST_IA32_TR_AR, VMCS_GUEST_IA32_TR_BASE } 222 }; 223 224 /* Pools for VMs and VCPUs */ 225 extern struct pool vm_pool; 226 extern struct pool vcpu_pool; 227 228 extern struct vmm_softc *vmm_softc; 229 230 /* IDT information used when populating host state area */ 231 extern vaddr_t idt_vaddr; 232 extern struct gate_descriptor *idt; 233 234 /* Constants used in "CR access exit" */ 235 #define CR_WRITE 0 236 #define CR_READ 1 237 #define CR_CLTS 2 238 #define CR_LMSW 3 239 240 /* 241 * vmm_enabled 242 * 243 * Checks if we have at least one CPU with either VMX or SVM. 244 * Returns 1 if we have at least one of either type, but not both, 0 otherwise. 245 */ 246 int 247 vmm_enabled(void) 248 { 249 struct cpu_info *ci; 250 CPU_INFO_ITERATOR cii; 251 int found_vmx = 0, found_svm = 0; 252 253 /* Check if we have at least one CPU with either VMX or SVM */ 254 CPU_INFO_FOREACH(cii, ci) { 255 if (ci->ci_vmm_flags & CI_VMM_VMX) 256 found_vmx = 1; 257 if (ci->ci_vmm_flags & CI_VMM_SVM) 258 found_svm = 1; 259 } 260 261 /* Don't support both SVM and VMX at the same time */ 262 if (found_vmx && found_svm) 263 return (0); 264 265 if (found_vmx || found_svm) 266 return 1; 267 268 return 0; 269 } 270 271 void 272 vmm_attach_machdep(struct device *parent, struct device *self, void *aux) 273 { 274 struct vmm_softc *sc = (struct vmm_softc *)self; 275 struct cpu_info *ci; 276 CPU_INFO_ITERATOR cii; 277 278 sc->sc_md.nr_rvi_cpus = 0; 279 sc->sc_md.nr_ept_cpus = 0; 280 281 /* Calculate CPU features */ 282 CPU_INFO_FOREACH(cii, ci) { 283 if (ci->ci_vmm_flags & CI_VMM_RVI) 284 sc->sc_md.nr_rvi_cpus++; 285 if (ci->ci_vmm_flags & CI_VMM_EPT) 286 sc->sc_md.nr_ept_cpus++; 287 } 288 289 sc->sc_md.pkru_enabled = 0; 290 if (rcr4() & CR4_PKE) 291 sc->sc_md.pkru_enabled = 1; 292 293 if (sc->sc_md.nr_ept_cpus) { 294 printf(": VMX/EPT"); 295 sc->mode = VMM_MODE_EPT; 296 } else if (sc->sc_md.nr_rvi_cpus) { 297 printf(": SVM/RVI"); 298 sc->mode = VMM_MODE_RVI; 299 } else { 300 printf(": unknown"); 301 sc->mode = VMM_MODE_UNKNOWN; 302 } 303 304 if (sc->mode == VMM_MODE_EPT) { 305 if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) { 306 l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE, 307 &kv_any, &vmm_kp_contig, &kd_waitok); 308 if (!l1tf_flush_region) { 309 printf(" (failing, no memory)"); 310 sc->mode = VMM_MODE_UNKNOWN; 311 } else { 312 printf(" (using slow L1TF mitigation)"); 313 memset(l1tf_flush_region, 0xcc, 314 VMX_L1D_FLUSH_SIZE); 315 } 316 } 317 } 318 319 if (sc->mode == VMM_MODE_RVI) { 320 sc->max_vpid = 
curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid; 321 } else { 322 sc->max_vpid = 0xFFF; 323 } 324 325 bzero(&sc->vpids, sizeof(sc->vpids)); 326 rw_init(&sc->vpid_lock, "vpid"); 327 } 328 329 /* 330 * vmm_quiesce_vmx 331 * 332 * Prepare the host for suspend by flushing all VMCS states. 333 */ 334 int 335 vmm_quiesce_vmx(void) 336 { 337 struct vm *vm; 338 struct vcpu *vcpu; 339 int err; 340 341 /* 342 * We should be only called from a quiescing device state so we 343 * don't expect to sleep here. If we can't get all our locks, 344 * something is wrong. 345 */ 346 if ((err = rw_enter(&vmm_softc->vm_lock, RW_WRITE | RW_NOSLEEP))) 347 return (err); 348 349 /* Iterate over each vm... */ 350 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) { 351 /* Iterate over each vcpu... */ 352 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) { 353 err = rw_enter(&vcpu->vc_lock, RW_WRITE | RW_NOSLEEP); 354 if (err) 355 break; 356 357 /* We can skip unlaunched VMCS. Nothing to flush. */ 358 if (atomic_load_int(&vcpu->vc_vmx_vmcs_state) 359 != VMCS_LAUNCHED) { 360 DPRINTF("%s: skipping vcpu %d for vm %d\n", 361 __func__, vcpu->vc_id, vm->vm_id); 362 rw_exit_write(&vcpu->vc_lock); 363 continue; 364 } 365 366 #ifdef MULTIPROCESSOR 367 if (vcpu->vc_last_pcpu != curcpu()) { 368 /* Remote cpu vmclear via ipi. */ 369 err = vmx_remote_vmclear(vcpu->vc_last_pcpu, 370 vcpu); 371 if (err) 372 printf("%s: failed to remote vmclear " 373 "vcpu %d of vm %d\n", __func__, 374 vcpu->vc_id, vm->vm_id); 375 } else 376 #endif 377 { 378 /* Local cpu vmclear instruction. */ 379 if ((err = vmclear(&vcpu->vc_control_pa))) 380 printf("%s: failed to locally vmclear " 381 "vcpu %d of vm %d\n", __func__, 382 vcpu->vc_id, vm->vm_id); 383 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, 384 VMCS_CLEARED); 385 } 386 387 rw_exit_write(&vcpu->vc_lock); 388 if (err) 389 break; 390 DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__, 391 vcpu->vc_id, vm->vm_id); 392 } 393 if (err) 394 break; 395 } 396 rw_exit_write(&vmm_softc->vm_lock); 397 398 if (err) 399 return (err); 400 return (0); 401 } 402 403 void 404 vmm_activate_machdep(struct device *self, int act) 405 { 406 struct cpu_info *ci = curcpu(); 407 408 switch (act) { 409 case DVACT_QUIESCE: 410 /* If we're not in vmm mode, nothing to do. */ 411 if ((ci->ci_flags & CPUF_VMM) == 0) 412 break; 413 414 /* Intel systems need extra steps to sync vcpu state. */ 415 if (vmm_softc->mode == VMM_MODE_EPT) 416 if (vmm_quiesce_vmx()) 417 DPRINTF("%s: vmx quiesce failed\n", __func__); 418 419 /* Stop virtualization mode on all cpus. */ 420 vmm_stop(); 421 break; 422 423 case DVACT_WAKEUP: 424 /* Restart virtualization mode on all cpu's. 
*/ 425 if (vmm_softc->vm_ct > 0) 426 vmm_start(); 427 break; 428 } 429 } 430 431 int 432 vmmioctl_machdep(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 433 { 434 int ret; 435 436 switch (cmd) { 437 case VMM_IOC_INTR: 438 ret = vm_intr_pending((struct vm_intr_params *)data); 439 break; 440 default: 441 DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd); 442 ret = ENOTTY; 443 } 444 445 return (ret); 446 } 447 448 int 449 pledge_ioctl_vmm_machdep(struct proc *p, long com) 450 { 451 switch (com) { 452 case VMM_IOC_INTR: 453 return (0); 454 } 455 456 return (EPERM); 457 } 458 459 /* 460 * vm_intr_pending 461 * 462 * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an 463 * interrupt is pending and needs acknowledgment 464 * 465 * Parameters: 466 * vip: Describes the vm/vcpu for which the interrupt is pending 467 * 468 * Return values: 469 * 0: if successful 470 * ENOENT: if the VM/VCPU defined by 'vip' cannot be found 471 */ 472 int 473 vm_intr_pending(struct vm_intr_params *vip) 474 { 475 struct vm *vm; 476 struct vcpu *vcpu; 477 #ifdef MULTIPROCESSOR 478 struct cpu_info *ci; 479 #endif 480 int error, ret = 0; 481 482 /* Find the desired VM */ 483 error = vm_find(vip->vip_vm_id, &vm); 484 485 /* Not found? exit. */ 486 if (error != 0) 487 return (error); 488 489 vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id); 490 491 if (vcpu == NULL) { 492 ret = ENOENT; 493 goto out; 494 } 495 496 vcpu->vc_intr = vip->vip_intr; 497 #ifdef MULTIPROCESSOR 498 ci = READ_ONCE(vcpu->vc_curcpu); 499 if (ci != NULL) 500 x86_send_ipi(ci, X86_IPI_NOP); 501 #endif 502 503 out: 504 refcnt_rele_wake(&vm->vm_refcnt); 505 return (ret); 506 } 507 508 /* 509 * vm_rwvmparams 510 * 511 * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock 512 * version, etc. 513 * 514 * Parameters: 515 * vrwp: Describes the VM and VCPU to get/set the params from 516 * dir: 0 for reading, 1 for writing 517 * 518 * Return values: 519 * 0: if successful 520 * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found 521 * EINVAL: if an error occurred reading the registers of the guest 522 */ 523 int 524 vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) 525 { 526 struct vm *vm; 527 struct vcpu *vcpu; 528 int error, ret = 0; 529 530 /* Find the desired VM */ 531 error = vm_find(vpp->vpp_vm_id, &vm); 532 533 /* Not found? exit. */ 534 if (error != 0) 535 return (error); 536 537 vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id); 538 539 if (vcpu == NULL) { 540 ret = ENOENT; 541 goto out; 542 } 543 544 if (dir == 0) { 545 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION) 546 vpp->vpp_pvclock_version = vcpu->vc_pvclock_version; 547 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) 548 vpp->vpp_pvclock_system_gpa = \ 549 vcpu->vc_pvclock_system_gpa; 550 } else { 551 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION) 552 vcpu->vc_pvclock_version = vpp->vpp_pvclock_version; 553 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA) { 554 vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa); 555 } 556 } 557 out: 558 refcnt_rele_wake(&vm->vm_refcnt); 559 return (ret); 560 } 561 562 /* 563 * vm_readregs 564 * 565 * IOCTL handler to read/write the current register values of a guest VCPU. 566 * The VCPU must not be running. 567 * 568 * Parameters: 569 * vrwp: Describes the VM and VCPU to get/set the registers from. The 570 * register values are returned here as well. 
571 * dir: 0 for reading, 1 for writing 572 * 573 * Return values: 574 * 0: if successful 575 * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found 576 * EINVAL: if an error occurred accessing the registers of the guest 577 * EPERM: if the vm cannot be accessed from the calling process 578 */ 579 int 580 vm_rwregs(struct vm_rwregs_params *vrwp, int dir) 581 { 582 struct vm *vm; 583 struct vcpu *vcpu; 584 struct vcpu_reg_state *vrs = &vrwp->vrwp_regs; 585 int error, ret = 0; 586 587 /* Find the desired VM */ 588 error = vm_find(vrwp->vrwp_vm_id, &vm); 589 590 /* Not found? exit. */ 591 if (error != 0) 592 return (error); 593 594 vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id); 595 596 if (vcpu == NULL) { 597 ret = ENOENT; 598 goto out; 599 } 600 601 rw_enter_write(&vcpu->vc_lock); 602 if (vmm_softc->mode == VMM_MODE_EPT) 603 ret = (dir == 0) ? 604 vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) : 605 vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs); 606 else if (vmm_softc->mode == VMM_MODE_RVI) 607 ret = (dir == 0) ? 608 vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) : 609 vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs); 610 else { 611 DPRINTF("%s: unknown vmm mode", __func__); 612 ret = EINVAL; 613 } 614 rw_exit_write(&vcpu->vc_lock); 615 out: 616 refcnt_rele_wake(&vm->vm_refcnt); 617 return (ret); 618 } 619 620 /* 621 * vmm_start 622 * 623 * Starts VMM mode on the system 624 */ 625 int 626 vmm_start(void) 627 { 628 int rv = 0; 629 struct cpu_info *self = curcpu(); 630 #ifdef MULTIPROCESSOR 631 struct cpu_info *ci; 632 CPU_INFO_ITERATOR cii; 633 #ifdef MP_LOCKDEBUG 634 int nticks; 635 #endif /* MP_LOCKDEBUG */ 636 #endif /* MULTIPROCESSOR */ 637 638 rw_enter_write(&vmm_softc->sc_slock); 639 640 /* VMM is already running */ 641 if (self->ci_flags & CPUF_VMM) 642 goto unlock; 643 644 /* Start VMM on this CPU */ 645 start_vmm_on_cpu(self); 646 if (!(self->ci_flags & CPUF_VMM)) { 647 printf("%s: failed to enter VMM mode\n", 648 self->ci_dev->dv_xname); 649 rv = EIO; 650 goto unlock; 651 } 652 653 #ifdef MULTIPROCESSOR 654 /* Broadcast start VMM IPI */ 655 x86_broadcast_ipi(X86_IPI_START_VMM); 656 657 CPU_INFO_FOREACH(cii, ci) { 658 if (ci == self) 659 continue; 660 #ifdef MP_LOCKDEBUG 661 nticks = __mp_lock_spinout; 662 #endif /* MP_LOCKDEBUG */ 663 while (!(ci->ci_flags & CPUF_VMM)) { 664 CPU_BUSY_CYCLE(); 665 #ifdef MP_LOCKDEBUG 666 if (--nticks <= 0) { 667 db_printf("%s: spun out", __func__); 668 db_enter(); 669 nticks = __mp_lock_spinout; 670 } 671 #endif /* MP_LOCKDEBUG */ 672 } 673 } 674 #endif /* MULTIPROCESSOR */ 675 unlock: 676 rw_exit_write(&vmm_softc->sc_slock); 677 return (rv); 678 } 679 680 /* 681 * vmm_stop 682 * 683 * Stops VMM mode on the system 684 */ 685 int 686 vmm_stop(void) 687 { 688 int rv = 0; 689 struct cpu_info *self = curcpu(); 690 #ifdef MULTIPROCESSOR 691 struct cpu_info *ci; 692 CPU_INFO_ITERATOR cii; 693 #ifdef MP_LOCKDEBUG 694 int nticks; 695 #endif /* MP_LOCKDEBUG */ 696 #endif /* MULTIPROCESSOR */ 697 698 rw_enter_write(&vmm_softc->sc_slock); 699 700 /* VMM is not running */ 701 if (!(self->ci_flags & CPUF_VMM)) 702 goto unlock; 703 704 /* Stop VMM on this CPU */ 705 stop_vmm_on_cpu(self); 706 if (self->ci_flags & CPUF_VMM) { 707 printf("%s: failed to exit VMM mode\n", 708 self->ci_dev->dv_xname); 709 rv = EIO; 710 goto unlock; 711 } 712 713 #ifdef MULTIPROCESSOR 714 /* Stop VMM on other CPUs */ 715 x86_broadcast_ipi(X86_IPI_STOP_VMM); 716 717 CPU_INFO_FOREACH(cii, ci) { 718 if (ci == self) 719 continue; 720 #ifdef MP_LOCKDEBUG 721 nticks = __mp_lock_spinout; 
722 #endif /* MP_LOCKDEBUG */ 723 while ((ci->ci_flags & CPUF_VMM)) { 724 CPU_BUSY_CYCLE(); 725 #ifdef MP_LOCKDEBUG 726 if (--nticks <= 0) { 727 db_printf("%s: spunout", __func__); 728 db_enter(); 729 nticks = __mp_lock_spinout; 730 } 731 #endif /* MP_LOCKDEBUG */ 732 } 733 } 734 #endif /* MULTIPROCESSOR */ 735 unlock: 736 rw_exit_write(&vmm_softc->sc_slock); 737 return (0); 738 } 739 740 /* 741 * start_vmm_on_cpu 742 * 743 * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn 744 * sequence to enter VMM mode (eg, VMXON) 745 */ 746 void 747 start_vmm_on_cpu(struct cpu_info *ci) 748 { 749 uint64_t msr; 750 uint32_t cr4; 751 struct vmx_invept_descriptor vid; 752 753 /* No VMM mode? exit. */ 754 if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0 && 755 (ci->ci_vmm_flags & CI_VMM_SVM) == 0) 756 return; 757 758 /* 759 * AMD SVM 760 */ 761 if (ci->ci_vmm_flags & CI_VMM_SVM) { 762 msr = rdmsr(MSR_EFER); 763 msr |= EFER_SVME; 764 wrmsr(MSR_EFER, msr); 765 } 766 767 /* 768 * Intel VMX 769 */ 770 if (ci->ci_vmm_flags & CI_VMM_VMX) { 771 if (ci->ci_vmxon_region == 0) 772 return; 773 else { 774 bzero(ci->ci_vmxon_region, PAGE_SIZE); 775 ci->ci_vmxon_region->vr_revision = 776 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; 777 778 /* Enable VMX */ 779 msr = rdmsr(MSR_IA32_FEATURE_CONTROL); 780 if (msr & IA32_FEATURE_CONTROL_LOCK) { 781 if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) 782 return; 783 } else { 784 msr |= IA32_FEATURE_CONTROL_VMX_EN | 785 IA32_FEATURE_CONTROL_LOCK; 786 wrmsr(MSR_IA32_FEATURE_CONTROL, msr); 787 } 788 789 /* Set CR4.VMXE */ 790 cr4 = rcr4(); 791 cr4 |= CR4_VMXE; 792 lcr4(cr4); 793 794 /* Enter VMX mode and clear EPTs on this cpu */ 795 if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa)) 796 panic("vmxon failed"); 797 798 memset(&vid, 0, sizeof(vid)); 799 if (invept(IA32_VMX_INVEPT_GLOBAL_CTX, &vid)) 800 panic("invept failed"); 801 } 802 } 803 804 atomic_setbits_int(&ci->ci_flags, CPUF_VMM); 805 } 806 807 /* 808 * stop_vmm_on_cpu 809 * 810 * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn 811 * sequence to exit VMM mode (eg, VMXOFF) 812 */ 813 void 814 stop_vmm_on_cpu(struct cpu_info *ci) 815 { 816 uint64_t msr; 817 uint32_t cr4; 818 819 if (!(ci->ci_flags & CPUF_VMM)) 820 return; 821 822 /* 823 * AMD SVM 824 */ 825 if (ci->ci_vmm_flags & CI_VMM_SVM) { 826 msr = rdmsr(MSR_EFER); 827 msr &= ~EFER_SVME; 828 wrmsr(MSR_EFER, msr); 829 } 830 831 /* 832 * Intel VMX 833 */ 834 if (ci->ci_vmm_flags & CI_VMM_VMX) { 835 if (vmxoff()) 836 panic("VMXOFF failed"); 837 838 cr4 = rcr4(); 839 cr4 &= ~CR4_VMXE; 840 lcr4(cr4); 841 } 842 843 atomic_clearbits_int(&ci->ci_flags, CPUF_VMM); 844 } 845 846 /* 847 * vmclear_on_cpu 848 * 849 * Flush and clear VMCS on 'ci' by executing vmclear. 
850 * 851 */ 852 void 853 vmclear_on_cpu(struct cpu_info *ci) 854 { 855 if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) { 856 if (vmclear(&ci->ci_vmcs_pa)) 857 panic("VMCLEAR ipi failed"); 858 atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR); 859 } 860 } 861 862 #ifdef MULTIPROCESSOR 863 static int 864 vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu) 865 { 866 #ifdef MP_LOCKDEBUG 867 int nticks = __mp_lock_spinout; 868 #endif /* MP_LOCKDEBUG */ 869 870 rw_enter_write(&ci->ci_vmcs_lock); 871 atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa); 872 x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM); 873 874 while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) { 875 CPU_BUSY_CYCLE(); 876 #ifdef MP_LOCKDEBUG 877 if (--nticks <= 0) { 878 db_printf("%s: spun out\n", __func__); 879 db_enter(); 880 nticks = __mp_lock_spinout; 881 } 882 #endif /* MP_LOCKDEBUG */ 883 } 884 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED); 885 rw_exit_write(&ci->ci_vmcs_lock); 886 887 return (0); 888 } 889 #endif /* MULTIPROCESSOR */ 890 891 /* 892 * vm_impl_init 893 * 894 * VM address space initialization routine 895 * 896 * Parameters: 897 * vm: the VM being initialized 898 * p: vmd process owning the VM 899 * 900 * Return values: 901 * 0: the initialization was successful 902 * EINVAL: unsupported vmm mode 903 * ENOMEM: the initialization failed (lack of resources) 904 */ 905 int 906 vm_impl_init(struct vm *vm, struct proc *p) 907 { 908 int i, mode, ret; 909 vaddr_t mingpa, maxgpa; 910 struct vm_mem_range *vmr; 911 912 /* If not EPT or RVI, nothing to do here */ 913 switch (vmm_softc->mode) { 914 case VMM_MODE_EPT: 915 mode = PMAP_TYPE_EPT; 916 break; 917 case VMM_MODE_RVI: 918 mode = PMAP_TYPE_RVI; 919 break; 920 default: 921 printf("%s: invalid vmm mode %d\n", __func__, vmm_softc->mode); 922 return (EINVAL); 923 } 924 925 vmr = &vm->vm_memranges[0]; 926 mingpa = vmr->vmr_gpa; 927 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1]; 928 maxgpa = vmr->vmr_gpa + vmr->vmr_size; 929 930 /* 931 * uvmspace_alloc (currently) always returns a valid vmspace 932 */ 933 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE, FALSE); 934 vm->vm_map = &vm->vm_vmspace->vm_map; 935 936 /* Map the new map with an anon */ 937 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map); 938 for (i = 0; i < vm->vm_nmemranges; i++) { 939 vmr = &vm->vm_memranges[i]; 940 ret = uvm_share(vm->vm_map, vmr->vmr_gpa, 941 PROT_READ | PROT_WRITE | PROT_EXEC, 942 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size); 943 if (ret) { 944 printf("%s: uvm_share failed (%d)\n", __func__, ret); 945 /* uvmspace_free calls pmap_destroy for us */ 946 KERNEL_LOCK(); 947 uvmspace_free(vm->vm_vmspace); 948 vm->vm_vmspace = NULL; 949 KERNEL_UNLOCK(); 950 return (ENOMEM); 951 } 952 } 953 954 pmap_convert(vm->vm_map->pmap, mode); 955 956 return (0); 957 } 958 959 void 960 vm_impl_deinit(struct vm *vm) 961 { 962 /* unused */ 963 } 964 965 /* 966 * vcpu_reload_vmcs_vmx 967 * 968 * (Re)load the VMCS on the current cpu. Must be called with the VMCS write 969 * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an 970 * ipi will be used to remotely flush it before loading the VMCS locally. 
971 * 972 * Parameters: 973 * vcpu: Pointer to the vcpu needing its VMCS 974 * 975 * Return values: 976 * 0: if successful 977 * EINVAL: an error occurred during flush or reload 978 */ 979 int 980 vcpu_reload_vmcs_vmx(struct vcpu *vcpu) 981 { 982 struct cpu_info *ci, *last_ci; 983 984 rw_assert_wrlock(&vcpu->vc_lock); 985 986 ci = curcpu(); 987 last_ci = vcpu->vc_last_pcpu; 988 989 if (last_ci == NULL) { 990 /* First launch */ 991 if (vmclear(&vcpu->vc_control_pa)) 992 return (EINVAL); 993 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED); 994 #ifdef MULTIPROCESSOR 995 } else if (last_ci != ci) { 996 /* We've moved CPUs at some point, so remote VMCLEAR */ 997 if (vmx_remote_vmclear(last_ci, vcpu)) 998 return (EINVAL); 999 KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED); 1000 #endif /* MULTIPROCESSOR */ 1001 } 1002 1003 if (vmptrld(&vcpu->vc_control_pa)) { 1004 printf("%s: vmptrld\n", __func__); 1005 return (EINVAL); 1006 } 1007 1008 return (0); 1009 } 1010 1011 /* 1012 * vcpu_readregs_vmx 1013 * 1014 * Reads 'vcpu's registers 1015 * 1016 * Parameters: 1017 * vcpu: the vcpu to read register values from 1018 * regmask: the types of registers to read 1019 * loadvmcs: bit to indicate whether the VMCS has to be loaded first 1020 * vrs: output parameter where register values are stored 1021 * 1022 * Return values: 1023 * 0: if successful 1024 * EINVAL: an error reading registers occurred 1025 */ 1026 int 1027 vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs, 1028 struct vcpu_reg_state *vrs) 1029 { 1030 int i, ret = 0; 1031 uint64_t sel, limit, ar; 1032 uint64_t *gprs = vrs->vrs_gprs; 1033 uint64_t *crs = vrs->vrs_crs; 1034 uint64_t *msrs = vrs->vrs_msrs; 1035 uint64_t *drs = vrs->vrs_drs; 1036 struct vcpu_segment_info *sregs = vrs->vrs_sregs; 1037 struct vmx_msr_store *msr_store; 1038 1039 if (loadvmcs) { 1040 if (vcpu_reload_vmcs_vmx(vcpu)) 1041 return (EINVAL); 1042 } 1043 1044 #ifdef VMM_DEBUG 1045 /* VMCS should be loaded... 
*/ 1046 paddr_t pa = 0ULL; 1047 if (vmptrst(&pa)) 1048 panic("%s: vmptrst", __func__); 1049 KASSERT(pa == vcpu->vc_control_pa); 1050 #endif /* VMM_DEBUG */ 1051 1052 if (regmask & VM_RWREGS_GPRS) { 1053 gprs[VCPU_REGS_RAX] = vcpu->vc_gueststate.vg_rax; 1054 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx; 1055 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx; 1056 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx; 1057 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi; 1058 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi; 1059 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8; 1060 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9; 1061 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10; 1062 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11; 1063 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12; 1064 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13; 1065 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14; 1066 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15; 1067 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp; 1068 gprs[VCPU_REGS_RIP] = vcpu->vc_gueststate.vg_rip; 1069 if (vmread(VMCS_GUEST_IA32_RSP, &gprs[VCPU_REGS_RSP])) 1070 goto errout; 1071 if (vmread(VMCS_GUEST_IA32_RFLAGS, &gprs[VCPU_REGS_RFLAGS])) 1072 goto errout; 1073 } 1074 1075 if (regmask & VM_RWREGS_SREGS) { 1076 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) { 1077 if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel)) 1078 goto errout; 1079 if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit)) 1080 goto errout; 1081 if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar)) 1082 goto errout; 1083 if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid, 1084 &sregs[i].vsi_base)) 1085 goto errout; 1086 1087 sregs[i].vsi_sel = sel; 1088 sregs[i].vsi_limit = limit; 1089 sregs[i].vsi_ar = ar; 1090 } 1091 1092 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &limit)) 1093 goto errout; 1094 if (vmread(VMCS_GUEST_IA32_GDTR_BASE, 1095 &vrs->vrs_gdtr.vsi_base)) 1096 goto errout; 1097 vrs->vrs_gdtr.vsi_limit = limit; 1098 1099 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &limit)) 1100 goto errout; 1101 if (vmread(VMCS_GUEST_IA32_IDTR_BASE, 1102 &vrs->vrs_idtr.vsi_base)) 1103 goto errout; 1104 vrs->vrs_idtr.vsi_limit = limit; 1105 } 1106 1107 if (regmask & VM_RWREGS_CRS) { 1108 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2; 1109 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0; 1110 if (vmread(VMCS_GUEST_IA32_CR0, &crs[VCPU_REGS_CR0])) 1111 goto errout; 1112 if (vmread(VMCS_GUEST_IA32_CR3, &crs[VCPU_REGS_CR3])) 1113 goto errout; 1114 if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4])) 1115 goto errout; 1116 if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0])) 1117 goto errout; 1118 if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1])) 1119 goto errout; 1120 if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2])) 1121 goto errout; 1122 if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3])) 1123 goto errout; 1124 } 1125 1126 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 1127 1128 if (regmask & VM_RWREGS_MSRS) { 1129 for (i = 0; i < VCPU_REGS_NMSRS; i++) { 1130 msrs[i] = msr_store[i].vms_data; 1131 } 1132 } 1133 1134 if (regmask & VM_RWREGS_DRS) { 1135 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0; 1136 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1; 1137 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2; 1138 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3; 1139 drs[VCPU_REGS_DR6] = vcpu->vc_gueststate.vg_dr6; 1140 if (vmread(VMCS_GUEST_IA32_DR7, &drs[VCPU_REGS_DR7])) 1141 goto errout; 1142 } 1143 1144 goto out; 1145 1146 errout: 1147 ret = 
EINVAL; 1148 out: 1149 return (ret); 1150 } 1151 1152 /* 1153 * vcpu_readregs_svm 1154 * 1155 * Reads 'vcpu's registers 1156 * 1157 * Parameters: 1158 * vcpu: the vcpu to read register values from 1159 * regmask: the types of registers to read 1160 * vrs: output parameter where register values are stored 1161 * 1162 * Return values: 1163 * 0: if successful 1164 */ 1165 int 1166 vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask, 1167 struct vcpu_reg_state *vrs) 1168 { 1169 uint64_t *gprs = vrs->vrs_gprs; 1170 uint64_t *crs = vrs->vrs_crs; 1171 uint64_t *msrs = vrs->vrs_msrs; 1172 uint64_t *drs = vrs->vrs_drs; 1173 uint32_t attr; 1174 struct vcpu_segment_info *sregs = vrs->vrs_sregs; 1175 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 1176 1177 if (regmask & VM_RWREGS_GPRS) { 1178 gprs[VCPU_REGS_RAX] = vmcb->v_rax; 1179 gprs[VCPU_REGS_RBX] = vcpu->vc_gueststate.vg_rbx; 1180 gprs[VCPU_REGS_RCX] = vcpu->vc_gueststate.vg_rcx; 1181 gprs[VCPU_REGS_RDX] = vcpu->vc_gueststate.vg_rdx; 1182 gprs[VCPU_REGS_RSI] = vcpu->vc_gueststate.vg_rsi; 1183 gprs[VCPU_REGS_RDI] = vcpu->vc_gueststate.vg_rdi; 1184 gprs[VCPU_REGS_R8] = vcpu->vc_gueststate.vg_r8; 1185 gprs[VCPU_REGS_R9] = vcpu->vc_gueststate.vg_r9; 1186 gprs[VCPU_REGS_R10] = vcpu->vc_gueststate.vg_r10; 1187 gprs[VCPU_REGS_R11] = vcpu->vc_gueststate.vg_r11; 1188 gprs[VCPU_REGS_R12] = vcpu->vc_gueststate.vg_r12; 1189 gprs[VCPU_REGS_R13] = vcpu->vc_gueststate.vg_r13; 1190 gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14; 1191 gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15; 1192 gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp; 1193 gprs[VCPU_REGS_RIP] = vmcb->v_rip; 1194 gprs[VCPU_REGS_RSP] = vmcb->v_rsp; 1195 gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags; 1196 } 1197 1198 if (regmask & VM_RWREGS_SREGS) { 1199 sregs[VCPU_REGS_CS].vsi_sel = vmcb->v_cs.vs_sel; 1200 sregs[VCPU_REGS_CS].vsi_limit = vmcb->v_cs.vs_lim; 1201 attr = vmcb->v_cs.vs_attr; 1202 sregs[VCPU_REGS_CS].vsi_ar = (attr & 0xff) | ((attr << 4) & 1203 0xf000); 1204 sregs[VCPU_REGS_CS].vsi_base = vmcb->v_cs.vs_base; 1205 1206 sregs[VCPU_REGS_DS].vsi_sel = vmcb->v_ds.vs_sel; 1207 sregs[VCPU_REGS_DS].vsi_limit = vmcb->v_ds.vs_lim; 1208 attr = vmcb->v_ds.vs_attr; 1209 sregs[VCPU_REGS_DS].vsi_ar = (attr & 0xff) | ((attr << 4) & 1210 0xf000); 1211 sregs[VCPU_REGS_DS].vsi_base = vmcb->v_ds.vs_base; 1212 1213 sregs[VCPU_REGS_ES].vsi_sel = vmcb->v_es.vs_sel; 1214 sregs[VCPU_REGS_ES].vsi_limit = vmcb->v_es.vs_lim; 1215 attr = vmcb->v_es.vs_attr; 1216 sregs[VCPU_REGS_ES].vsi_ar = (attr & 0xff) | ((attr << 4) & 1217 0xf000); 1218 sregs[VCPU_REGS_ES].vsi_base = vmcb->v_es.vs_base; 1219 1220 sregs[VCPU_REGS_FS].vsi_sel = vmcb->v_fs.vs_sel; 1221 sregs[VCPU_REGS_FS].vsi_limit = vmcb->v_fs.vs_lim; 1222 attr = vmcb->v_fs.vs_attr; 1223 sregs[VCPU_REGS_FS].vsi_ar = (attr & 0xff) | ((attr << 4) & 1224 0xf000); 1225 sregs[VCPU_REGS_FS].vsi_base = vmcb->v_fs.vs_base; 1226 1227 sregs[VCPU_REGS_GS].vsi_sel = vmcb->v_gs.vs_sel; 1228 sregs[VCPU_REGS_GS].vsi_limit = vmcb->v_gs.vs_lim; 1229 attr = vmcb->v_gs.vs_attr; 1230 sregs[VCPU_REGS_GS].vsi_ar = (attr & 0xff) | ((attr << 4) & 1231 0xf000); 1232 sregs[VCPU_REGS_GS].vsi_base = vmcb->v_gs.vs_base; 1233 1234 sregs[VCPU_REGS_SS].vsi_sel = vmcb->v_ss.vs_sel; 1235 sregs[VCPU_REGS_SS].vsi_limit = vmcb->v_ss.vs_lim; 1236 attr = vmcb->v_ss.vs_attr; 1237 sregs[VCPU_REGS_SS].vsi_ar = (attr & 0xff) | ((attr << 4) & 1238 0xf000); 1239 sregs[VCPU_REGS_SS].vsi_base = vmcb->v_ss.vs_base; 1240 1241 sregs[VCPU_REGS_LDTR].vsi_sel = vmcb->v_ldtr.vs_sel; 1242 
sregs[VCPU_REGS_LDTR].vsi_limit = vmcb->v_ldtr.vs_lim; 1243 attr = vmcb->v_ldtr.vs_attr; 1244 sregs[VCPU_REGS_LDTR].vsi_ar = (attr & 0xff) | ((attr << 4) 1245 & 0xf000); 1246 sregs[VCPU_REGS_LDTR].vsi_base = vmcb->v_ldtr.vs_base; 1247 1248 sregs[VCPU_REGS_TR].vsi_sel = vmcb->v_tr.vs_sel; 1249 sregs[VCPU_REGS_TR].vsi_limit = vmcb->v_tr.vs_lim; 1250 attr = vmcb->v_tr.vs_attr; 1251 sregs[VCPU_REGS_TR].vsi_ar = (attr & 0xff) | ((attr << 4) & 1252 0xf000); 1253 sregs[VCPU_REGS_TR].vsi_base = vmcb->v_tr.vs_base; 1254 1255 vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim; 1256 vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base; 1257 vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim; 1258 vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base; 1259 } 1260 1261 if (regmask & VM_RWREGS_CRS) { 1262 crs[VCPU_REGS_CR0] = vmcb->v_cr0; 1263 crs[VCPU_REGS_CR3] = vmcb->v_cr3; 1264 crs[VCPU_REGS_CR4] = vmcb->v_cr4; 1265 crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2; 1266 crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0; 1267 } 1268 1269 if (regmask & VM_RWREGS_MSRS) { 1270 msrs[VCPU_REGS_EFER] = vmcb->v_efer; 1271 msrs[VCPU_REGS_STAR] = vmcb->v_star; 1272 msrs[VCPU_REGS_LSTAR] = vmcb->v_lstar; 1273 msrs[VCPU_REGS_CSTAR] = vmcb->v_cstar; 1274 msrs[VCPU_REGS_SFMASK] = vmcb->v_sfmask; 1275 msrs[VCPU_REGS_KGSBASE] = vmcb->v_kgsbase; 1276 } 1277 1278 if (regmask & VM_RWREGS_DRS) { 1279 drs[VCPU_REGS_DR0] = vcpu->vc_gueststate.vg_dr0; 1280 drs[VCPU_REGS_DR1] = vcpu->vc_gueststate.vg_dr1; 1281 drs[VCPU_REGS_DR2] = vcpu->vc_gueststate.vg_dr2; 1282 drs[VCPU_REGS_DR3] = vcpu->vc_gueststate.vg_dr3; 1283 drs[VCPU_REGS_DR6] = vmcb->v_dr6; 1284 drs[VCPU_REGS_DR7] = vmcb->v_dr7; 1285 } 1286 1287 return (0); 1288 } 1289 1290 /* 1291 * vcpu_writeregs_vmx 1292 * 1293 * Writes VCPU registers 1294 * 1295 * Parameters: 1296 * vcpu: the vcpu that has to get its registers written to 1297 * regmask: the types of registers to write 1298 * loadvmcs: bit to indicate whether the VMCS has to be loaded first 1299 * vrs: the register values to write 1300 * 1301 * Return values: 1302 * 0: if successful 1303 * EINVAL an error writing registers occurred 1304 */ 1305 int 1306 vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs, 1307 struct vcpu_reg_state *vrs) 1308 { 1309 int i, ret = 0; 1310 uint16_t sel; 1311 uint64_t limit, ar; 1312 uint64_t *gprs = vrs->vrs_gprs; 1313 uint64_t *crs = vrs->vrs_crs; 1314 uint64_t *msrs = vrs->vrs_msrs; 1315 uint64_t *drs = vrs->vrs_drs; 1316 struct vcpu_segment_info *sregs = vrs->vrs_sregs; 1317 struct vmx_msr_store *msr_store; 1318 1319 if (loadvmcs) { 1320 if (vcpu_reload_vmcs_vmx(vcpu)) 1321 return (EINVAL); 1322 } 1323 1324 #ifdef VMM_DEBUG 1325 /* VMCS should be loaded... 
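/*
 * Illustrative, self-contained sketch (not part of this driver) of the
 * segment-attribute conversion used in vcpu_readregs_svm() above and in
 * vcpu_writeregs_svm() later in this file: the VMCB stores a packed
 * 12-bit attribute (AVL/L/D/G in bits 8-11), while struct
 * vcpu_segment_info carries the VMX-style access-rights layout with the
 * same nibble in bits 12-15.  The helper names and the worked value
 * 0xa9b (a typical 64-bit code segment) are only examples.
 */
#if 0	/* standalone illustration, compile separately */
#include <stdint.h>
#include <stdio.h>

static uint32_t
vmcb_attr_to_ar(uint32_t attr)
{
	/* Low byte unchanged, AVL/L/D/G move from bits 8-11 to 12-15. */
	return (attr & 0xff) | ((attr << 4) & 0xf000);
}

static uint32_t
ar_to_vmcb_attr(uint32_t ar)
{
	/* Inverse transform, as done when writing the VMCB. */
	return (ar & 0xff) | ((ar >> 4) & 0xf00);
}

int
main(void)
{
	uint32_t attr = 0xa9b;		/* P, S, type 0xb, L=1, G=1 */
	uint32_t ar = vmcb_attr_to_ar(attr);

	/* Prints: vmcb attr 0xa9b -> ar 0xa09b -> vmcb attr 0xa9b */
	printf("vmcb attr 0x%03x -> ar 0x%04x -> vmcb attr 0x%03x\n",
	    attr, ar, ar_to_vmcb_attr(ar));
	return 0;
}
#endif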
*/ 1326 paddr_t pa = 0ULL; 1327 if (vmptrst(&pa)) 1328 panic("%s: vmptrst", __func__); 1329 KASSERT(pa == vcpu->vc_control_pa); 1330 #endif /* VMM_DEBUG */ 1331 1332 if (regmask & VM_RWREGS_GPRS) { 1333 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX]; 1334 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX]; 1335 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX]; 1336 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX]; 1337 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI]; 1338 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI]; 1339 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8]; 1340 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9]; 1341 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10]; 1342 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11]; 1343 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12]; 1344 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13]; 1345 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14]; 1346 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15]; 1347 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP]; 1348 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP]; 1349 if (vmwrite(VMCS_GUEST_IA32_RIP, gprs[VCPU_REGS_RIP])) 1350 goto errout; 1351 if (vmwrite(VMCS_GUEST_IA32_RSP, gprs[VCPU_REGS_RSP])) 1352 goto errout; 1353 if (vmwrite(VMCS_GUEST_IA32_RFLAGS, gprs[VCPU_REGS_RFLAGS])) 1354 goto errout; 1355 } 1356 1357 if (regmask & VM_RWREGS_SREGS) { 1358 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) { 1359 sel = sregs[i].vsi_sel; 1360 limit = sregs[i].vsi_limit; 1361 ar = sregs[i].vsi_ar; 1362 1363 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel)) 1364 goto errout; 1365 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit)) 1366 goto errout; 1367 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar)) 1368 goto errout; 1369 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid, 1370 sregs[i].vsi_base)) 1371 goto errout; 1372 } 1373 1374 if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT, 1375 vrs->vrs_gdtr.vsi_limit)) 1376 goto errout; 1377 if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE, 1378 vrs->vrs_gdtr.vsi_base)) 1379 goto errout; 1380 if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT, 1381 vrs->vrs_idtr.vsi_limit)) 1382 goto errout; 1383 if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE, 1384 vrs->vrs_idtr.vsi_base)) 1385 goto errout; 1386 } 1387 1388 if (regmask & VM_RWREGS_CRS) { 1389 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0]; 1390 if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0])) 1391 goto errout; 1392 if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3])) 1393 goto errout; 1394 if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4])) 1395 goto errout; 1396 if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0])) 1397 goto errout; 1398 if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1])) 1399 goto errout; 1400 if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2])) 1401 goto errout; 1402 if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3])) 1403 goto errout; 1404 } 1405 1406 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 1407 1408 if (regmask & VM_RWREGS_MSRS) { 1409 for (i = 0; i < VCPU_REGS_NMSRS; i++) { 1410 msr_store[i].vms_data = msrs[i]; 1411 } 1412 } 1413 1414 if (regmask & VM_RWREGS_DRS) { 1415 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0]; 1416 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1]; 1417 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2]; 1418 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3]; 1419 vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR6]; 1420 if (vmwrite(VMCS_GUEST_IA32_DR7, drs[VCPU_REGS_DR7])) 1421 goto errout; 1422 } 1423 1424 goto out; 1425 1426 errout: 1427 ret = EINVAL; 
1428 out: 1429 if (loadvmcs) { 1430 if (vmclear(&vcpu->vc_control_pa)) 1431 ret = EINVAL; 1432 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED); 1433 } 1434 return (ret); 1435 } 1436 1437 /* 1438 * vcpu_writeregs_svm 1439 * 1440 * Writes 'vcpu's registers 1441 * 1442 * Parameters: 1443 * vcpu: the vcpu that has to get its registers written to 1444 * regmask: the types of registers to write 1445 * vrs: the register values to write 1446 * 1447 * Return values: 1448 * 0: if successful 1449 * EINVAL an error writing registers occurred 1450 */ 1451 int 1452 vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask, 1453 struct vcpu_reg_state *vrs) 1454 { 1455 uint64_t *gprs = vrs->vrs_gprs; 1456 uint64_t *crs = vrs->vrs_crs; 1457 uint16_t attr; 1458 uint64_t *msrs = vrs->vrs_msrs; 1459 uint64_t *drs = vrs->vrs_drs; 1460 struct vcpu_segment_info *sregs = vrs->vrs_sregs; 1461 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 1462 1463 if (regmask & VM_RWREGS_GPRS) { 1464 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX]; 1465 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX]; 1466 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX]; 1467 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX]; 1468 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI]; 1469 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI]; 1470 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R8]; 1471 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R9]; 1472 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R10]; 1473 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R11]; 1474 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R12]; 1475 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R13]; 1476 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R14]; 1477 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R15]; 1478 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP]; 1479 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP]; 1480 1481 vmcb->v_rax = gprs[VCPU_REGS_RAX]; 1482 vmcb->v_rip = gprs[VCPU_REGS_RIP]; 1483 vmcb->v_rsp = gprs[VCPU_REGS_RSP]; 1484 vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS]; 1485 } 1486 1487 if (regmask & VM_RWREGS_SREGS) { 1488 vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS].vsi_sel; 1489 vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS].vsi_limit; 1490 attr = sregs[VCPU_REGS_CS].vsi_ar; 1491 vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1492 vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS].vsi_base; 1493 vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS].vsi_sel; 1494 vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS].vsi_limit; 1495 attr = sregs[VCPU_REGS_DS].vsi_ar; 1496 vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1497 vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS].vsi_base; 1498 vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES].vsi_sel; 1499 vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES].vsi_limit; 1500 attr = sregs[VCPU_REGS_ES].vsi_ar; 1501 vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1502 vmcb->v_es.vs_base = sregs[VCPU_REGS_ES].vsi_base; 1503 vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS].vsi_sel; 1504 vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS].vsi_limit; 1505 attr = sregs[VCPU_REGS_FS].vsi_ar; 1506 vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1507 vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS].vsi_base; 1508 vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS].vsi_sel; 1509 vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS].vsi_limit; 1510 attr = sregs[VCPU_REGS_GS].vsi_ar; 1511 vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1512 vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS].vsi_base; 1513 vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS].vsi_sel; 1514 vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS].vsi_limit; 1515 
attr = sregs[VCPU_REGS_SS].vsi_ar; 1516 vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1517 vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS].vsi_base; 1518 vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR].vsi_sel; 1519 vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR].vsi_limit; 1520 attr = sregs[VCPU_REGS_LDTR].vsi_ar; 1521 vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1522 vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR].vsi_base; 1523 vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR].vsi_sel; 1524 vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR].vsi_limit; 1525 attr = sregs[VCPU_REGS_TR].vsi_ar; 1526 vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00); 1527 vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR].vsi_base; 1528 vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit; 1529 vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base; 1530 vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit; 1531 vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base; 1532 } 1533 1534 if (regmask & VM_RWREGS_CRS) { 1535 vmcb->v_cr0 = crs[VCPU_REGS_CR0]; 1536 vmcb->v_cr3 = crs[VCPU_REGS_CR3]; 1537 vmcb->v_cr4 = crs[VCPU_REGS_CR4]; 1538 vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR2]; 1539 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0]; 1540 } 1541 1542 if (regmask & VM_RWREGS_MSRS) { 1543 vmcb->v_efer |= msrs[VCPU_REGS_EFER]; 1544 vmcb->v_star = msrs[VCPU_REGS_STAR]; 1545 vmcb->v_lstar = msrs[VCPU_REGS_LSTAR]; 1546 vmcb->v_cstar = msrs[VCPU_REGS_CSTAR]; 1547 vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK]; 1548 vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE]; 1549 } 1550 1551 if (regmask & VM_RWREGS_DRS) { 1552 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR0]; 1553 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR1]; 1554 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR2]; 1555 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR3]; 1556 vmcb->v_dr6 = drs[VCPU_REGS_DR6]; 1557 vmcb->v_dr7 = drs[VCPU_REGS_DR7]; 1558 } 1559 1560 return (0); 1561 } 1562 1563 /* 1564 * vcpu_reset_regs_svm 1565 * 1566 * Initializes 'vcpu's registers to supplied state 1567 * 1568 * Parameters: 1569 * vcpu: the vcpu whose register state is to be initialized 1570 * vrs: the register state to set 1571 * 1572 * Return values: 1573 * 0: registers init'ed successfully 1574 * EINVAL: an error occurred setting register state 1575 */ 1576 int 1577 vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs) 1578 { 1579 struct vmcb *vmcb; 1580 int ret; 1581 1582 vmcb = (struct vmcb *)vcpu->vc_control_va; 1583 1584 /* 1585 * Intercept controls 1586 * 1587 * External Interrupt exiting (SVM_INTERCEPT_INTR) 1588 * External NMI exiting (SVM_INTERCEPT_NMI) 1589 * CPUID instruction (SVM_INTERCEPT_CPUID) 1590 * HLT instruction (SVM_INTERCEPT_HLT) 1591 * I/O instructions (SVM_INTERCEPT_INOUT) 1592 * MSR access (SVM_INTERCEPT_MSR) 1593 * shutdown events (SVM_INTERCEPT_SHUTDOWN) 1594 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA) 1595 * 1596 * VMRUN instruction (SVM_INTERCEPT_VMRUN) 1597 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL) 1598 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD) 1599 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE) 1600 * STGI instruction (SVM_INTERCEPT_STGI) 1601 * CLGI instruction (SVM_INTERCEPT_CLGI) 1602 * SKINIT instruction (SVM_INTERCEPT_SKINIT) 1603 * ICEBP instruction (SVM_INTERCEPT_ICEBP) 1604 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND) 1605 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND) 1606 * MONITOR instruction (SVM_INTERCEPT_MONITOR) 1607 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP) 1608 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available) 1609 */ 1610 vmcb->v_intercept1 = 
SVM_INTERCEPT_INTR | SVM_INTERCEPT_NMI | 1611 SVM_INTERCEPT_CPUID | SVM_INTERCEPT_HLT | SVM_INTERCEPT_INOUT | 1612 SVM_INTERCEPT_MSR | SVM_INTERCEPT_SHUTDOWN | SVM_INTERCEPT_INVLPGA; 1613 1614 vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN | SVM_INTERCEPT_VMMCALL | 1615 SVM_INTERCEPT_VMLOAD | SVM_INTERCEPT_VMSAVE | SVM_INTERCEPT_STGI | 1616 SVM_INTERCEPT_CLGI | SVM_INTERCEPT_SKINIT | SVM_INTERCEPT_ICEBP | 1617 SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR | 1618 SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP; 1619 1620 if (xsave_mask) 1621 vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV; 1622 1623 /* Setup I/O bitmap */ 1624 memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE); 1625 vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa); 1626 1627 /* Setup MSR bitmap */ 1628 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE); 1629 vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa); 1630 svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL); 1631 svm_setmsrbrw(vcpu, MSR_SYSENTER_CS); 1632 svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP); 1633 svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP); 1634 svm_setmsrbrw(vcpu, MSR_STAR); 1635 svm_setmsrbrw(vcpu, MSR_LSTAR); 1636 svm_setmsrbrw(vcpu, MSR_CSTAR); 1637 svm_setmsrbrw(vcpu, MSR_SFMASK); 1638 svm_setmsrbrw(vcpu, MSR_FSBASE); 1639 svm_setmsrbrw(vcpu, MSR_GSBASE); 1640 svm_setmsrbrw(vcpu, MSR_KERNELGSBASE); 1641 1642 /* EFER is R/O so we can ensure the guest always has SVME */ 1643 svm_setmsrbr(vcpu, MSR_EFER); 1644 1645 /* allow reading TSC */ 1646 svm_setmsrbr(vcpu, MSR_TSC); 1647 1648 /* allow reading HWCR and PSTATEDEF to determine TSC frequency */ 1649 svm_setmsrbr(vcpu, MSR_HWCR); 1650 svm_setmsrbr(vcpu, MSR_PSTATEDEF(0)); 1651 1652 /* Guest VCPU ASID */ 1653 vmcb->v_asid = vcpu->vc_vpid; 1654 1655 /* TLB Control - First time in, flush all*/ 1656 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL; 1657 1658 /* INTR masking */ 1659 vmcb->v_intr_masking = 1; 1660 1661 /* PAT */ 1662 vmcb->v_g_pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 1663 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 1664 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 1665 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 1666 1667 /* NPT */ 1668 vmcb->v_np_enable = SVM_ENABLE_NP; 1669 vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa; 1670 1671 /* SEV */ 1672 if (vcpu->vc_sev) 1673 vmcb->v_np_enable |= SVM_ENABLE_SEV; 1674 1675 /* Enable SVME in EFER (must always be set) */ 1676 vmcb->v_efer |= EFER_SVME; 1677 1678 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs); 1679 1680 /* xcr0 power on default sets bit 0 (x87 state) */ 1681 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask; 1682 1683 vcpu->vc_parent->vm_map->pmap->eptp = 0; 1684 1685 return ret; 1686 } 1687 1688 /* 1689 * svm_setmsrbr 1690 * 1691 * Allow read access to the specified msr on the supplied vcpu. 
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
{
	uint8_t *msrs;
	uint16_t idx;

	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;

	/*
	 * MSR Read bitmap layout:
	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
	 *
	 * Read enable bit is low order bit of 2-bit pair
	 * per MSR (eg, MSR 0x0 read bit is at bit 0 @ 0x0)
	 */
	if (msr <= 0x1fff) {
		idx = SVM_MSRIDX(msr);
		msrs[idx] &= ~(SVM_MSRBIT_R(msr));
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000));
	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
		idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
		msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000));
	} else {
		printf("%s: invalid msr 0x%x\n", __func__, msr);
		return;
	}
}

/*
 * svm_setmsrbw
 *
 * Allow write access to the specified msr on the supplied vcpu
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
{
	uint8_t *msrs;
	uint16_t idx;

	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;

	/*
	 * MSR Write bitmap layout:
	 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
	 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
	 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
	 *
	 * Write enable bit is high order bit of 2-bit pair
	 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
	 */
	if (msr <= 0x1fff) {
		idx = SVM_MSRIDX(msr);
		msrs[idx] &= ~(SVM_MSRBIT_W(msr));
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		idx = SVM_MSRIDX(msr - 0xc0000000) + 0x800;
		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000));
	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
		idx = SVM_MSRIDX(msr - 0xc0010000) + 0x1000;
		msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000));
	} else {
		printf("%s: invalid msr 0x%x\n", __func__, msr);
		return;
	}
}

/*
 * svm_setmsrbrw
 *
 * Allow read/write access to the specified msr on the supplied vcpu
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
{
	svm_setmsrbr(vcpu, msr);
	svm_setmsrbw(vcpu, msr);
}

/*
 * vmx_setmsrbr
 *
 * Allow read access to the specified msr on the supplied vcpu.
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
{
	uint8_t *msrs;
	uint16_t idx;

	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;

	/*
	 * MSR Read bitmap layout:
	 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
	 */
	if (msr <= 0x1fff) {
		idx = VMX_MSRIDX(msr);
		msrs[idx] &= ~(VMX_MSRBIT(msr));
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		idx = VMX_MSRIDX(msr - 0xc0000000) + 0x400;
		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
	} else
		printf("%s: invalid msr 0x%x\n", __func__, msr);
}

/*
 * vmx_setmsrbw
 *
 * Allow write access to the specified msr on the supplied vcpu
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
{
	uint8_t *msrs;
	uint16_t idx;

	msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;

	/*
	 * MSR Write bitmap layout:
	 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
	 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
	 */
	if (msr <= 0x1fff) {
		idx = VMX_MSRIDX(msr) + 0x800;
		msrs[idx] &= ~(VMX_MSRBIT(msr));
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		idx = VMX_MSRIDX(msr - 0xc0000000) + 0xc00;
		msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000));
	} else
		printf("%s: invalid msr 0x%x\n", __func__, msr);
}

/*
 * vmx_setmsrbrw
 *
 * Allow read/write access to the specified msr on the supplied vcpu
 *
 * Parameters:
 *  vcpu: the VCPU to allow access
 *  msr: the MSR number to allow access to
 */
void
vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
{
	vmx_setmsrbr(vcpu, msr);
	vmx_setmsrbw(vcpu, msr);
}

/*
 * svm_set_clean
 *
 * Sets (mark as unmodified) the VMCB clean bit set in 'value'.
 * For example, to set the clean bit for the VMCB intercepts (bit position 0),
 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
 * Multiple cleanbits can be provided in 'value' at the same time (eg,
 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
 *
 * Note that this function does not clear any bits; to clear bits in the
 * vmcb cleanbits bitfield, use 'svm_set_dirty'.
 *
 * Parameters:
 *  vcpu: the VCPU whose VMCB clean value should be set
 *  value: the value(s) to enable in the cleanbits mask
 */
void
svm_set_clean(struct vcpu *vcpu, uint32_t value)
{
	struct vmcb *vmcb;

	/* If no cleanbits support, do nothing */
	if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
		return;

	vmcb = (struct vmcb *)vcpu->vc_control_va;

	vmcb->v_vmcb_clean_bits |= value;
}

/*
 * svm_set_dirty
 *
 * Clears (mark as modified) the VMCB clean bit set in 'value'.
 * For example, to clear the bit for the VMCB intercepts (bit position 0)
 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
 * Multiple dirty bits can be provided in 'value' at the same time (eg,
 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
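/*
 * Illustrative, self-contained sketch (not part of this driver) of the
 * MSR permission bitmap math used by svm_setmsrbr()/svm_setmsrbw() and
 * vmx_setmsrbr()/vmx_setmsrbw() above.  SVM uses a 2-bit pair per MSR
 * (read bit, then write bit; four MSRs per byte) with the three ranges
 * at byte offsets 0x0, 0x800 and 0x1000, while VMX uses one bit per MSR
 * with separate read (0x0/0x400) and write (0x800/0xc00) halves for the
 * low and high ranges.  The helpers are hypothetical; the driver hides
 * the same arithmetic behind the SVM_MSRIDX()/SVM_MSRBIT_R()/
 * SVM_MSRBIT_W() and VMX_MSRIDX()/VMX_MSRBIT() macros.
 */
#if 0	/* standalone illustration, compile separately */
#include <stdint.h>
#include <stdio.h>

/* SVM: byte index and read-bit position within that byte. */
static void
svm_pos(uint32_t base, uint32_t off, uint32_t *byte, uint32_t *rbit)
{
	*byte = base + off / 4;		/* four 2-bit pairs per byte */
	*rbit = (off % 4) * 2;		/* matching write bit is *rbit + 1 */
}

/* VMX: byte index and bit position in the read (or write) bitmap. */
static void
vmx_pos(uint32_t base, uint32_t off, uint32_t *byte, uint32_t *bit)
{
	*byte = base + off / 8;		/* eight 1-bit entries per byte */
	*bit = off % 8;
}

int
main(void)
{
	uint32_t byte, bit;

	/* EFER (0xc0000080) lies in the 0xc0000000 MSR range. */
	svm_pos(0x800, 0xc0000080 - 0xc0000000, &byte, &bit);
	printf("SVM: byte 0x%x, read bit %u, write bit %u\n",
	    byte, bit, bit + 1);		/* byte 0x820, bits 0 and 1 */

	vmx_pos(0x400, 0xc0000080 - 0xc0000000, &byte, &bit);
	printf("VMX read: byte 0x%x, bit %u\n", byte, bit);	/* 0x410, 0 */
	return 0;
}
#endif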
1904 * 1905 * Parameters: 1906 * vmcs: the VCPU whose VMCB dirty value should be set 1907 * value: the value(s) to dirty in the cleanbits mask 1908 */ 1909 void 1910 svm_set_dirty(struct vcpu *vcpu, uint32_t value) 1911 { 1912 struct vmcb *vmcb; 1913 1914 /* If no cleanbits support, do nothing */ 1915 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean) 1916 return; 1917 1918 vmcb = (struct vmcb *)vcpu->vc_control_va; 1919 1920 vmcb->v_vmcb_clean_bits &= ~value; 1921 } 1922 1923 /* 1924 * vcpu_reset_regs_vmx 1925 * 1926 * Initializes 'vcpu's registers to supplied state 1927 * 1928 * Parameters: 1929 * vcpu: the vcpu whose register state is to be initialized 1930 * vrs: the register state to set 1931 * 1932 * Return values: 1933 * 0: registers init'ed successfully 1934 * EINVAL: an error occurred setting register state 1935 */ 1936 int 1937 vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs) 1938 { 1939 int ret = 0, ug = 0; 1940 uint32_t cr0, cr4; 1941 uint32_t pinbased, procbased, procbased2, exit, entry; 1942 uint32_t want1, want0; 1943 uint64_t ctrlval, cr3, msr_misc_enable; 1944 uint16_t ctrl; 1945 struct vmx_msr_store *msr_store; 1946 1947 rw_assert_wrlock(&vcpu->vc_lock); 1948 1949 cr0 = vrs->vrs_crs[VCPU_REGS_CR0]; 1950 1951 if (vcpu_reload_vmcs_vmx(vcpu)) { 1952 DPRINTF("%s: error reloading VMCS\n", __func__); 1953 ret = EINVAL; 1954 goto exit; 1955 } 1956 1957 #ifdef VMM_DEBUG 1958 /* VMCS should be loaded... */ 1959 paddr_t pa = 0ULL; 1960 if (vmptrst(&pa)) 1961 panic("%s: vmptrst", __func__); 1962 KASSERT(pa == vcpu->vc_control_pa); 1963 #endif /* VMM_DEBUG */ 1964 1965 /* Compute Basic Entry / Exit Controls */ 1966 vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC); 1967 vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS); 1968 vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS); 1969 vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS); 1970 vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS); 1971 1972 /* Compute True Entry / Exit Controls (if applicable) */ 1973 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 1974 vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS); 1975 vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS); 1976 vcpu->vc_vmx_true_pinbased_ctls = 1977 rdmsr(IA32_VMX_TRUE_PINBASED_CTLS); 1978 vcpu->vc_vmx_true_procbased_ctls = 1979 rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS); 1980 } 1981 1982 /* Compute Secondary Procbased Controls (if applicable) */ 1983 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 1984 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) 1985 vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS); 1986 1987 /* 1988 * Pinbased ctrls 1989 * 1990 * We must be able to set the following: 1991 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt 1992 * IA32_VMX_NMI_EXITING - exit on host NMI 1993 */ 1994 want1 = IA32_VMX_EXTERNAL_INT_EXITING | 1995 IA32_VMX_NMI_EXITING; 1996 want0 = 0; 1997 1998 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 1999 ctrl = IA32_VMX_TRUE_PINBASED_CTLS; 2000 ctrlval = vcpu->vc_vmx_true_pinbased_ctls; 2001 } else { 2002 ctrl = IA32_VMX_PINBASED_CTLS; 2003 ctrlval = vcpu->vc_vmx_pinbased_ctls; 2004 } 2005 2006 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) { 2007 DPRINTF("%s: error computing pinbased controls\n", __func__); 2008 ret = EINVAL; 2009 goto exit; 2010 } 2011 2012 if (vmwrite(VMCS_PINBASED_CTLS, pinbased)) { 2013 DPRINTF("%s: error setting pinbased controls\n", __func__); 2014 ret = EINVAL; 2015 goto exit; 2016 } 2017 2018 /* 2019 * 
Procbased ctrls 2020 * 2021 * We must be able to set the following: 2022 * IA32_VMX_HLT_EXITING - exit on HLT instruction 2023 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction 2024 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions 2025 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses 2026 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access 2027 * IA32_VMX_CR8_STORE_EXITING - guest TPR access 2028 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow) 2029 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction 2030 * 2031 * If we have EPT, we must be able to clear the following 2032 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses 2033 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses 2034 */ 2035 want1 = IA32_VMX_HLT_EXITING | 2036 IA32_VMX_MWAIT_EXITING | 2037 IA32_VMX_UNCONDITIONAL_IO_EXITING | 2038 IA32_VMX_USE_MSR_BITMAPS | 2039 IA32_VMX_CR8_LOAD_EXITING | 2040 IA32_VMX_CR8_STORE_EXITING | 2041 IA32_VMX_MONITOR_EXITING | 2042 IA32_VMX_USE_TPR_SHADOW; 2043 want0 = 0; 2044 2045 want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS; 2046 want0 |= IA32_VMX_CR3_LOAD_EXITING | IA32_VMX_CR3_STORE_EXITING; 2047 2048 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 2049 ctrl = IA32_VMX_TRUE_PROCBASED_CTLS; 2050 ctrlval = vcpu->vc_vmx_true_procbased_ctls; 2051 } else { 2052 ctrl = IA32_VMX_PROCBASED_CTLS; 2053 ctrlval = vcpu->vc_vmx_procbased_ctls; 2054 } 2055 2056 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) { 2057 DPRINTF("%s: error computing procbased controls\n", __func__); 2058 ret = EINVAL; 2059 goto exit; 2060 } 2061 2062 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { 2063 DPRINTF("%s: error setting procbased controls\n", __func__); 2064 ret = EINVAL; 2065 goto exit; 2066 } 2067 2068 /* 2069 * Secondary Procbased ctrls 2070 * 2071 * We want to be able to set the following, if available: 2072 * IA32_VMX_ENABLE_VPID - use VPIDs where available 2073 * 2074 * If we have EPT, we must be able to set the following: 2075 * IA32_VMX_ENABLE_EPT - enable EPT 2076 * 2077 * If we have unrestricted guest capability, we must be able to set 2078 * the following: 2079 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller 2080 * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter) 2081 */ 2082 want1 = IA32_VMX_ENABLE_EPT; 2083 2084 /* XXX checking for 2ndary controls can be combined here */ 2085 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 2086 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { 2087 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 2088 IA32_VMX_ENABLE_VPID, 1)) { 2089 want1 |= IA32_VMX_ENABLE_VPID; 2090 vcpu->vc_vmx_vpid_enabled = 1; 2091 } 2092 } 2093 2094 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 2095 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { 2096 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 2097 IA32_VMX_UNRESTRICTED_GUEST, 1)) { 2098 if ((cr0 & (CR0_PE | CR0_PG)) == 0) { 2099 want1 |= IA32_VMX_UNRESTRICTED_GUEST; 2100 ug = 1; 2101 } 2102 } 2103 } 2104 2105 want0 = ~want1; 2106 ctrlval = vcpu->vc_vmx_procbased2_ctls; 2107 ctrl = IA32_VMX_PROCBASED2_CTLS; 2108 2109 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) { 2110 DPRINTF("%s: error computing secondary procbased controls\n", 2111 __func__); 2112 ret = EINVAL; 2113 goto exit; 2114 } 2115 2116 if (vmwrite(VMCS_PROCBASED2_CTLS, procbased2)) { 2117 DPRINTF("%s: error setting secondary procbased controls\n", 2118 __func__); 2119 ret = EINVAL; 2120 goto exit; 2121 } 2122 2123 /* 2124 * Exit 
ctrls 2125 * 2126 * We must be able to set the following: 2127 * IA32_VMX_SAVE_DEBUG_CONTROLS 2128 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode 2129 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit 2130 */ 2131 want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE | 2132 IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT | 2133 IA32_VMX_SAVE_DEBUG_CONTROLS; 2134 want0 = 0; 2135 2136 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 2137 ctrl = IA32_VMX_TRUE_EXIT_CTLS; 2138 ctrlval = vcpu->vc_vmx_true_exit_ctls; 2139 } else { 2140 ctrl = IA32_VMX_EXIT_CTLS; 2141 ctrlval = vcpu->vc_vmx_exit_ctls; 2142 } 2143 2144 if (rcr4() & CR4_CET) 2145 want1 |= IA32_VMX_LOAD_HOST_CET_STATE; 2146 else 2147 want0 |= IA32_VMX_LOAD_HOST_CET_STATE; 2148 2149 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) { 2150 DPRINTF("%s: error computing exit controls\n", __func__); 2151 ret = EINVAL; 2152 goto exit; 2153 } 2154 2155 if (vmwrite(VMCS_EXIT_CTLS, exit)) { 2156 DPRINTF("%s: error setting exit controls\n", __func__); 2157 ret = EINVAL; 2158 goto exit; 2159 } 2160 2161 /* 2162 * Entry ctrls 2163 * 2164 * We must be able to set the following: 2165 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest) 2166 * IA32_VMX_LOAD_DEBUG_CONTROLS 2167 * We must be able to clear the following: 2168 * IA32_VMX_ENTRY_TO_SMM - enter to SMM 2169 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT 2170 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY 2171 */ 2172 want1 = IA32_VMX_LOAD_DEBUG_CONTROLS; 2173 if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) 2174 want1 |= IA32_VMX_IA32E_MODE_GUEST; 2175 2176 want0 = IA32_VMX_ENTRY_TO_SMM | 2177 IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT | 2178 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY; 2179 2180 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 2181 ctrl = IA32_VMX_TRUE_ENTRY_CTLS; 2182 ctrlval = vcpu->vc_vmx_true_entry_ctls; 2183 } else { 2184 ctrl = IA32_VMX_ENTRY_CTLS; 2185 ctrlval = vcpu->vc_vmx_entry_ctls; 2186 } 2187 2188 if (rcr4() & CR4_CET) 2189 want1 |= IA32_VMX_LOAD_GUEST_CET_STATE; 2190 else 2191 want0 |= IA32_VMX_LOAD_GUEST_CET_STATE; 2192 2193 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) { 2194 ret = EINVAL; 2195 goto exit; 2196 } 2197 2198 if (vmwrite(VMCS_ENTRY_CTLS, entry)) { 2199 ret = EINVAL; 2200 goto exit; 2201 } 2202 2203 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 2204 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { 2205 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 2206 IA32_VMX_ENABLE_VPID, 1)) { 2207 if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) { 2208 DPRINTF("%s: error setting guest VPID\n", 2209 __func__); 2210 ret = EINVAL; 2211 goto exit; 2212 } 2213 } 2214 } 2215 2216 /* 2217 * Determine which bits in CR0 have to be set to a fixed 2218 * value as per Intel SDM A.7. 2219 * CR0 bits in the vrs parameter must match these. 2220 */ 2221 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & 2222 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); 2223 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) & 2224 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); 2225 2226 /* 2227 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as 2228 * fixed to 1 even if the CPU supports the unrestricted guest 2229 * feature. Update want1 and want0 accordingly to allow 2230 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if 2231 * the CPU has the unrestricted guest capability. 
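	 *
	 * As a worked example (values typical of current CPUs, not
	 * guaranteed): with CR0_FIXED0 = 0x80000021 and
	 * CR0_FIXED1 = 0xffffffff,
	 *
	 *	want1 = 0x80000021 & 0xffffffff   = 0x80000021 (PG | NE | PE)
	 *	want0 = ~0x80000021 & ~0xffffffff = 0x00000000
	 *
	 * i.e. PG, NE and PE must be 1 and no bit is forced to 0. With the
	 * unrestricted guest adjustment below, PG and PE drop out of want1
	 * and only NE remains mandatory.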
2232 */ 2233 if (ug) { 2234 want1 &= ~(CR0_PG | CR0_PE); 2235 want0 &= ~(CR0_PG | CR0_PE); 2236 } 2237 2238 /* 2239 * VMX may require some bits to be set that userland should not have 2240 * to care about. Set those here. 2241 */ 2242 if (want1 & CR0_NE) 2243 cr0 |= CR0_NE; 2244 2245 if ((cr0 & want1) != want1) { 2246 ret = EINVAL; 2247 goto exit; 2248 } 2249 2250 if ((~cr0 & want0) != want0) { 2251 ret = EINVAL; 2252 goto exit; 2253 } 2254 2255 vcpu->vc_vmx_cr0_fixed1 = want1; 2256 vcpu->vc_vmx_cr0_fixed0 = want0; 2257 /* 2258 * Determine which bits in CR4 have to be set to a fixed 2259 * value as per Intel SDM A.8. 2260 * CR4 bits in the vrs parameter must match these, except 2261 * CR4_VMXE - we add that here since it must always be set. 2262 */ 2263 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & 2264 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); 2265 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) & 2266 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); 2267 2268 cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE; 2269 2270 if ((cr4 & want1) != want1) { 2271 ret = EINVAL; 2272 goto exit; 2273 } 2274 2275 if ((~cr4 & want0) != want0) { 2276 ret = EINVAL; 2277 goto exit; 2278 } 2279 2280 cr3 = vrs->vrs_crs[VCPU_REGS_CR3]; 2281 2282 /* Restore PDPTEs if 32-bit PAE paging is being used */ 2283 if (cr3 && (cr4 & CR4_PAE) && 2284 !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) { 2285 if (vmwrite(VMCS_GUEST_PDPTE0, 2286 vrs->vrs_crs[VCPU_REGS_PDPTE0])) { 2287 ret = EINVAL; 2288 goto exit; 2289 } 2290 2291 if (vmwrite(VMCS_GUEST_PDPTE1, 2292 vrs->vrs_crs[VCPU_REGS_PDPTE1])) { 2293 ret = EINVAL; 2294 goto exit; 2295 } 2296 2297 if (vmwrite(VMCS_GUEST_PDPTE2, 2298 vrs->vrs_crs[VCPU_REGS_PDPTE2])) { 2299 ret = EINVAL; 2300 goto exit; 2301 } 2302 2303 if (vmwrite(VMCS_GUEST_PDPTE3, 2304 vrs->vrs_crs[VCPU_REGS_PDPTE3])) { 2305 ret = EINVAL; 2306 goto exit; 2307 } 2308 } 2309 2310 vrs->vrs_crs[VCPU_REGS_CR0] = cr0; 2311 vrs->vrs_crs[VCPU_REGS_CR4] = cr4; 2312 2313 msr_misc_enable = rdmsr(MSR_MISC_ENABLE); 2314 2315 /* 2316 * Select host MSRs to be loaded on exit 2317 */ 2318 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va; 2319 msr_store[VCPU_HOST_REGS_EFER].vms_index = MSR_EFER; 2320 msr_store[VCPU_HOST_REGS_EFER].vms_data = rdmsr(MSR_EFER); 2321 msr_store[VCPU_HOST_REGS_STAR].vms_index = MSR_STAR; 2322 msr_store[VCPU_HOST_REGS_STAR].vms_data = rdmsr(MSR_STAR); 2323 msr_store[VCPU_HOST_REGS_LSTAR].vms_index = MSR_LSTAR; 2324 msr_store[VCPU_HOST_REGS_LSTAR].vms_data = rdmsr(MSR_LSTAR); 2325 msr_store[VCPU_HOST_REGS_CSTAR].vms_index = MSR_CSTAR; 2326 msr_store[VCPU_HOST_REGS_CSTAR].vms_data = 0; 2327 msr_store[VCPU_HOST_REGS_SFMASK].vms_index = MSR_SFMASK; 2328 msr_store[VCPU_HOST_REGS_SFMASK].vms_data = rdmsr(MSR_SFMASK); 2329 msr_store[VCPU_HOST_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE; 2330 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data = 0; 2331 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE; 2332 msr_store[VCPU_HOST_REGS_MISC_ENABLE].vms_data = msr_misc_enable; 2333 2334 /* 2335 * Select guest MSRs to be loaded on entry / saved on exit 2336 */ 2337 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 2338 2339 msr_store[VCPU_REGS_EFER].vms_index = MSR_EFER; 2340 msr_store[VCPU_REGS_STAR].vms_index = MSR_STAR; 2341 msr_store[VCPU_REGS_LSTAR].vms_index = MSR_LSTAR; 2342 msr_store[VCPU_REGS_CSTAR].vms_index = MSR_CSTAR; 2343 msr_store[VCPU_REGS_SFMASK].vms_index = MSR_SFMASK; 2344 msr_store[VCPU_REGS_KGSBASE].vms_index = MSR_KERNELGSBASE; 2345 
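	/*
	 * Each vmx_msr_store slot is an (index, data) pair that the CPU
	 * consumes according to the counts and addresses programmed below.
	 * Adding another auto-saved guest MSR is, roughly (sketch only,
	 * VCPU_REGS_FOO and MSR_FOO are hypothetical names):
	 *
	 *	msr_store[VCPU_REGS_FOO].vms_index = MSR_FOO;
	 *
	 * together with growing VCPU_REGS_NMSRS so the counts below cover
	 * the new slot.
	 */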
msr_store[VCPU_REGS_MISC_ENABLE].vms_index = MSR_MISC_ENABLE; 2346 2347 /* 2348 * Initialize MSR_MISC_ENABLE as it can't be read and populated from vmd 2349 * and some of the content is based on the host. 2350 */ 2351 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = msr_misc_enable; 2352 msr_store[VCPU_REGS_MISC_ENABLE].vms_data &= 2353 ~(MISC_ENABLE_TCC | MISC_ENABLE_PERF_MON_AVAILABLE | 2354 MISC_ENABLE_EIST_ENABLED | MISC_ENABLE_ENABLE_MONITOR_FSM | 2355 MISC_ENABLE_xTPR_MESSAGE_DISABLE); 2356 msr_store[VCPU_REGS_MISC_ENABLE].vms_data |= 2357 MISC_ENABLE_BTS_UNAVAILABLE | MISC_ENABLE_PEBS_UNAVAILABLE; 2358 2359 /* 2360 * Currently we use the same memory for guest MSRs (entry-load and 2361 * exit-store) so they have the same count. We exit-load the same 2362 * host MSRs, so same count but different memory. Those are just 2363 * our current choices, not architectural requirements. 2364 */ 2365 if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VCPU_REGS_NMSRS)) { 2366 DPRINTF("%s: error setting guest MSR exit store count\n", 2367 __func__); 2368 ret = EINVAL; 2369 goto exit; 2370 } 2371 2372 if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT, VCPU_HOST_REGS_NMSRS)) { 2373 DPRINTF("%s: error setting guest MSR exit load count\n", 2374 __func__); 2375 ret = EINVAL; 2376 goto exit; 2377 } 2378 2379 if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, VCPU_REGS_NMSRS)) { 2380 DPRINTF("%s: error setting guest MSR entry load count\n", 2381 __func__); 2382 ret = EINVAL; 2383 goto exit; 2384 } 2385 2386 if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS, 2387 vcpu->vc_vmx_msr_exit_save_pa)) { 2388 DPRINTF("%s: error setting guest MSR exit store address\n", 2389 __func__); 2390 ret = EINVAL; 2391 goto exit; 2392 } 2393 2394 if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS, 2395 vcpu->vc_vmx_msr_exit_load_pa)) { 2396 DPRINTF("%s: error setting guest MSR exit load address\n", 2397 __func__); 2398 ret = EINVAL; 2399 goto exit; 2400 } 2401 2402 if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS, 2403 vcpu->vc_vmx_msr_exit_save_pa)) { 2404 DPRINTF("%s: error setting guest MSR entry load address\n", 2405 __func__); 2406 ret = EINVAL; 2407 goto exit; 2408 } 2409 2410 if (vmwrite(VMCS_MSR_BITMAP_ADDRESS, 2411 vcpu->vc_msr_bitmap_pa)) { 2412 DPRINTF("%s: error setting guest MSR bitmap address\n", 2413 __func__); 2414 ret = EINVAL; 2415 goto exit; 2416 } 2417 2418 if (vmwrite(VMCS_CR4_MASK, CR4_VMXE)) { 2419 DPRINTF("%s: error setting guest CR4 mask\n", __func__); 2420 ret = EINVAL; 2421 goto exit; 2422 } 2423 2424 if (vmwrite(VMCS_CR0_MASK, CR0_NE)) { 2425 DPRINTF("%s: error setting guest CR0 mask\n", __func__); 2426 ret = EINVAL; 2427 goto exit; 2428 } 2429 2430 /* 2431 * Set up the VMCS for the register state we want during VCPU start. 2432 * This matches what the CPU state would be after a bootloader 2433 * transition to 'start'. 
2434 */ 2435 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs); 2436 2437 /* 2438 * Set up the MSR bitmap 2439 */ 2440 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE); 2441 vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL); 2442 vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS); 2443 vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP); 2444 vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP); 2445 vmx_setmsrbrw(vcpu, MSR_EFER); 2446 vmx_setmsrbrw(vcpu, MSR_STAR); 2447 vmx_setmsrbrw(vcpu, MSR_LSTAR); 2448 vmx_setmsrbrw(vcpu, MSR_CSTAR); 2449 vmx_setmsrbrw(vcpu, MSR_SFMASK); 2450 vmx_setmsrbrw(vcpu, MSR_FSBASE); 2451 vmx_setmsrbrw(vcpu, MSR_GSBASE); 2452 vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE); 2453 2454 vmx_setmsrbr(vcpu, MSR_MISC_ENABLE); 2455 vmx_setmsrbr(vcpu, MSR_TSC); 2456 2457 /* If host supports CET, pass through access to the guest. */ 2458 if (rcr4() & CR4_CET) 2459 vmx_setmsrbrw(vcpu, MSR_S_CET); 2460 2461 /* XXX CR0 shadow */ 2462 /* XXX CR4 shadow */ 2463 2464 /* xcr0 power on default sets bit 0 (x87 state) */ 2465 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask; 2466 2467 /* XXX PAT shadow */ 2468 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT); 2469 2470 /* Flush the VMCS */ 2471 if (vmclear(&vcpu->vc_control_pa)) { 2472 DPRINTF("%s: vmclear failed\n", __func__); 2473 ret = EINVAL; 2474 } 2475 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED); 2476 2477 exit: 2478 return (ret); 2479 } 2480 2481 /* 2482 * vcpu_init_vmx 2483 * 2484 * Intel VMX specific VCPU initialization routine. 2485 * 2486 * This function allocates various per-VCPU memory regions, sets up initial 2487 * VCPU VMCS controls, and sets initial register values. 2488 * 2489 * Parameters: 2490 * vcpu: the VCPU structure being initialized 2491 * 2492 * Return values: 2493 * 0: the VCPU was initialized successfully 2494 * ENOMEM: insufficient resources 2495 * EINVAL: an error occurred during VCPU initialization 2496 */ 2497 int 2498 vcpu_init_vmx(struct vcpu *vcpu) 2499 { 2500 struct vmcs *vmcs; 2501 uint64_t msr, eptp; 2502 uint32_t cr0, cr4; 2503 int ret = 0; 2504 2505 /* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. 
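	 *
	 * Each per-VCPU region below is set up the same way: allocate a
	 * zeroed kernel page and resolve its physical address. A sketch of
	 * the pattern (not a separate helper in this file):
	 *
	 *	va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
	 *	    &kd_waitok);
	 *	if (va == 0 || !pmap_extract(pmap_kernel(), va, &pa))
	 *		return (ENOMEM);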
*/ 2506 if (vmm_alloc_vpid(&vcpu->vc_vpid)) 2507 return (ENOMEM); 2508 2509 /* Allocate VMCS VA */ 2510 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, 2511 &kd_waitok); 2512 vcpu->vc_vmx_vmcs_state = VMCS_CLEARED; 2513 2514 if (!vcpu->vc_control_va) { 2515 ret = ENOMEM; 2516 goto exit; 2517 } 2518 2519 /* Compute VMCS PA */ 2520 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, 2521 (paddr_t *)&vcpu->vc_control_pa)) { 2522 ret = ENOMEM; 2523 goto exit; 2524 } 2525 2526 /* Allocate MSR bitmap VA */ 2527 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, 2528 &kd_waitok); 2529 2530 if (!vcpu->vc_msr_bitmap_va) { 2531 ret = ENOMEM; 2532 goto exit; 2533 } 2534 2535 /* Compute MSR bitmap PA */ 2536 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va, 2537 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { 2538 ret = ENOMEM; 2539 goto exit; 2540 } 2541 2542 /* Allocate MSR exit load area VA */ 2543 vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, 2544 &kp_zero, &kd_waitok); 2545 2546 if (!vcpu->vc_vmx_msr_exit_load_va) { 2547 ret = ENOMEM; 2548 goto exit; 2549 } 2550 2551 /* Compute MSR exit load area PA */ 2552 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_load_va, 2553 &vcpu->vc_vmx_msr_exit_load_pa)) { 2554 ret = ENOMEM; 2555 goto exit; 2556 } 2557 2558 /* Allocate MSR exit save area VA */ 2559 vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, 2560 &kp_zero, &kd_waitok); 2561 2562 if (!vcpu->vc_vmx_msr_exit_save_va) { 2563 ret = ENOMEM; 2564 goto exit; 2565 } 2566 2567 /* Compute MSR exit save area PA */ 2568 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_exit_save_va, 2569 &vcpu->vc_vmx_msr_exit_save_pa)) { 2570 ret = ENOMEM; 2571 goto exit; 2572 } 2573 2574 #if 0 /* XXX currently use msr_exit_save for msr_entry_load too */ 2575 /* Allocate MSR entry load area VA */ 2576 vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, 2577 &kp_zero, &kd_waitok); 2578 2579 if (!vcpu->vc_vmx_msr_entry_load_va) { 2580 ret = ENOMEM; 2581 goto exit; 2582 } 2583 2584 /* Compute MSR entry load area PA */ 2585 if (!pmap_extract(pmap_kernel(), vcpu->vc_vmx_msr_entry_load_va, 2586 &vcpu->vc_vmx_msr_entry_load_pa)) { 2587 ret = ENOMEM; 2588 goto exit; 2589 } 2590 #endif 2591 2592 vmcs = (struct vmcs *)vcpu->vc_control_va; 2593 vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision; 2594 2595 /* 2596 * Load the VMCS onto this PCPU so we can write registers 2597 */ 2598 if (vmptrld(&vcpu->vc_control_pa)) { 2599 ret = EINVAL; 2600 goto exit; 2601 } 2602 2603 /* Configure EPT Pointer */ 2604 eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa; 2605 msr = rdmsr(IA32_VMX_EPT_VPID_CAP); 2606 if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) { 2607 /* Page walk length 4 supported */ 2608 eptp |= ((IA32_EPT_PAGE_WALK_LENGTH - 1) << 3); 2609 } else { 2610 DPRINTF("EPT page walk length 4 not supported\n"); 2611 ret = EINVAL; 2612 goto exit; 2613 } 2614 if (msr & IA32_EPT_VPID_CAP_WB) { 2615 /* WB cache type supported */ 2616 eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB; 2617 } else 2618 DPRINTF("%s: no WB cache type available, guest VM will run " 2619 "uncached\n", __func__); 2620 2621 DPRINTF("Guest EPTP = 0x%llx\n", eptp); 2622 if (vmwrite(VMCS_GUEST_IA32_EPTP, eptp)) { 2623 DPRINTF("%s: error setting guest EPTP\n", __func__); 2624 ret = EINVAL; 2625 goto exit; 2626 } 2627 2628 vcpu->vc_parent->vm_map->pmap->eptp = eptp; 2629 2630 /* Host CR0 */ 2631 cr0 = rcr0() & ~CR0_TS; 2632 if (vmwrite(VMCS_HOST_IA32_CR0, 
cr0)) { 2633 DPRINTF("%s: error writing host CR0\n", __func__); 2634 ret = EINVAL; 2635 goto exit; 2636 } 2637 2638 /* Host CR4 */ 2639 cr4 = rcr4(); 2640 if (vmwrite(VMCS_HOST_IA32_CR4, cr4)) { 2641 DPRINTF("%s: error writing host CR4\n", __func__); 2642 ret = EINVAL; 2643 goto exit; 2644 } 2645 2646 /* Host Segment Selectors */ 2647 if (vmwrite(VMCS_HOST_IA32_CS_SEL, GSEL(GCODE_SEL, SEL_KPL))) { 2648 DPRINTF("%s: error writing host CS selector\n", __func__); 2649 ret = EINVAL; 2650 goto exit; 2651 } 2652 2653 if (vmwrite(VMCS_HOST_IA32_DS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { 2654 DPRINTF("%s: error writing host DS selector\n", __func__); 2655 ret = EINVAL; 2656 goto exit; 2657 } 2658 2659 if (vmwrite(VMCS_HOST_IA32_ES_SEL, GSEL(GDATA_SEL, SEL_KPL))) { 2660 DPRINTF("%s: error writing host ES selector\n", __func__); 2661 ret = EINVAL; 2662 goto exit; 2663 } 2664 2665 if (vmwrite(VMCS_HOST_IA32_FS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { 2666 DPRINTF("%s: error writing host FS selector\n", __func__); 2667 ret = EINVAL; 2668 goto exit; 2669 } 2670 2671 if (vmwrite(VMCS_HOST_IA32_GS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { 2672 DPRINTF("%s: error writing host GS selector\n", __func__); 2673 ret = EINVAL; 2674 goto exit; 2675 } 2676 2677 if (vmwrite(VMCS_HOST_IA32_SS_SEL, GSEL(GDATA_SEL, SEL_KPL))) { 2678 DPRINTF("%s: error writing host SS selector\n", __func__); 2679 ret = EINVAL; 2680 goto exit; 2681 } 2682 2683 if (vmwrite(VMCS_HOST_IA32_TR_SEL, GSYSSEL(GPROC0_SEL, SEL_KPL))) { 2684 DPRINTF("%s: error writing host TR selector\n", __func__); 2685 ret = EINVAL; 2686 goto exit; 2687 } 2688 2689 /* Host IDTR base */ 2690 if (vmwrite(VMCS_HOST_IA32_IDTR_BASE, idt_vaddr)) { 2691 DPRINTF("%s: error writing host IDTR base\n", __func__); 2692 ret = EINVAL; 2693 goto exit; 2694 } 2695 2696 /* VMCS link */ 2697 if (vmwrite(VMCS_LINK_POINTER, VMX_VMCS_PA_CLEAR)) { 2698 DPRINTF("%s: error writing VMCS link pointer\n", __func__); 2699 ret = EINVAL; 2700 goto exit; 2701 } 2702 2703 /* Flush the initial VMCS */ 2704 if (vmclear(&vcpu->vc_control_pa)) { 2705 DPRINTF("%s: vmclear failed\n", __func__); 2706 ret = EINVAL; 2707 } 2708 2709 exit: 2710 if (ret) 2711 vcpu_deinit_vmx(vcpu); 2712 2713 return (ret); 2714 } 2715 2716 /* 2717 * vcpu_reset_regs 2718 * 2719 * Resets a vcpu's registers to the provided state 2720 * 2721 * Parameters: 2722 * vcpu: the vcpu whose registers shall be reset 2723 * vrs: the desired register state 2724 * 2725 * Return values: 2726 * 0: the vcpu's registers were successfully reset 2727 * !0: the vcpu's registers could not be reset (see arch-specific reset 2728 * function for various values that can be returned here) 2729 */ 2730 int 2731 vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs) 2732 { 2733 int ret; 2734 2735 if (vmm_softc->mode == VMM_MODE_EPT) 2736 ret = vcpu_reset_regs_vmx(vcpu, vrs); 2737 else if (vmm_softc->mode == VMM_MODE_RVI) 2738 ret = vcpu_reset_regs_svm(vcpu, vrs); 2739 else 2740 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); 2741 2742 return (ret); 2743 } 2744 2745 /* 2746 * vcpu_init_svm 2747 * 2748 * AMD SVM specific VCPU initialization routine. 2749 * 2750 * This function allocates various per-VCPU memory regions, sets up initial 2751 * VCPU VMCB controls, and sets initial register values. 
2752 * 2753 * Parameters: 2754 * vcpu: the VCPU structure being initialized 2755 * vcp: parameters provided by vmd(8) 2756 * 2757 * Return values: 2758 * 0: the VCPU was initialized successfully 2759 * ENOMEM: insufficient resources 2760 * EINVAL: an error occurred during VCPU initialization 2761 */ 2762 int 2763 vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp) 2764 { 2765 int ret = 0; 2766 2767 /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */ 2768 if (vmm_alloc_vpid(&vcpu->vc_vpid)) 2769 return (ENOMEM); 2770 2771 /* Allocate VMCB VA */ 2772 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, 2773 &kd_waitok); 2774 2775 if (!vcpu->vc_control_va) { 2776 ret = ENOMEM; 2777 goto exit; 2778 } 2779 2780 /* Compute VMCB PA */ 2781 if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, 2782 (paddr_t *)&vcpu->vc_control_pa)) { 2783 ret = ENOMEM; 2784 goto exit; 2785 } 2786 2787 DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__, 2788 (uint64_t)vcpu->vc_control_va, 2789 (uint64_t)vcpu->vc_control_pa); 2790 2791 2792 /* Allocate MSR bitmap VA (2 pages) */ 2793 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any, 2794 &vmm_kp_contig, &kd_waitok); 2795 2796 if (!vcpu->vc_msr_bitmap_va) { 2797 ret = ENOMEM; 2798 goto exit; 2799 } 2800 2801 /* Compute MSR bitmap PA */ 2802 if (!pmap_extract(pmap_kernel(), vcpu->vc_msr_bitmap_va, 2803 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) { 2804 ret = ENOMEM; 2805 goto exit; 2806 } 2807 2808 DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__, 2809 (uint64_t)vcpu->vc_msr_bitmap_va, 2810 (uint64_t)vcpu->vc_msr_bitmap_pa); 2811 2812 /* Allocate host state area VA */ 2813 vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, 2814 &kp_zero, &kd_waitok); 2815 2816 if (!vcpu->vc_svm_hsa_va) { 2817 ret = ENOMEM; 2818 goto exit; 2819 } 2820 2821 /* Compute host state area PA */ 2822 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_hsa_va, 2823 &vcpu->vc_svm_hsa_pa)) { 2824 ret = ENOMEM; 2825 goto exit; 2826 } 2827 2828 DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__, 2829 (uint64_t)vcpu->vc_svm_hsa_va, 2830 (uint64_t)vcpu->vc_svm_hsa_pa); 2831 2832 /* Allocate IOIO area VA (3 pages) */ 2833 vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any, 2834 &vmm_kp_contig, &kd_waitok); 2835 2836 if (!vcpu->vc_svm_ioio_va) { 2837 ret = ENOMEM; 2838 goto exit; 2839 } 2840 2841 /* Compute IOIO area PA */ 2842 if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_ioio_va, 2843 &vcpu->vc_svm_ioio_pa)) { 2844 ret = ENOMEM; 2845 goto exit; 2846 } 2847 2848 DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__, 2849 (uint64_t)vcpu->vc_svm_ioio_va, 2850 (uint64_t)vcpu->vc_svm_ioio_pa); 2851 2852 /* Shall we enable SEV? */ 2853 vcpu->vc_sev = vcp->vcp_sev; 2854 2855 /* Inform vmd(8) about ASID and C bit position. */ 2856 vcp->vcp_poscbit = amd64_pos_cbit; 2857 vcp->vcp_asid[vcpu->vc_id] = vcpu->vc_vpid; 2858 2859 exit: 2860 if (ret) 2861 vcpu_deinit_svm(vcpu); 2862 2863 return (ret); 2864 } 2865 2866 /* 2867 * vcpu_init 2868 * 2869 * Calls the architecture-specific VCPU init routine 2870 */ 2871 int 2872 vcpu_init(struct vcpu *vcpu, struct vm_create_params *vcp) 2873 { 2874 int ret = 0; 2875 2876 vcpu->vc_virt_mode = vmm_softc->mode; 2877 vcpu->vc_state = VCPU_STATE_STOPPED; 2878 vcpu->vc_vpid = 0; 2879 vcpu->vc_pvclock_system_gpa = 0; 2880 vcpu->vc_last_pcpu = NULL; 2881 2882 rw_init(&vcpu->vc_lock, "vcpu"); 2883 2884 /* Shadow PAT MSR, starting with host's value. 
*/ 2885 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT); 2886 2887 if (vmm_softc->mode == VMM_MODE_EPT) 2888 ret = vcpu_init_vmx(vcpu); 2889 else if (vmm_softc->mode == VMM_MODE_RVI) 2890 ret = vcpu_init_svm(vcpu, vcp); 2891 else 2892 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); 2893 2894 return (ret); 2895 } 2896 2897 /* 2898 * vcpu_deinit_vmx 2899 * 2900 * Deinitializes the vcpu described by 'vcpu' 2901 * 2902 * Parameters: 2903 * vcpu: the vcpu to be deinited 2904 */ 2905 void 2906 vcpu_deinit_vmx(struct vcpu *vcpu) 2907 { 2908 if (vcpu->vc_control_va) { 2909 km_free((void *)vcpu->vc_control_va, PAGE_SIZE, 2910 &kv_page, &kp_zero); 2911 vcpu->vc_control_va = 0; 2912 } 2913 if (vcpu->vc_vmx_msr_exit_save_va) { 2914 km_free((void *)vcpu->vc_vmx_msr_exit_save_va, 2915 PAGE_SIZE, &kv_page, &kp_zero); 2916 vcpu->vc_vmx_msr_exit_save_va = 0; 2917 } 2918 if (vcpu->vc_vmx_msr_exit_load_va) { 2919 km_free((void *)vcpu->vc_vmx_msr_exit_load_va, 2920 PAGE_SIZE, &kv_page, &kp_zero); 2921 vcpu->vc_vmx_msr_exit_load_va = 0; 2922 } 2923 #if 0 2924 if (vcpu->vc_vmx_msr_entry_load_va) { 2925 km_free((void *)vcpu->vc_vmx_msr_entry_load_va, 2926 PAGE_SIZE, &kv_page, &kp_zero); 2927 vcpu->vc_vmx_msr_entry_load_va = 0; 2928 } 2929 #endif 2930 2931 vmm_free_vpid(vcpu->vc_vpid); 2932 } 2933 2934 /* 2935 * vcpu_deinit_svm 2936 * 2937 * Deinitializes the vcpu described by 'vcpu' 2938 * 2939 * Parameters: 2940 * vcpu: the vcpu to be deinited 2941 */ 2942 void 2943 vcpu_deinit_svm(struct vcpu *vcpu) 2944 { 2945 if (vcpu->vc_control_va) { 2946 km_free((void *)vcpu->vc_control_va, PAGE_SIZE, &kv_page, 2947 &kp_zero); 2948 vcpu->vc_control_va = 0; 2949 } 2950 if (vcpu->vc_msr_bitmap_va) { 2951 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE, &kv_any, 2952 &vmm_kp_contig); 2953 vcpu->vc_msr_bitmap_va = 0; 2954 } 2955 if (vcpu->vc_svm_hsa_va) { 2956 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE, &kv_page, 2957 &kp_zero); 2958 vcpu->vc_svm_hsa_va = 0; 2959 } 2960 if (vcpu->vc_svm_ioio_va) { 2961 km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any, 2962 &vmm_kp_contig); 2963 vcpu->vc_svm_ioio_va = 0; 2964 } 2965 2966 vmm_free_vpid(vcpu->vc_vpid); 2967 } 2968 2969 /* 2970 * vcpu_deinit 2971 * 2972 * Calls the architecture-specific VCPU deinit routine 2973 * 2974 * Parameters: 2975 * vcpu: the vcpu to be deinited 2976 */ 2977 void 2978 vcpu_deinit(struct vcpu *vcpu) 2979 { 2980 if (vmm_softc->mode == VMM_MODE_EPT) 2981 vcpu_deinit_vmx(vcpu); 2982 else if (vmm_softc->mode == VMM_MODE_RVI) 2983 vcpu_deinit_svm(vcpu); 2984 else 2985 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode); 2986 } 2987 2988 /* 2989 * vcpu_vmx_check_cap 2990 * 2991 * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1 2992 * or set = 0, respectively). 2993 * 2994 * When considering 'msr', we check to see if true controls are available, 2995 * and use those if so. 2996 * 2997 * Returns 1 of 'cap' can be set/cleared as requested, 0 otherwise. 
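 *
 * The capability MSRs encode both answers in one 64-bit value: a control
 * can be cleared (set = 0) only if its bit in the low dword is 0, and can
 * be set (set = 1) only if the same bit in the high dword is 1. A sketch
 * of the same test the function performs:
 *
 *	can_set   = (ctl & ((uint64_t)cap << 32)) != 0;
 *	can_clear = (ctl & cap) == 0;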
2998 */ 2999 int 3000 vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set) 3001 { 3002 uint64_t ctl; 3003 3004 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL) { 3005 switch (msr) { 3006 case IA32_VMX_PINBASED_CTLS: 3007 ctl = vcpu->vc_vmx_true_pinbased_ctls; 3008 break; 3009 case IA32_VMX_PROCBASED_CTLS: 3010 ctl = vcpu->vc_vmx_true_procbased_ctls; 3011 break; 3012 case IA32_VMX_PROCBASED2_CTLS: 3013 ctl = vcpu->vc_vmx_procbased2_ctls; 3014 break; 3015 case IA32_VMX_ENTRY_CTLS: 3016 ctl = vcpu->vc_vmx_true_entry_ctls; 3017 break; 3018 case IA32_VMX_EXIT_CTLS: 3019 ctl = vcpu->vc_vmx_true_exit_ctls; 3020 break; 3021 default: 3022 return (0); 3023 } 3024 } else { 3025 switch (msr) { 3026 case IA32_VMX_PINBASED_CTLS: 3027 ctl = vcpu->vc_vmx_pinbased_ctls; 3028 break; 3029 case IA32_VMX_PROCBASED_CTLS: 3030 ctl = vcpu->vc_vmx_procbased_ctls; 3031 break; 3032 case IA32_VMX_PROCBASED2_CTLS: 3033 ctl = vcpu->vc_vmx_procbased2_ctls; 3034 break; 3035 case IA32_VMX_ENTRY_CTLS: 3036 ctl = vcpu->vc_vmx_entry_ctls; 3037 break; 3038 case IA32_VMX_EXIT_CTLS: 3039 ctl = vcpu->vc_vmx_exit_ctls; 3040 break; 3041 default: 3042 return (0); 3043 } 3044 } 3045 3046 if (set) { 3047 /* Check bit 'cap << 32', must be !0 */ 3048 return (ctl & ((uint64_t)cap << 32)) != 0; 3049 } else { 3050 /* Check bit 'cap', must be 0 */ 3051 return (ctl & cap) == 0; 3052 } 3053 } 3054 3055 /* 3056 * vcpu_vmx_compute_ctrl 3057 * 3058 * Computes the appropriate control value, given the supplied parameters 3059 * and CPU capabilities. 3060 * 3061 * Intel has made somewhat of a mess of this computation - it is described 3062 * using no fewer than three different approaches, spread across many 3063 * pages of the SDM. Further compounding the problem is the fact that now 3064 * we have "true controls" for each type of "control", and each needs to 3065 * be examined to get the calculation right, but only if "true" controls 3066 * are present on the CPU we're on. 3067 * 3068 * Parameters: 3069 * ctrlval: the control value, as read from the CPU MSR 3070 * ctrl: which control is being set (eg, pinbased, procbased, etc) 3071 * want0: the set of desired 0 bits 3072 * want1: the set of desired 1 bits 3073 * out: (out) the correct value to write into the VMCS for this VCPU, 3074 * for the 'ctrl' desired. 3075 * 3076 * Returns 0 if successful, or EINVAL if the supplied parameters define 3077 * an unworkable control setup. 3078 */ 3079 int 3080 vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1, 3081 uint32_t want0, uint32_t *out) 3082 { 3083 int i, set, clear; 3084 3085 *out = 0; 3086 3087 /* 3088 * The Intel SDM gives three formulae for determining which bits to 3089 * set/clear for a given control and desired functionality. Formula 3090 * 1 is the simplest but disallows use of newer features that are 3091 * enabled by functionality in later CPUs. 3092 * 3093 * Formulas 2 and 3 allow such extra functionality. We use formula 3094 * 2 - this requires us to know the identity of controls in the 3095 * "default1" class for each control register, but allows us to not 3096 * have to pass along and/or query both sets of capability MSRs for 3097 * each control lookup. This makes the code slightly longer, 3098 * however. 
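 *
 * For a single bit 'i' the decision reduces to the following (a sketch of
 * the loop body below, where 'set' means the bit may be 1 and 'clear'
 * means it may be 0):
 *
 *	set   = (ctrlval & (1ULL << (i + 32))) != 0;
 *	clear = (ctrlval & (1ULL << i)) == 0;
 *
 * If only one of the two is possible, that setting is forced (and we fail
 * with EINVAL if the caller asked for the other). If both are possible,
 * want1/want0 decide, and a bit the caller does not care about falls back
 * to its default1-class value from SDM appendix A.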
3099 */ 3100 for (i = 0; i < 32; i++) { 3101 /* Figure out if we can set and / or clear this bit */ 3102 set = (ctrlval & (1ULL << (i + 32))) != 0; 3103 clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0; 3104 3105 /* If the bit can't be set nor cleared, something's wrong */ 3106 if (!set && !clear) 3107 return (EINVAL); 3108 3109 /* 3110 * Formula 2.c.i - "If the relevant VMX capability MSR 3111 * reports that a control has a single setting, use that 3112 * setting." 3113 */ 3114 if (set && !clear) { 3115 if (want0 & (1ULL << i)) 3116 return (EINVAL); 3117 else 3118 *out |= (1ULL << i); 3119 } else if (clear && !set) { 3120 if (want1 & (1ULL << i)) 3121 return (EINVAL); 3122 else 3123 *out &= ~(1ULL << i); 3124 } else { 3125 /* 3126 * 2.c.ii - "If the relevant VMX capability MSR 3127 * reports that a control can be set to 0 or 1 3128 * and that control's meaning is known to the VMM, 3129 * set the control based on the functionality desired." 3130 */ 3131 if (want1 & (1ULL << i)) 3132 *out |= (1ULL << i); 3133 else if (want0 & (1 << i)) 3134 *out &= ~(1ULL << i); 3135 else { 3136 /* 3137 * ... assuming the control's meaning is not 3138 * known to the VMM ... 3139 * 3140 * 2.c.iii - "If the relevant VMX capability 3141 * MSR reports that a control can be set to 0 3142 * or 1 and the control is not in the default1 3143 * class, set the control to 0." 3144 * 3145 * 2.c.iv - "If the relevant VMX capability 3146 * MSR reports that a control can be set to 0 3147 * or 1 and the control is in the default1 3148 * class, set the control to 1." 3149 */ 3150 switch (ctrl) { 3151 case IA32_VMX_PINBASED_CTLS: 3152 case IA32_VMX_TRUE_PINBASED_CTLS: 3153 /* 3154 * A.3.1 - default1 class of pinbased 3155 * controls comprises bits 1,2,4 3156 */ 3157 switch (i) { 3158 case 1: 3159 case 2: 3160 case 4: 3161 *out |= (1ULL << i); 3162 break; 3163 default: 3164 *out &= ~(1ULL << i); 3165 break; 3166 } 3167 break; 3168 case IA32_VMX_PROCBASED_CTLS: 3169 case IA32_VMX_TRUE_PROCBASED_CTLS: 3170 /* 3171 * A.3.2 - default1 class of procbased 3172 * controls comprises bits 1, 4-6, 8, 3173 * 13-16, 26 3174 */ 3175 switch (i) { 3176 case 1: 3177 case 4 ... 6: 3178 case 8: 3179 case 13 ... 16: 3180 case 26: 3181 *out |= (1ULL << i); 3182 break; 3183 default: 3184 *out &= ~(1ULL << i); 3185 break; 3186 } 3187 break; 3188 /* 3189 * Unknown secondary procbased controls 3190 * can always be set to 0 3191 */ 3192 case IA32_VMX_PROCBASED2_CTLS: 3193 *out &= ~(1ULL << i); 3194 break; 3195 case IA32_VMX_EXIT_CTLS: 3196 case IA32_VMX_TRUE_EXIT_CTLS: 3197 /* 3198 * A.4 - default1 class of exit 3199 * controls comprises bits 0-8, 10, 3200 * 11, 13, 14, 16, 17 3201 */ 3202 switch (i) { 3203 case 0 ... 8: 3204 case 10 ... 11: 3205 case 13 ... 14: 3206 case 16 ... 17: 3207 *out |= (1ULL << i); 3208 break; 3209 default: 3210 *out &= ~(1ULL << i); 3211 break; 3212 } 3213 break; 3214 case IA32_VMX_ENTRY_CTLS: 3215 case IA32_VMX_TRUE_ENTRY_CTLS: 3216 /* 3217 * A.5 - default1 class of entry 3218 * controls comprises bits 0-8, 12 3219 */ 3220 switch (i) { 3221 case 0 ... 
8: 3222 case 12: 3223 *out |= (1ULL << i); 3224 break; 3225 default: 3226 *out &= ~(1ULL << i); 3227 break; 3228 } 3229 break; 3230 } 3231 } 3232 } 3233 } 3234 3235 return (0); 3236 } 3237 3238 /* 3239 * vm_run 3240 * 3241 * Run the vm / vcpu specified by 'vrp' 3242 * 3243 * Parameters: 3244 * vrp: structure defining the VM to run 3245 * 3246 * Return value: 3247 * ENOENT: the VM defined in 'vrp' could not be located 3248 * EBUSY: the VM defined in 'vrp' is already running 3249 * EFAULT: error copying data from userspace (vmd) on return from previous 3250 * exit. 3251 * EAGAIN: help is needed from vmd(8) (device I/O or exit vmm(4) cannot 3252 * handle in-kernel.) 3253 * 0: the run loop exited and no help is needed from vmd(8) 3254 */ 3255 int 3256 vm_run(struct vm_run_params *vrp) 3257 { 3258 struct vm *vm; 3259 struct vcpu *vcpu; 3260 int ret = 0; 3261 u_int old, next; 3262 3263 /* 3264 * Find desired VM 3265 */ 3266 ret = vm_find(vrp->vrp_vm_id, &vm); 3267 if (ret) 3268 return (ret); 3269 3270 vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id); 3271 if (vcpu == NULL) { 3272 ret = ENOENT; 3273 goto out; 3274 } 3275 3276 /* 3277 * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING. 3278 * Failure to make the transition indicates the VCPU is busy. 3279 */ 3280 rw_enter_write(&vcpu->vc_lock); 3281 old = VCPU_STATE_STOPPED; 3282 next = VCPU_STATE_RUNNING; 3283 if (atomic_cas_uint(&vcpu->vc_state, old, next) != old) { 3284 ret = EBUSY; 3285 goto out_unlock; 3286 } 3287 3288 /* 3289 * We may be returning from userland helping us from the last 3290 * exit. Copy in the exit data from vmd. The exit data will be 3291 * consumed before the next entry (this typically comprises 3292 * VCPU register changes as the result of vmd(8)'s actions). 3293 */ 3294 ret = copyin(vrp->vrp_exit, &vcpu->vc_exit, sizeof(struct vm_exit)); 3295 if (ret) 3296 goto out_unlock; 3297 3298 vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type; 3299 vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector; 3300 vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode; 3301 3302 WRITE_ONCE(vcpu->vc_curcpu, curcpu()); 3303 /* Run the VCPU specified in vrp */ 3304 if (vcpu->vc_virt_mode == VMM_MODE_EPT) { 3305 ret = vcpu_run_vmx(vcpu, vrp); 3306 } else if (vcpu->vc_virt_mode == VMM_MODE_RVI) { 3307 ret = vcpu_run_svm(vcpu, vrp); 3308 } 3309 WRITE_ONCE(vcpu->vc_curcpu, NULL); 3310 3311 if (ret == 0 || ret == EAGAIN) { 3312 /* If we are exiting, populate exit data so vmd can help. */ 3313 vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE 3314 : vcpu->vc_gueststate.vg_exit_reason; 3315 vrp->vrp_irqready = vcpu->vc_irqready; 3316 vcpu->vc_state = VCPU_STATE_STOPPED; 3317 3318 if (copyout(&vcpu->vc_exit, vrp->vrp_exit, 3319 sizeof(struct vm_exit)) == EFAULT) { 3320 ret = EFAULT; 3321 } else 3322 ret = 0; 3323 } else { 3324 vrp->vrp_exit_reason = VM_EXIT_TERMINATED; 3325 vcpu->vc_state = VCPU_STATE_TERMINATED; 3326 } 3327 out_unlock: 3328 rw_exit_write(&vcpu->vc_lock); 3329 out: 3330 refcnt_rele_wake(&vm->vm_refcnt); 3331 return (ret); 3332 } 3333 3334 /* 3335 * vmm_fpurestore 3336 * 3337 * Restore the guest's FPU state, saving the existing userland thread's 3338 * FPU context if necessary. Must be called with interrupts disabled. 
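 *
 * The run loops below use it in a fixed sequence, roughly:
 *
 *	s = intr_disable();
 *	if ((ret = vmm_fpurestore(vcpu))) {
 *		intr_restore(s);
 *		break;
 *	}
 *	... enter the guest ...
 *	vmm_fpusave(vcpu);
 *	intr_restore(s);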
3339 */ 3340 int 3341 vmm_fpurestore(struct vcpu *vcpu) 3342 { 3343 struct cpu_info *ci = curcpu(); 3344 3345 rw_assert_wrlock(&vcpu->vc_lock); 3346 3347 /* save vmm's FPU state if we haven't already */ 3348 if (ci->ci_pflags & CPUPF_USERXSTATE) { 3349 ci->ci_pflags &= ~CPUPF_USERXSTATE; 3350 fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu); 3351 } 3352 3353 if (vcpu->vc_fpuinited) 3354 xrstor_kern(&vcpu->vc_g_fpu, xsave_mask); 3355 3356 if (xsave_mask) { 3357 /* Restore guest %xcr0 */ 3358 if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) { 3359 DPRINTF("%s: guest attempted to set invalid bits in " 3360 "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n", 3361 __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask); 3362 return EINVAL; 3363 } 3364 } 3365 3366 return 0; 3367 } 3368 3369 /* 3370 * vmm_fpusave 3371 * 3372 * Save the guest's FPU state. Must be called with interrupts disabled. 3373 */ 3374 void 3375 vmm_fpusave(struct vcpu *vcpu) 3376 { 3377 rw_assert_wrlock(&vcpu->vc_lock); 3378 3379 if (xsave_mask) { 3380 /* Save guest %xcr0 */ 3381 vcpu->vc_gueststate.vg_xcr0 = xgetbv(0); 3382 3383 /* Restore host %xcr0 */ 3384 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK); 3385 } 3386 3387 /* 3388 * Save full copy of FPU state - guest content is always 3389 * a subset of host's save area (see xsetbv exit handler) 3390 */ 3391 fpusavereset(&vcpu->vc_g_fpu); 3392 vcpu->vc_fpuinited = 1; 3393 } 3394 3395 /* 3396 * vmm_translate_gva 3397 * 3398 * Translates a guest virtual address to a guest physical address by walking 3399 * the currently active page table (if needed). 3400 * 3401 * Note - this function can possibly alter the supplied VCPU state. 3402 * Specifically, it may inject exceptions depending on the current VCPU 3403 * configuration, and may alter %cr2 on #PF. Consequently, this function 3404 * should only be used as part of instruction emulation. 3405 * 3406 * Parameters: 3407 * vcpu: The VCPU this translation should be performed for (guest MMU settings 3408 * are gathered from this VCPU) 3409 * va: virtual address to translate 3410 * pa: pointer to paddr_t variable that will receive the translated physical 3411 * address. 'pa' is unchanged on error. 3412 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which 3413 * the address should be translated 3414 * 3415 * Return values: 3416 * 0: the address was successfully translated - 'pa' contains the physical 3417 * address currently mapped by 'va'. 3418 * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case 3419 * and %cr2 set in the vcpu structure. 
3420 * EINVAL: an error occurred reading paging table structures 3421 */ 3422 int 3423 vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode) 3424 { 3425 int level, shift, pdidx; 3426 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask; 3427 uint64_t shift_width, pte_size, *hva; 3428 paddr_t hpa; 3429 struct vcpu_reg_state vrs; 3430 3431 level = 0; 3432 3433 if (vmm_softc->mode == VMM_MODE_EPT) { 3434 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 1, &vrs)) 3435 return (EINVAL); 3436 } else if (vmm_softc->mode == VMM_MODE_RVI) { 3437 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vrs)) 3438 return (EINVAL); 3439 } else { 3440 printf("%s: unknown vmm mode", __func__); 3441 return (EINVAL); 3442 } 3443 3444 DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__, 3445 vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]); 3446 3447 if (!(vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PG)) { 3448 DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__, 3449 va); 3450 *pa = va; 3451 return (0); 3452 } 3453 3454 pt_paddr = vrs.vrs_crs[VCPU_REGS_CR3]; 3455 3456 if (vrs.vrs_crs[VCPU_REGS_CR0] & CR0_PE) { 3457 if (vrs.vrs_crs[VCPU_REGS_CR4] & CR4_PAE) { 3458 pte_size = sizeof(uint64_t); 3459 shift_width = 9; 3460 3461 if (vrs.vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) { 3462 level = 4; 3463 mask = L4_MASK; 3464 shift = L4_SHIFT; 3465 } else { 3466 level = 3; 3467 mask = L3_MASK; 3468 shift = L3_SHIFT; 3469 } 3470 } else { 3471 level = 2; 3472 shift_width = 10; 3473 mask = 0xFFC00000; 3474 shift = 22; 3475 pte_size = sizeof(uint32_t); 3476 } 3477 } else { 3478 return (EINVAL); 3479 } 3480 3481 DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, " 3482 "shift_width=%lld\n", __func__, pte_size, level, mask, shift, 3483 shift_width); 3484 3485 /* XXX: Check for R bit in segment selector and set A bit */ 3486 3487 for (;level > 0; level--) { 3488 pdidx = (va & mask) >> shift; 3489 pte_paddr = (pt_paddr) + (pdidx * pte_size); 3490 3491 DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__, 3492 level, pte_paddr); 3493 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr, 3494 &hpa)) { 3495 DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n", 3496 __func__, pte_paddr); 3497 return (EINVAL); 3498 } 3499 3500 hpa = hpa | (pte_paddr & 0xFFF); 3501 hva = (uint64_t *)PMAP_DIRECT_MAP(hpa); 3502 DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n", 3503 __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva); 3504 if (pte_size == 8) 3505 pte = *hva; 3506 else 3507 pte = *(uint32_t *)hva; 3508 3509 DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr, 3510 pte); 3511 3512 /* XXX: Set CR2 */ 3513 if (!(pte & PG_V)) 3514 return (EFAULT); 3515 3516 /* XXX: Check for SMAP */ 3517 if ((mode == PROT_WRITE) && !(pte & PG_RW)) 3518 return (EPERM); 3519 3520 if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u)) 3521 return (EPERM); 3522 3523 pte = pte | PG_U; 3524 if (mode == PROT_WRITE) 3525 pte = pte | PG_M; 3526 *hva = pte; 3527 3528 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */ 3529 if (pte & PG_PS) 3530 break; 3531 3532 if (level > 1) { 3533 pt_paddr = pte & PG_FRAME; 3534 shift -= shift_width; 3535 mask = mask >> shift_width; 3536 } 3537 } 3538 3539 low_mask = ((uint64_t)1ULL << shift) - 1; 3540 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask; 3541 *pa = (pte & high_mask) | (va & low_mask); 3542 3543 DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__, 3544 va, *pa); 3545 3546 return (0); 3547 } 3548 3549 3550 /* 3551 * vcpu_run_vmx 3552 * 3553 * VMX main loop 
used to run a VCPU. 3554 * 3555 * Parameters: 3556 * vcpu: The VCPU to run 3557 * vrp: run parameters 3558 * 3559 * Return values: 3560 * 0: The run loop exited and no help is needed from vmd 3561 * EAGAIN: The run loop exited and help from vmd is needed 3562 * EINVAL: an error occurred 3563 */ 3564 int 3565 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) 3566 { 3567 int ret = 0, exitinfo; 3568 struct region_descriptor gdt; 3569 struct cpu_info *ci = NULL; 3570 uint64_t exit_reason, cr3, msr, insn_error; 3571 struct schedstate_percpu *spc; 3572 struct vmx_msr_store *msr_store; 3573 struct vmx_invvpid_descriptor vid; 3574 struct vmx_invept_descriptor vid_ept; 3575 uint64_t cr0, eii, procbased, int_st; 3576 u_long s; 3577 3578 rw_assert_wrlock(&vcpu->vc_lock); 3579 3580 if (vcpu_reload_vmcs_vmx(vcpu)) { 3581 printf("%s: failed (re)loading vmcs\n", __func__); 3582 return (EINVAL); 3583 } 3584 3585 /* 3586 * If we are returning from userspace (vmd) because we exited 3587 * last time, fix up any needed vcpu state first. Which state 3588 * needs to be fixed up depends on what vmd populated in the 3589 * exit data structure. 3590 */ 3591 if (vrp->vrp_intr_pending) 3592 vcpu->vc_intr = 1; 3593 else 3594 vcpu->vc_intr = 0; 3595 3596 switch (vcpu->vc_gueststate.vg_exit_reason) { 3597 case VMX_EXIT_IO: 3598 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) 3599 vcpu->vc_gueststate.vg_rax = vcpu->vc_exit.vei.vei_data; 3600 vcpu->vc_gueststate.vg_rip = 3601 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP]; 3602 if (vmwrite(VMCS_GUEST_IA32_RIP, vcpu->vc_gueststate.vg_rip)) { 3603 printf("%s: failed to update rip\n", __func__); 3604 return (EINVAL); 3605 } 3606 break; 3607 case VMX_EXIT_EPT_VIOLATION: 3608 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0, 3609 &vcpu->vc_exit.vrs); 3610 if (ret) { 3611 printf("%s: vm %d vcpu %d failed to update registers\n", 3612 __func__, vcpu->vc_parent->vm_id, vcpu->vc_id); 3613 return (EINVAL); 3614 } 3615 break; 3616 } 3617 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit)); 3618 3619 /* Handle vmd(8) injected interrupts */ 3620 /* Is there an interrupt pending injection? 
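	 * The injection below programs the VM-entry interruption-information
	 * field, which packs (per Intel SDM Vol 3C) the vector in bits 7:0,
	 * the event type in bits 10:8 (0 = external interrupt, 3 = hardware
	 * exception), "deliver error code" in bit 11 and "valid" in bit 31.
	 * For a plain external interrupt only the vector and the valid bit
	 * are needed, roughly:
	 *
	 *	eii = (uint64_t)vector | (1ULL << 31);
	 *	vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii);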
*/ 3621 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) { 3622 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) { 3623 printf("%s: can't get interruptibility state\n", 3624 __func__); 3625 return (EINVAL); 3626 } 3627 3628 /* Interruptibility state 0x3 covers NMIs and STI */ 3629 if (!(int_st & 0x3) && vcpu->vc_irqready) { 3630 eii = (uint64_t)vcpu->vc_inject.vie_vector; 3631 eii |= (1ULL << 31); /* Valid */ 3632 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) { 3633 printf("vcpu_run_vmx: can't vector " 3634 "interrupt to guest\n"); 3635 return (EINVAL); 3636 } 3637 3638 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE; 3639 } 3640 } else if (!vcpu->vc_intr) { 3641 /* 3642 * Disable window exiting 3643 */ 3644 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) { 3645 printf("%s: can't read procbased ctls on exit\n", 3646 __func__); 3647 return (EINVAL); 3648 } else { 3649 procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING; 3650 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { 3651 printf("%s: can't write procbased ctls " 3652 "on exit\n", __func__); 3653 return (EINVAL); 3654 } 3655 } 3656 } 3657 3658 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va; 3659 while (ret == 0) { 3660 #ifdef VMM_DEBUG 3661 paddr_t pa = 0ULL; 3662 vmptrst(&pa); 3663 KASSERT(pa == vcpu->vc_control_pa); 3664 #endif /* VMM_DEBUG */ 3665 3666 vmm_update_pvclock(vcpu); 3667 3668 if (ci != curcpu()) { 3669 ci = curcpu(); 3670 vcpu->vc_last_pcpu = ci; 3671 3672 /* We're now using this vcpu's EPT pmap on this cpu. */ 3673 atomic_swap_ptr(&ci->ci_ept_pmap, 3674 vcpu->vc_parent->vm_map->pmap); 3675 3676 /* Invalidate EPT cache. */ 3677 vid_ept.vid_reserved = 0; 3678 vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp; 3679 if (invept(ci->ci_vmm_cap.vcc_vmx.vmx_invept_mode, 3680 &vid_ept)) { 3681 printf("%s: invept\n", __func__); 3682 return (EINVAL); 3683 } 3684 3685 /* Host CR3 */ 3686 cr3 = rcr3(); 3687 if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) { 3688 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__, 3689 VMCS_HOST_IA32_CR3, cr3); 3690 return (EINVAL); 3691 } 3692 3693 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1); 3694 if (gdt.rd_base == 0) { 3695 printf("%s: setregion\n", __func__); 3696 return (EINVAL); 3697 } 3698 3699 /* Host GDTR base */ 3700 if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) { 3701 printf("%s: vmwrite(0x%04X, 0x%llx)\n", 3702 __func__, VMCS_HOST_IA32_GDTR_BASE, 3703 gdt.rd_base); 3704 return (EINVAL); 3705 } 3706 3707 /* Host TR base */ 3708 if (vmwrite(VMCS_HOST_IA32_TR_BASE, 3709 (uint64_t)ci->ci_tss)) { 3710 printf("%s: vmwrite(0x%04X, 0x%llx)\n", 3711 __func__, VMCS_HOST_IA32_TR_BASE, 3712 (uint64_t)ci->ci_tss); 3713 return (EINVAL); 3714 } 3715 3716 /* Host GS.base (aka curcpu) */ 3717 if (vmwrite(VMCS_HOST_IA32_GS_BASE, (uint64_t)ci)) { 3718 printf("%s: vmwrite(0x%04X, 0x%llx)\n", 3719 __func__, VMCS_HOST_IA32_GS_BASE, 3720 (uint64_t)ci); 3721 return (EINVAL); 3722 } 3723 3724 /* Host FS.base */ 3725 msr = rdmsr(MSR_FSBASE); 3726 if (vmwrite(VMCS_HOST_IA32_FS_BASE, msr)) { 3727 printf("%s: vmwrite(0x%04X, 0x%llx)\n", 3728 __func__, VMCS_HOST_IA32_FS_BASE, msr); 3729 return (EINVAL); 3730 } 3731 3732 /* Host KernelGS.base (userspace GS.base here) */ 3733 msr_store[VCPU_HOST_REGS_KGSBASE].vms_data = 3734 rdmsr(MSR_KERNELGSBASE); 3735 } 3736 3737 /* Inject event if present */ 3738 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) { 3739 eii = (uint64_t)vcpu->vc_inject.vie_vector; 3740 eii |= (1ULL << 31); /* Valid */ 3741 3742 switch (vcpu->vc_inject.vie_vector) { 3743 case VMM_EX_BP: 3744 
case VMM_EX_OF: 3745 /* Software Exceptions */ 3746 eii |= (4ULL << 8); 3747 break; 3748 case VMM_EX_DF: 3749 case VMM_EX_TS: 3750 case VMM_EX_NP: 3751 case VMM_EX_SS: 3752 case VMM_EX_GP: 3753 case VMM_EX_PF: 3754 case VMM_EX_AC: 3755 /* Hardware Exceptions */ 3756 eii |= (3ULL << 8); 3757 cr0 = 0; 3758 if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) { 3759 printf("%s: vmread(VMCS_GUEST_IA32_CR0)" 3760 "\n", __func__); 3761 ret = EINVAL; 3762 break; 3763 } 3764 3765 /* Don't set error codes if in real mode. */ 3766 if (ret == EINVAL || !(cr0 & CR0_PE)) 3767 break; 3768 eii |= (1ULL << 11); 3769 3770 /* Enforce a 0 error code for #AC. */ 3771 if (vcpu->vc_inject.vie_vector == VMM_EX_AC) 3772 vcpu->vc_inject.vie_errorcode = 0; 3773 /* 3774 * XXX: Intel SDM says if IA32_VMX_BASIC[56] is 3775 * set, error codes can be injected for hw 3776 * exceptions with or without error code, 3777 * regardless of vector. See Vol 3D. A1. Ignore 3778 * this capability for now. 3779 */ 3780 if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 3781 vcpu->vc_inject.vie_errorcode)) { 3782 printf("%s: can't write error code to " 3783 "guest\n", __func__); 3784 ret = EINVAL; 3785 } 3786 } /* switch */ 3787 if (ret == EINVAL) 3788 break; 3789 3790 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) { 3791 printf("%s: can't vector event to guest\n", 3792 __func__); 3793 ret = EINVAL; 3794 break; 3795 } 3796 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE; 3797 } 3798 3799 if (vcpu->vc_vmx_vpid_enabled) { 3800 /* Invalidate old TLB mappings */ 3801 vid.vid_vpid = vcpu->vc_vpid; 3802 vid.vid_addr = 0; 3803 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid); 3804 } 3805 3806 /* Start / resume the VCPU */ 3807 3808 /* Disable interrupts and save the current host FPU state. */ 3809 s = intr_disable(); 3810 if ((ret = vmm_fpurestore(vcpu))) { 3811 intr_restore(s); 3812 break; 3813 } 3814 3815 TRACEPOINT(vmm, guest_enter, vcpu, vrp); 3816 3817 /* 3818 * If we're resuming to a different VCPU and have IBPB, 3819 * then use it to prevent cross-VM branch-target injection. 3820 */ 3821 if (ci->ci_guest_vcpu != vcpu && 3822 (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS)) { 3823 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB); 3824 ci->ci_guest_vcpu = vcpu; 3825 } 3826 3827 /* Restore any guest PKRU state. */ 3828 if (vmm_softc->sc_md.pkru_enabled) 3829 wrpkru(0, vcpu->vc_pkru); 3830 3831 ret = vmx_enter_guest(&vcpu->vc_control_pa, 3832 &vcpu->vc_gueststate, 3833 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED), 3834 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr); 3835 3836 /* Restore host PKRU state. */ 3837 if (vmm_softc->sc_md.pkru_enabled) { 3838 vcpu->vc_pkru = rdpkru(0); 3839 wrpkru(0, PGK_VALUE); 3840 } 3841 3842 /* 3843 * VM exit restores the GDT and IDT bases, but gives 3844 * them high limits. Reload with the correct limits here. 3845 * 'gdt' is set above first time through and reset there 3846 * whenever this thread switches CPU. 3847 */ 3848 bare_lgdt(&gdt); 3849 cpu_init_idt(); 3850 3851 /* 3852 * On exit, interrupts are disabled, and we are running with 3853 * the guest FPU state still possibly on the CPU. Save the FPU 3854 * state before re-enabling interrupts. 3855 */ 3856 vmm_fpusave(vcpu); 3857 intr_restore(s); 3858 3859 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED); 3860 exit_reason = VM_EXIT_NONE; 3861 3862 /* If we exited successfully ... 
*/ 3863 if (ret == 0) { 3864 exitinfo = vmx_get_exit_info( 3865 &vcpu->vc_gueststate.vg_rip, &exit_reason); 3866 if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) { 3867 printf("%s: cannot read guest rip\n", __func__); 3868 ret = EINVAL; 3869 break; 3870 } 3871 if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) { 3872 printf("%s: can't read exit reason\n", 3873 __func__); 3874 ret = EINVAL; 3875 break; 3876 } 3877 vcpu->vc_gueststate.vg_exit_reason = exit_reason; 3878 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason); 3879 3880 /* Update our state */ 3881 if (vmread(VMCS_GUEST_IA32_RFLAGS, 3882 &vcpu->vc_gueststate.vg_rflags)) { 3883 printf("%s: can't read guest rflags during " 3884 "exit\n", __func__); 3885 ret = EINVAL; 3886 break; 3887 } 3888 3889 /* 3890 * Handle the exit. This will alter "ret" to EAGAIN if 3891 * the exit handler determines help from vmd is needed. 3892 */ 3893 ret = vmx_handle_exit(vcpu); 3894 3895 if (vcpu->vc_gueststate.vg_rflags & PSL_I) 3896 vcpu->vc_irqready = 1; 3897 else 3898 vcpu->vc_irqready = 0; 3899 3900 /* 3901 * If not ready for interrupts, but interrupts pending, 3902 * enable interrupt window exiting. 3903 */ 3904 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { 3905 if (vmread(VMCS_PROCBASED_CTLS, &procbased)) { 3906 printf("%s: can't read procbased ctls " 3907 "on intwin exit\n", __func__); 3908 ret = EINVAL; 3909 break; 3910 } 3911 3912 procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING; 3913 if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) { 3914 printf("%s: can't write procbased ctls " 3915 "on intwin exit\n", __func__); 3916 ret = EINVAL; 3917 break; 3918 } 3919 } 3920 3921 /* 3922 * Exit to vmd if we are terminating, failed to enter, 3923 * or need help (device I/O) 3924 */ 3925 if (ret || vcpu_must_stop(vcpu)) 3926 break; 3927 3928 if (vcpu->vc_intr && vcpu->vc_irqready) { 3929 ret = EAGAIN; 3930 break; 3931 } 3932 3933 /* Check if we should yield - don't hog the {p,v}pu */ 3934 spc = &ci->ci_schedstate; 3935 if (spc->spc_schedflags & SPCF_SHOULDYIELD) 3936 break; 3937 3938 } else { 3939 /* 3940 * We failed vmresume or vmlaunch for some reason, 3941 * typically due to invalid vmcs state or other 3942 * reasons documented in SDM Vol 3C 30.4. 3943 */ 3944 switch (ret) { 3945 case VMX_FAIL_LAUNCH_INVALID_VMCS: 3946 printf("%s: failed %s with invalid vmcs\n", 3947 __func__, 3948 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED 3949 ? "vmresume" : "vmlaunch")); 3950 break; 3951 case VMX_FAIL_LAUNCH_VALID_VMCS: 3952 printf("%s: failed %s with valid vmcs\n", 3953 __func__, 3954 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED 3955 ? "vmresume" : "vmlaunch")); 3956 break; 3957 default: 3958 printf("%s: failed %s for unknown reason\n", 3959 __func__, 3960 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED 3961 ? "vmresume" : "vmlaunch")); 3962 } 3963 3964 ret = EINVAL; 3965 3966 /* Try to translate a vmfail error code, if possible. 
*/ 3967 if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) { 3968 printf("%s: can't read insn error field\n", 3969 __func__); 3970 } else 3971 printf("%s: error code = %lld, %s\n", __func__, 3972 insn_error, 3973 vmx_instruction_error_decode(insn_error)); 3974 #ifdef VMM_DEBUG 3975 vmx_vcpu_dump_regs(vcpu); 3976 dump_vcpu(vcpu); 3977 #endif /* VMM_DEBUG */ 3978 } 3979 } 3980 3981 vcpu->vc_last_pcpu = curcpu(); 3982 3983 /* Copy the VCPU register state to the exit structure */ 3984 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 0, &vcpu->vc_exit.vrs)) 3985 ret = EINVAL; 3986 vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu); 3987 3988 return (ret); 3989 } 3990 3991 /* 3992 * vmx_handle_intr 3993 * 3994 * Handle host (external) interrupts. We read which interrupt fired by 3995 * extracting the vector from the VMCS and dispatch the interrupt directly 3996 * to the host using vmm_dispatch_intr. 3997 */ 3998 void 3999 vmx_handle_intr(struct vcpu *vcpu) 4000 { 4001 uint8_t vec; 4002 uint64_t eii; 4003 struct gate_descriptor *idte; 4004 vaddr_t handler; 4005 4006 if (vmread(VMCS_EXIT_INTERRUPTION_INFO, &eii)) { 4007 printf("%s: can't obtain intr info\n", __func__); 4008 return; 4009 } 4010 4011 vec = eii & 0xFF; 4012 4013 /* XXX check "error valid" code in eii, abort if 0 */ 4014 idte=&idt[vec]; 4015 handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16); 4016 vmm_dispatch_intr(handler); 4017 } 4018 4019 /* 4020 * svm_handle_hlt 4021 * 4022 * Handle HLT exits 4023 * 4024 * Parameters 4025 * vcpu: The VCPU that executed the HLT instruction 4026 * 4027 * Return Values: 4028 * EIO: The guest halted with interrupts disabled 4029 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU 4030 * until a virtual interrupt is ready to inject 4031 */ 4032 int 4033 svm_handle_hlt(struct vcpu *vcpu) 4034 { 4035 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 4036 uint64_t rflags = vmcb->v_rflags; 4037 4038 /* All HLT insns are 1 byte */ 4039 vcpu->vc_gueststate.vg_rip += 1; 4040 4041 if (!(rflags & PSL_I)) { 4042 DPRINTF("%s: guest halted with interrupts disabled\n", 4043 __func__); 4044 return (EIO); 4045 } 4046 4047 return (EAGAIN); 4048 } 4049 4050 /* 4051 * vmx_handle_hlt 4052 * 4053 * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate 4054 * the guest (no NMIs handled) by returning EIO to vmd. 
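 *
 * Example flow (illustrative): a guest "hlt" at %rip 0x1000 with RFLAGS.IF
 * set exits here; the 1-byte length reported in VMCS_INSTRUCTION_LENGTH is
 * validated, %rip is advanced to 0x1001, and EAGAIN is returned so vmd stops
 * scheduling this VCPU until an interrupt is pending. With RFLAGS.IF clear,
 * EIO is returned and %rip is left unchanged.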
4055 * 4056 * Parameters: 4057 * vcpu: The VCPU that executed the HLT instruction 4058 * 4059 * Return Values: 4060 * EINVAL: An error occurred extracting information from the VMCS, or an 4061 * invalid HLT instruction was encountered 4062 * EIO: The guest halted with interrupts disabled 4063 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU 4064 * until a virtual interrupt is ready to inject 4065 * 4066 */ 4067 int 4068 vmx_handle_hlt(struct vcpu *vcpu) 4069 { 4070 uint64_t insn_length, rflags; 4071 4072 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 4073 printf("%s: can't obtain instruction length\n", __func__); 4074 return (EINVAL); 4075 } 4076 4077 if (vmread(VMCS_GUEST_IA32_RFLAGS, &rflags)) { 4078 printf("%s: can't obtain guest rflags\n", __func__); 4079 return (EINVAL); 4080 } 4081 4082 if (insn_length != 1) { 4083 DPRINTF("%s: HLT with instruction length %lld not supported\n", 4084 __func__, insn_length); 4085 return (EINVAL); 4086 } 4087 4088 if (!(rflags & PSL_I)) { 4089 DPRINTF("%s: guest halted with interrupts disabled\n", 4090 __func__); 4091 return (EIO); 4092 } 4093 4094 vcpu->vc_gueststate.vg_rip += insn_length; 4095 return (EAGAIN); 4096 } 4097 4098 /* 4099 * vmx_get_exit_info 4100 * 4101 * Returns exit information containing the current guest RIP and exit reason 4102 * in rip and exit_reason. The return value is a bitmask indicating whether 4103 * reading the RIP and exit reason was successful. 4104 */ 4105 int 4106 vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason) 4107 { 4108 int rv = 0; 4109 4110 if (vmread(VMCS_GUEST_IA32_RIP, rip) == 0) { 4111 rv |= VMX_EXIT_INFO_HAVE_RIP; 4112 if (vmread(VMCS_EXIT_REASON, exit_reason) == 0) 4113 rv |= VMX_EXIT_INFO_HAVE_REASON; 4114 } 4115 return (rv); 4116 } 4117 4118 /* 4119 * svm_handle_exit 4120 * 4121 * Handle exits from the VM by decoding the exit reason and calling various 4122 * subhandlers as needed. 4123 */ 4124 int 4125 svm_handle_exit(struct vcpu *vcpu) 4126 { 4127 uint64_t exit_reason, rflags; 4128 int update_rip, ret = 0; 4129 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 4130 4131 update_rip = 0; 4132 exit_reason = vcpu->vc_gueststate.vg_exit_reason; 4133 rflags = vcpu->vc_gueststate.vg_rflags; 4134 4135 switch (exit_reason) { 4136 case SVM_VMEXIT_VINTR: 4137 if (!(rflags & PSL_I)) { 4138 DPRINTF("%s: impossible interrupt window exit " 4139 "config\n", __func__); 4140 ret = EINVAL; 4141 break; 4142 } 4143 4144 /* 4145 * Guest is now ready for interrupts, so disable interrupt 4146 * window exiting. 
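	 *
	 * Clearing v_irq/v_intr_vector and the VINTR intercept below (and
	 * flagging the TPR and intercept fields dirty via svm_set_dirty())
	 * stops further interrupt-window exits; presumably the intercept is
	 * re-armed by the run loop the next time an interrupt is pending
	 * while the guest is not interruptible, mirroring the VMX
	 * interrupt-window handling above.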
4147 */ 4148 vmcb->v_irq = 0; 4149 vmcb->v_intr_vector = 0; 4150 vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR; 4151 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | SVM_CLEANBITS_I); 4152 4153 update_rip = 0; 4154 break; 4155 case SVM_VMEXIT_INTR: 4156 update_rip = 0; 4157 break; 4158 case SVM_VMEXIT_SHUTDOWN: 4159 update_rip = 0; 4160 ret = EAGAIN; 4161 break; 4162 case SVM_VMEXIT_NPF: 4163 ret = svm_handle_np_fault(vcpu); 4164 break; 4165 case SVM_VMEXIT_CPUID: 4166 ret = vmm_handle_cpuid(vcpu); 4167 update_rip = 1; 4168 break; 4169 case SVM_VMEXIT_MSR: 4170 ret = svm_handle_msr(vcpu); 4171 update_rip = 1; 4172 break; 4173 case SVM_VMEXIT_XSETBV: 4174 ret = svm_handle_xsetbv(vcpu); 4175 update_rip = 1; 4176 break; 4177 case SVM_VMEXIT_IOIO: 4178 if (svm_handle_inout(vcpu) == 0) 4179 ret = EAGAIN; 4180 break; 4181 case SVM_VMEXIT_HLT: 4182 ret = svm_handle_hlt(vcpu); 4183 update_rip = 1; 4184 break; 4185 case SVM_VMEXIT_MWAIT: 4186 case SVM_VMEXIT_MWAIT_CONDITIONAL: 4187 case SVM_VMEXIT_MONITOR: 4188 case SVM_VMEXIT_VMRUN: 4189 case SVM_VMEXIT_VMMCALL: 4190 case SVM_VMEXIT_VMLOAD: 4191 case SVM_VMEXIT_VMSAVE: 4192 case SVM_VMEXIT_STGI: 4193 case SVM_VMEXIT_CLGI: 4194 case SVM_VMEXIT_SKINIT: 4195 case SVM_VMEXIT_RDTSCP: 4196 case SVM_VMEXIT_ICEBP: 4197 case SVM_VMEXIT_INVLPGA: 4198 ret = vmm_inject_ud(vcpu); 4199 update_rip = 0; 4200 break; 4201 default: 4202 DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__, 4203 exit_reason, (uint64_t)vcpu->vc_control_pa); 4204 return (EINVAL); 4205 } 4206 4207 if (update_rip) { 4208 vmcb->v_rip = vcpu->vc_gueststate.vg_rip; 4209 4210 if (rflags & PSL_T) { 4211 if (vmm_inject_db(vcpu)) { 4212 printf("%s: can't inject #DB exception to " 4213 "guest", __func__); 4214 return (EINVAL); 4215 } 4216 } 4217 } 4218 4219 /* Enable SVME in EFER (must always be set) */ 4220 vmcb->v_efer |= EFER_SVME; 4221 svm_set_dirty(vcpu, SVM_CLEANBITS_CR); 4222 4223 return (ret); 4224 } 4225 4226 /* 4227 * vmx_handle_exit 4228 * 4229 * Handle exits from the VM by decoding the exit reason and calling various 4230 * subhandlers as needed. 
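 *
 * Handlers that fully emulate an instruction set update_rip; the common
 * epilogue below then writes the advanced %rip back to VMCS_GUEST_IA32_RIP,
 * clears the low two guest interruptibility-state bits (mask 0x3), and
 * queues a #DB when the guest was single-stepping (RFLAGS.TF set). Exits
 * that need vmd's help end up returning EAGAIN instead.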
4231 */ 4232 int 4233 vmx_handle_exit(struct vcpu *vcpu) 4234 { 4235 uint64_t exit_reason, rflags, istate; 4236 int update_rip, ret = 0; 4237 4238 update_rip = 0; 4239 exit_reason = vcpu->vc_gueststate.vg_exit_reason; 4240 rflags = vcpu->vc_gueststate.vg_rflags; 4241 4242 switch (exit_reason) { 4243 case VMX_EXIT_INT_WINDOW: 4244 if (!(rflags & PSL_I)) { 4245 DPRINTF("%s: impossible interrupt window exit " 4246 "config\n", __func__); 4247 ret = EINVAL; 4248 break; 4249 } 4250 4251 ret = EAGAIN; 4252 update_rip = 0; 4253 break; 4254 case VMX_EXIT_EPT_VIOLATION: 4255 ret = vmx_handle_np_fault(vcpu); 4256 break; 4257 case VMX_EXIT_CPUID: 4258 ret = vmm_handle_cpuid(vcpu); 4259 update_rip = 1; 4260 break; 4261 case VMX_EXIT_IO: 4262 if (vmx_handle_inout(vcpu) == 0) 4263 ret = EAGAIN; 4264 break; 4265 case VMX_EXIT_EXTINT: 4266 vmx_handle_intr(vcpu); 4267 update_rip = 0; 4268 break; 4269 case VMX_EXIT_CR_ACCESS: 4270 ret = vmx_handle_cr(vcpu); 4271 update_rip = 1; 4272 break; 4273 case VMX_EXIT_HLT: 4274 ret = vmx_handle_hlt(vcpu); 4275 update_rip = 1; 4276 break; 4277 case VMX_EXIT_RDMSR: 4278 ret = vmx_handle_rdmsr(vcpu); 4279 update_rip = 1; 4280 break; 4281 case VMX_EXIT_WRMSR: 4282 ret = vmx_handle_wrmsr(vcpu); 4283 update_rip = 1; 4284 break; 4285 case VMX_EXIT_XSETBV: 4286 ret = vmx_handle_xsetbv(vcpu); 4287 update_rip = 1; 4288 break; 4289 case VMX_EXIT_MWAIT: 4290 case VMX_EXIT_MONITOR: 4291 case VMX_EXIT_VMXON: 4292 case VMX_EXIT_VMWRITE: 4293 case VMX_EXIT_VMREAD: 4294 case VMX_EXIT_VMLAUNCH: 4295 case VMX_EXIT_VMRESUME: 4296 case VMX_EXIT_VMPTRLD: 4297 case VMX_EXIT_VMPTRST: 4298 case VMX_EXIT_VMCLEAR: 4299 case VMX_EXIT_VMCALL: 4300 case VMX_EXIT_VMFUNC: 4301 case VMX_EXIT_VMXOFF: 4302 case VMX_EXIT_INVVPID: 4303 case VMX_EXIT_INVEPT: 4304 ret = vmm_inject_ud(vcpu); 4305 update_rip = 0; 4306 break; 4307 case VMX_EXIT_TRIPLE_FAULT: 4308 #ifdef VMM_DEBUG 4309 DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__, 4310 vcpu->vc_parent->vm_id, vcpu->vc_id); 4311 vmx_vcpu_dump_regs(vcpu); 4312 dump_vcpu(vcpu); 4313 vmx_dump_vmcs(vcpu); 4314 #endif /* VMM_DEBUG */ 4315 ret = EAGAIN; 4316 update_rip = 0; 4317 break; 4318 default: 4319 #ifdef VMM_DEBUG 4320 DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__, 4321 exit_reason, vmx_exit_reason_decode(exit_reason)); 4322 #endif /* VMM_DEBUG */ 4323 return (EINVAL); 4324 } 4325 4326 if (update_rip) { 4327 if (vmwrite(VMCS_GUEST_IA32_RIP, 4328 vcpu->vc_gueststate.vg_rip)) { 4329 printf("%s: can't advance rip\n", __func__); 4330 return (EINVAL); 4331 } 4332 4333 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, 4334 &istate)) { 4335 printf("%s: can't read interruptibility state\n", 4336 __func__); 4337 return (EINVAL); 4338 } 4339 4340 /* Interruptibility state 0x3 covers NMIs and STI */ 4341 istate &= ~0x3; 4342 4343 if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST, 4344 istate)) { 4345 printf("%s: can't write interruptibility state\n", 4346 __func__); 4347 return (EINVAL); 4348 } 4349 4350 if (rflags & PSL_T) { 4351 if (vmm_inject_db(vcpu)) { 4352 printf("%s: can't inject #DB exception to " 4353 "guest", __func__); 4354 return (EINVAL); 4355 } 4356 } 4357 } 4358 4359 return (ret); 4360 } 4361 4362 /* 4363 * vmm_inject_gp 4364 * 4365 * Injects an #GP exception into the guest VCPU. 
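 *
 * Injection is deferred: this only queues the event in vc_inject; the entry
 * paths in vcpu_run_vmx/vcpu_run_svm convert it into
 * VMCS_ENTRY_INTERRUPTION_INFO or VMCB v_eventinj on the next entry.
 * Typical use in an exit handler (illustrative condition):
 *
 *	if (access_is_invalid)
 *		return (vmm_inject_gp(vcpu));	/* do not advance %rip */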
4366 * 4367 * Parameters: 4368 * vcpu: vcpu to inject into 4369 * 4370 * Return values: 4371 * Always 0 4372 */ 4373 int 4374 vmm_inject_gp(struct vcpu *vcpu) 4375 { 4376 DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__, 4377 vcpu->vc_gueststate.vg_rip); 4378 vcpu->vc_inject.vie_vector = VMM_EX_GP; 4379 vcpu->vc_inject.vie_type = VCPU_INJECT_EX; 4380 vcpu->vc_inject.vie_errorcode = 0; 4381 4382 return (0); 4383 } 4384 4385 /* 4386 * vmm_inject_ud 4387 * 4388 * Injects an #UD exception into the guest VCPU. 4389 * 4390 * Parameters: 4391 * vcpu: vcpu to inject into 4392 * 4393 * Return values: 4394 * Always 0 4395 */ 4396 int 4397 vmm_inject_ud(struct vcpu *vcpu) 4398 { 4399 DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__, 4400 vcpu->vc_gueststate.vg_rip); 4401 vcpu->vc_inject.vie_vector = VMM_EX_UD; 4402 vcpu->vc_inject.vie_type = VCPU_INJECT_EX; 4403 vcpu->vc_inject.vie_errorcode = 0; 4404 4405 return (0); 4406 } 4407 4408 /* 4409 * vmm_inject_db 4410 * 4411 * Injects a #DB exception into the guest VCPU. 4412 * 4413 * Parameters: 4414 * vcpu: vcpu to inject into 4415 * 4416 * Return values: 4417 * Always 0 4418 */ 4419 int 4420 vmm_inject_db(struct vcpu *vcpu) 4421 { 4422 DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__, 4423 vcpu->vc_gueststate.vg_rip); 4424 vcpu->vc_inject.vie_vector = VMM_EX_DB; 4425 vcpu->vc_inject.vie_type = VCPU_INJECT_EX; 4426 vcpu->vc_inject.vie_errorcode = 0; 4427 4428 return (0); 4429 } 4430 4431 /* 4432 * vmm_get_guest_memtype 4433 * 4434 * Returns the type of memory 'gpa' refers to in the context of vm 'vm' 4435 */ 4436 int 4437 vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) 4438 { 4439 int i; 4440 struct vm_mem_range *vmr; 4441 4442 /* XXX Use binary search? */ 4443 for (i = 0; i < vm->vm_nmemranges; i++) { 4444 vmr = &vm->vm_memranges[i]; 4445 4446 /* 4447 * vm_memranges are ascending. gpa can no longer be in one of 4448 * the memranges 4449 */ 4450 if (gpa < vmr->vmr_gpa) 4451 break; 4452 4453 if (gpa < vmr->vmr_gpa + vmr->vmr_size) { 4454 if (vmr->vmr_type == VM_MEM_MMIO) 4455 return (VMM_MEM_TYPE_MMIO); 4456 return (VMM_MEM_TYPE_REGULAR); 4457 } 4458 } 4459 4460 DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa); 4461 return (VMM_MEM_TYPE_UNKNOWN); 4462 } 4463 4464 /* 4465 * vmx_get_exit_qualification 4466 * 4467 * Return the current VMCS' exit qualification information 4468 */ 4469 int 4470 vmx_get_exit_qualification(uint64_t *exit_qualification) 4471 { 4472 if (vmread(VMCS_GUEST_EXIT_QUALIFICATION, exit_qualification)) { 4473 printf("%s: can't extract exit qual\n", __func__); 4474 return (EINVAL); 4475 } 4476 4477 return (0); 4478 } 4479 4480 /* 4481 * vmx_get_guest_faulttype 4482 * 4483 * Determines the type (R/W/X) of the last fault on the VCPU last run on 4484 * this PCPU. 
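 *
 * Example (illustrative): an exit qualification with IA32_VMX_EPT_FAULT_WRITE
 * set and only IA32_VMX_EPT_FAULT_WAS_READABLE among the "was" bits is a
 * write to a read-only mapping and yields VM_FAULT_PROTECT; if none of the
 * "was" bits are set the translation was not present and VM_FAULT_INVALID is
 * returned.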
4485 */ 4486 int 4487 vmx_get_guest_faulttype(void) 4488 { 4489 uint64_t exit_qual; 4490 uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE | 4491 IA32_VMX_EPT_FAULT_WAS_WRITABLE | IA32_VMX_EPT_FAULT_WAS_EXECABLE; 4492 vm_prot_t prot, was_prot; 4493 4494 if (vmx_get_exit_qualification(&exit_qual)) 4495 return (-1); 4496 4497 if ((exit_qual & presentmask) == 0) 4498 return VM_FAULT_INVALID; 4499 4500 was_prot = 0; 4501 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE) 4502 was_prot |= PROT_READ; 4503 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE) 4504 was_prot |= PROT_WRITE; 4505 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE) 4506 was_prot |= PROT_EXEC; 4507 4508 prot = 0; 4509 if (exit_qual & IA32_VMX_EPT_FAULT_READ) 4510 prot = PROT_READ; 4511 else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE) 4512 prot = PROT_WRITE; 4513 else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC) 4514 prot = PROT_EXEC; 4515 4516 if ((was_prot & prot) == 0) 4517 return VM_FAULT_PROTECT; 4518 4519 return (-1); 4520 } 4521 4522 /* 4523 * svm_get_guest_faulttype 4524 * 4525 * Determines the type (R/W/X) of the last fault on the VCPU last run on 4526 * this PCPU. 4527 */ 4528 int 4529 svm_get_guest_faulttype(struct vmcb *vmcb) 4530 { 4531 if (!(vmcb->v_exitinfo1 & 0x1)) 4532 return VM_FAULT_INVALID; 4533 return VM_FAULT_PROTECT; 4534 } 4535 4536 /* 4537 * svm_fault_page 4538 * 4539 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' 4540 * at address 'gpa'. 4541 */ 4542 int 4543 svm_fault_page(struct vcpu *vcpu, paddr_t gpa) 4544 { 4545 paddr_t pa = trunc_page(gpa); 4546 int ret; 4547 4548 ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE, 4549 PROT_READ | PROT_WRITE | PROT_EXEC); 4550 if (ret) 4551 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", 4552 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); 4553 4554 return (ret); 4555 } 4556 4557 /* 4558 * svm_handle_np_fault 4559 * 4560 * High level nested paging handler for SVM. Verifies that a fault is for a 4561 * valid memory region, then faults a page, or aborts otherwise. 4562 */ 4563 int 4564 svm_handle_np_fault(struct vcpu *vcpu) 4565 { 4566 uint64_t gpa; 4567 int gpa_memtype, ret = 0; 4568 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 4569 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; 4570 struct cpu_info *ci = curcpu(); 4571 4572 memset(vee, 0, sizeof(*vee)); 4573 4574 gpa = vmcb->v_exitinfo2; 4575 4576 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); 4577 switch (gpa_memtype) { 4578 case VMM_MEM_TYPE_REGULAR: 4579 vee->vee_fault_type = VEE_FAULT_HANDLED; 4580 ret = svm_fault_page(vcpu, gpa); 4581 break; 4582 case VMM_MEM_TYPE_MMIO: 4583 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; 4584 if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) { 4585 vee->vee_insn_len = vmcb->v_n_bytes_fetched; 4586 memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes, 4587 sizeof(vee->vee_insn_bytes)); 4588 vee->vee_insn_info |= VEE_BYTES_VALID; 4589 } 4590 ret = EAGAIN; 4591 break; 4592 default: 4593 printf("%s: unknown memory type %d for GPA 0x%llx\n", 4594 __func__, gpa_memtype, gpa); 4595 return (EINVAL); 4596 } 4597 4598 return (ret); 4599 } 4600 4601 /* 4602 * vmx_fault_page 4603 * 4604 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu' 4605 * at address 'gpa'. 
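 *
 * Example (illustrative): a guest access at gpa 0x12345678 wires the single
 * page [0x12345000, 0x12346000) into the VM's map with
 * PROT_READ | PROT_WRITE | PROT_EXEC; the VMCS is reloaded afterwards
 * because uvm_fault_wire(9) may sleep and the thread can migrate to another
 * cpu.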
4606 * 4607 * Parameters: 4608 * vcpu: guest VCPU requiring the page to be faulted into the UVM map 4609 * gpa: guest physical address that triggered the fault 4610 * 4611 * Return Values: 4612 * 0: if successful 4613 * EINVAL: if fault type could not be determined or VMCS reload fails 4614 * EAGAIN: if a protection fault occurred, ie writing to a read-only page 4615 * errno: if uvm_fault_wire() fails to wire in the page 4616 */ 4617 int 4618 vmx_fault_page(struct vcpu *vcpu, paddr_t gpa) 4619 { 4620 int fault_type, ret; 4621 paddr_t pa = trunc_page(gpa); 4622 4623 fault_type = vmx_get_guest_faulttype(); 4624 switch (fault_type) { 4625 case -1: 4626 printf("%s: invalid fault type\n", __func__); 4627 return (EINVAL); 4628 case VM_FAULT_PROTECT: 4629 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT; 4630 return (EAGAIN); 4631 default: 4632 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED; 4633 break; 4634 } 4635 4636 /* We may sleep during uvm_fault_wire(), so reload VMCS. */ 4637 vcpu->vc_last_pcpu = curcpu(); 4638 ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE, 4639 PROT_READ | PROT_WRITE | PROT_EXEC); 4640 if (vcpu_reload_vmcs_vmx(vcpu)) { 4641 printf("%s: failed to reload vmcs\n", __func__); 4642 return (EINVAL); 4643 } 4644 4645 if (ret) 4646 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n", 4647 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip); 4648 4649 return (ret); 4650 } 4651 4652 /* 4653 * vmx_handle_np_fault 4654 * 4655 * High level nested paging handler for VMX. Verifies that a fault is for a 4656 * valid memory region, then faults a page, or aborts otherwise. 4657 */ 4658 int 4659 vmx_handle_np_fault(struct vcpu *vcpu) 4660 { 4661 uint64_t insn_len = 0, gpa; 4662 int gpa_memtype, ret = 0; 4663 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; 4664 4665 memset(vee, 0, sizeof(*vee)); 4666 4667 if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) { 4668 printf("%s: cannot extract faulting pa\n", __func__); 4669 return (EINVAL); 4670 } 4671 4672 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); 4673 switch (gpa_memtype) { 4674 case VMM_MEM_TYPE_REGULAR: 4675 vee->vee_fault_type = VEE_FAULT_HANDLED; 4676 ret = vmx_fault_page(vcpu, gpa); 4677 break; 4678 case VMM_MEM_TYPE_MMIO: 4679 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; 4680 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) || 4681 insn_len == 0 || insn_len > 15) { 4682 printf("%s: failed to extract instruction length\n", 4683 __func__); 4684 ret = EINVAL; 4685 } else { 4686 vee->vee_insn_len = (uint32_t)insn_len; 4687 vee->vee_insn_info |= VEE_LEN_VALID; 4688 ret = EAGAIN; 4689 } 4690 break; 4691 default: 4692 printf("%s: unknown memory type %d for GPA 0x%llx\n", 4693 __func__, gpa_memtype, gpa); 4694 return (EINVAL); 4695 } 4696 4697 return (ret); 4698 } 4699 4700 /* 4701 * vmm_get_guest_cpu_cpl 4702 * 4703 * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the 4704 * VMCS field for the DPL of SS (this seems odd, but is documented that way 4705 * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl' 4706 * field, as per the APM. 4707 * 4708 * Parameters: 4709 * vcpu: guest VCPU for which CPL is to be checked 4710 * 4711 * Return Values: 4712 * -1: the CPL could not be determined 4713 * 0-3 indicating the current CPL. For real mode operation, 0 is returned. 
4714 */ 4715 int 4716 vmm_get_guest_cpu_cpl(struct vcpu *vcpu) 4717 { 4718 int mode; 4719 struct vmcb *vmcb; 4720 uint64_t ss_ar; 4721 4722 mode = vmm_get_guest_cpu_mode(vcpu); 4723 4724 if (mode == VMM_CPU_MODE_UNKNOWN) 4725 return (-1); 4726 4727 if (mode == VMM_CPU_MODE_REAL) 4728 return (0); 4729 4730 if (vmm_softc->mode == VMM_MODE_RVI) { 4731 vmcb = (struct vmcb *)vcpu->vc_control_va; 4732 return (vmcb->v_cpl); 4733 } else if (vmm_softc->mode == VMM_MODE_EPT) { 4734 if (vmread(VMCS_GUEST_IA32_SS_AR, &ss_ar)) 4735 return (-1); 4736 return ((ss_ar & 0x60) >> 5); 4737 } else 4738 return (-1); 4739 } 4740 4741 /* 4742 * vmm_get_guest_cpu_mode 4743 * 4744 * Determines current CPU mode of 'vcpu'. 4745 * 4746 * Parameters: 4747 * vcpu: guest VCPU for which mode is to be checked 4748 * 4749 * Return Values: 4750 * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be 4751 * ascertained. 4752 */ 4753 int 4754 vmm_get_guest_cpu_mode(struct vcpu *vcpu) 4755 { 4756 uint64_t cr0, efer, cs_ar; 4757 uint8_t l, dib; 4758 struct vmcb *vmcb; 4759 struct vmx_msr_store *msr_store; 4760 4761 if (vmm_softc->mode == VMM_MODE_RVI) { 4762 vmcb = (struct vmcb *)vcpu->vc_control_va; 4763 cr0 = vmcb->v_cr0; 4764 efer = vmcb->v_efer; 4765 cs_ar = vmcb->v_cs.vs_attr; 4766 cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000); 4767 } else if (vmm_softc->mode == VMM_MODE_EPT) { 4768 if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) 4769 return (VMM_CPU_MODE_UNKNOWN); 4770 if (vmread(VMCS_GUEST_IA32_CS_AR, &cs_ar)) 4771 return (VMM_CPU_MODE_UNKNOWN); 4772 msr_store = 4773 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 4774 efer = msr_store[VCPU_REGS_EFER].vms_data; 4775 } else 4776 return (VMM_CPU_MODE_UNKNOWN); 4777 4778 l = (cs_ar & 0x2000) >> 13; 4779 dib = (cs_ar & 0x4000) >> 14; 4780 4781 /* Check CR0.PE */ 4782 if (!(cr0 & CR0_PE)) 4783 return (VMM_CPU_MODE_REAL); 4784 4785 /* Check EFER */ 4786 if (efer & EFER_LMA) { 4787 /* Could be compat or long mode, check CS.L */ 4788 if (l) 4789 return (VMM_CPU_MODE_LONG); 4790 else 4791 return (VMM_CPU_MODE_COMPAT); 4792 } 4793 4794 /* Check prot vs prot32 */ 4795 if (dib) 4796 return (VMM_CPU_MODE_PROT32); 4797 else 4798 return (VMM_CPU_MODE_PROT); 4799 } 4800 4801 /* 4802 * svm_handle_inout 4803 * 4804 * Exit handler for IN/OUT instructions. 4805 * 4806 * Parameters: 4807 * vcpu: The VCPU where the IN/OUT instruction occurred 4808 * 4809 * Return values: 4810 * 0: if successful 4811 * EINVAL: an invalid IN/OUT instruction was encountered 4812 */ 4813 int 4814 svm_handle_inout(struct vcpu *vcpu) 4815 { 4816 uint64_t insn_length, exit_qual; 4817 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 4818 4819 insn_length = vmcb->v_exitinfo2 - vmcb->v_rip; 4820 exit_qual = vmcb->v_exitinfo1; 4821 4822 /* Bit 0 - direction */ 4823 if (exit_qual & 0x1) 4824 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN; 4825 else 4826 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT; 4827 /* Bit 2 - string instruction? */ 4828 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2; 4829 /* Bit 3 - REP prefix? 
*/ 4830 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3; 4831 4832 /* Bits 4:6 - size of exit */ 4833 if (exit_qual & 0x10) 4834 vcpu->vc_exit.vei.vei_size = 1; 4835 else if (exit_qual & 0x20) 4836 vcpu->vc_exit.vei.vei_size = 2; 4837 else if (exit_qual & 0x40) 4838 vcpu->vc_exit.vei.vei_size = 4; 4839 4840 /* Bit 16:31 - port */ 4841 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; 4842 /* Data */ 4843 vcpu->vc_exit.vei.vei_data = vmcb->v_rax; 4844 4845 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length; 4846 4847 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port, 4848 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data); 4849 4850 return (0); 4851 } 4852 4853 /* 4854 * vmx_handle_inout 4855 * 4856 * Exit handler for IN/OUT instructions. 4857 * 4858 * Parameters: 4859 * vcpu: The VCPU where the IN/OUT instruction occurred 4860 * 4861 * Return values: 4862 * 0: if successful 4863 * EINVAL: invalid IN/OUT instruction or vmread failures occurred 4864 */ 4865 int 4866 vmx_handle_inout(struct vcpu *vcpu) 4867 { 4868 uint64_t insn_length, exit_qual; 4869 4870 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 4871 printf("%s: can't obtain instruction length\n", __func__); 4872 return (EINVAL); 4873 } 4874 4875 if (vmx_get_exit_qualification(&exit_qual)) { 4876 printf("%s: can't get exit qual\n", __func__); 4877 return (EINVAL); 4878 } 4879 4880 /* Bits 0:2 - size of exit */ 4881 vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1; 4882 /* Bit 3 - direction */ 4883 if ((exit_qual & 0x8) >> 3) 4884 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN; 4885 else 4886 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT; 4887 /* Bit 4 - string instruction? */ 4888 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4; 4889 /* Bit 5 - REP prefix? */ 4890 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5; 4891 /* Bit 6 - Operand encoding */ 4892 vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6; 4893 /* Bit 16:31 - port */ 4894 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16; 4895 /* Data */ 4896 vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax; 4897 4898 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length; 4899 4900 TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port, 4901 vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data); 4902 4903 return (0); 4904 } 4905 4906 /* 4907 * vmx_load_pdptes 4908 * 4909 * Update the PDPTEs in the VMCS with the values currently indicated by the 4910 * guest CR3. This is used for 32-bit PAE guests when enabling paging. 
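 *
 * The guest %cr3 points at a 32-byte-aligned table of four 8-byte PDPTEs.
 * Example (illustrative): with guest %cr3 = 0x3f000 the entries at
 * guest-physical 0x3f000-0x3f01f are copied into VMCS_GUEST_PDPTE0..3; if
 * that address is not mapped in the VM's pmap the PDPTEs are zeroed instead.
 * The VMCS is reloaded around km_alloc(9)/km_free(9) since both may sleep.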
4911 * 4912 * Parameters 4913 * vcpu: The vcpu whose PDPTEs should be loaded 4914 * 4915 * Return values: 4916 * 0: if successful 4917 * EINVAL: if the PDPTEs could not be loaded 4918 * ENOMEM: memory allocation failure 4919 */ 4920 int 4921 vmx_load_pdptes(struct vcpu *vcpu) 4922 { 4923 uint64_t cr3, cr3_host_phys; 4924 vaddr_t cr3_host_virt; 4925 pd_entry_t *pdptes; 4926 int ret; 4927 4928 if (vmread(VMCS_GUEST_IA32_CR3, &cr3)) { 4929 printf("%s: can't read guest cr3\n", __func__); 4930 return (EINVAL); 4931 } 4932 4933 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3, 4934 (paddr_t *)&cr3_host_phys)) { 4935 DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n", 4936 __func__); 4937 if (vmwrite(VMCS_GUEST_PDPTE0, 0)) { 4938 printf("%s: can't write guest PDPTE0\n", __func__); 4939 return (EINVAL); 4940 } 4941 4942 if (vmwrite(VMCS_GUEST_PDPTE1, 0)) { 4943 printf("%s: can't write guest PDPTE1\n", __func__); 4944 return (EINVAL); 4945 } 4946 4947 if (vmwrite(VMCS_GUEST_PDPTE2, 0)) { 4948 printf("%s: can't write guest PDPTE2\n", __func__); 4949 return (EINVAL); 4950 } 4951 4952 if (vmwrite(VMCS_GUEST_PDPTE3, 0)) { 4953 printf("%s: can't write guest PDPTE3\n", __func__); 4954 return (EINVAL); 4955 } 4956 return (0); 4957 } 4958 4959 ret = 0; 4960 4961 /* We may sleep during km_alloc(9), so reload VMCS. */ 4962 vcpu->vc_last_pcpu = curcpu(); 4963 cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, 4964 &kd_waitok); 4965 if (vcpu_reload_vmcs_vmx(vcpu)) { 4966 printf("%s: failed to reload vmcs\n", __func__); 4967 ret = EINVAL; 4968 goto exit; 4969 } 4970 4971 if (!cr3_host_virt) { 4972 printf("%s: can't allocate address for guest CR3 mapping\n", 4973 __func__); 4974 return (ENOMEM); 4975 } 4976 4977 pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ); 4978 4979 pdptes = (pd_entry_t *)cr3_host_virt; 4980 if (vmwrite(VMCS_GUEST_PDPTE0, pdptes[0])) { 4981 printf("%s: can't write guest PDPTE0\n", __func__); 4982 ret = EINVAL; 4983 goto exit; 4984 } 4985 4986 if (vmwrite(VMCS_GUEST_PDPTE1, pdptes[1])) { 4987 printf("%s: can't write guest PDPTE1\n", __func__); 4988 ret = EINVAL; 4989 goto exit; 4990 } 4991 4992 if (vmwrite(VMCS_GUEST_PDPTE2, pdptes[2])) { 4993 printf("%s: can't write guest PDPTE2\n", __func__); 4994 ret = EINVAL; 4995 goto exit; 4996 } 4997 4998 if (vmwrite(VMCS_GUEST_PDPTE3, pdptes[3])) { 4999 printf("%s: can't write guest PDPTE3\n", __func__); 5000 ret = EINVAL; 5001 goto exit; 5002 } 5003 5004 exit: 5005 pmap_kremove(cr3_host_virt, PAGE_SIZE); 5006 5007 /* km_free(9) might sleep, so we need to reload VMCS. */ 5008 vcpu->vc_last_pcpu = curcpu(); 5009 km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none); 5010 if (vcpu_reload_vmcs_vmx(vcpu)) { 5011 printf("%s: failed to reload vmcs after km_free\n", __func__); 5012 ret = EINVAL; 5013 } 5014 5015 return (ret); 5016 } 5017 5018 /* 5019 * vmx_handle_cr0_write 5020 * 5021 * Write handler for CR0. This function ensures valid values are written into 5022 * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc). 
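 *
 * Invalid guest values are handled by injecting #GP rather than by failing
 * the exit. Examples (illustrative): setting any of bits 63:32, writing
 * 0x80000000 (PG set while PE is clear), or setting NW with CD clear all
 * inject #GP. CR0_NE is always forced on before the value is written back,
 * and a PG transition either flushes the guest TLB (paging turned off) or
 * syncs the IA-32e entry control with EFER.LME and loads PDPTEs for PAE
 * guests (paging turned on).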
5023 * 5024 * Parameters 5025 * vcpu: The vcpu taking the cr0 write exit 5026 * r: The guest's desired (incoming) cr0 value 5027 * 5028 * Return values: 5029 * 0: if successful 5030 * EINVAL: if an error occurred 5031 */ 5032 int 5033 vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r) 5034 { 5035 struct vmx_msr_store *msr_store; 5036 struct vmx_invvpid_descriptor vid; 5037 uint64_t ectls, oldcr0, cr4, mask; 5038 int ret; 5039 5040 /* Check must-be-0 bits */ 5041 mask = vcpu->vc_vmx_cr0_fixed1; 5042 if (~r & mask) { 5043 /* Inject #GP, let the guest handle it */ 5044 DPRINTF("%s: guest set invalid bits in %%cr0. Zeros " 5045 "mask=0x%llx, data=0x%llx\n", __func__, 5046 vcpu->vc_vmx_cr0_fixed1, r); 5047 vmm_inject_gp(vcpu); 5048 return (0); 5049 } 5050 5051 /* Check must-be-1 bits */ 5052 mask = vcpu->vc_vmx_cr0_fixed0; 5053 if ((r & mask) != mask) { 5054 /* Inject #GP, let the guest handle it */ 5055 DPRINTF("%s: guest set invalid bits in %%cr0. Ones " 5056 "mask=0x%llx, data=0x%llx\n", __func__, 5057 vcpu->vc_vmx_cr0_fixed0, r); 5058 vmm_inject_gp(vcpu); 5059 return (0); 5060 } 5061 5062 if (r & 0xFFFFFFFF00000000ULL) { 5063 DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid," 5064 " inject #GP, cr0=0x%llx\n", __func__, r); 5065 vmm_inject_gp(vcpu); 5066 return (0); 5067 } 5068 5069 if ((r & CR0_PG) && (r & CR0_PE) == 0) { 5070 DPRINTF("%s: PG flag set when the PE flag is clear," 5071 " inject #GP, cr0=0x%llx\n", __func__, r); 5072 vmm_inject_gp(vcpu); 5073 return (0); 5074 } 5075 5076 if ((r & CR0_NW) && (r & CR0_CD) == 0) { 5077 DPRINTF("%s: NW flag set when the CD flag is clear," 5078 " inject #GP, cr0=0x%llx\n", __func__, r); 5079 vmm_inject_gp(vcpu); 5080 return (0); 5081 } 5082 5083 if (vmread(VMCS_GUEST_IA32_CR0, &oldcr0)) { 5084 printf("%s: can't read guest cr0\n", __func__); 5085 return (EINVAL); 5086 } 5087 5088 /* CR0 must always have NE set */ 5089 r |= CR0_NE; 5090 5091 if (vmwrite(VMCS_GUEST_IA32_CR0, r)) { 5092 printf("%s: can't write guest cr0\n", __func__); 5093 return (EINVAL); 5094 } 5095 5096 /* If the guest hasn't enabled paging ... */ 5097 if (!(r & CR0_PG) && (oldcr0 & CR0_PG)) { 5098 /* Paging was disabled (prev. enabled) - Flush TLB */ 5099 if (vcpu->vc_vmx_vpid_enabled) { 5100 vid.vid_vpid = vcpu->vc_vpid; 5101 vid.vid_addr = 0; 5102 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid); 5103 } 5104 } else if (!(oldcr0 & CR0_PG) && (r & CR0_PG)) { 5105 /* 5106 * Since the guest has enabled paging, then the IA32_VMX_IA32E_MODE_GUEST 5107 * control must be set to the same as EFER_LME. 5108 */ 5109 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 5110 5111 if (vmread(VMCS_ENTRY_CTLS, &ectls)) { 5112 printf("%s: can't read entry controls", __func__); 5113 return (EINVAL); 5114 } 5115 5116 if (msr_store[VCPU_REGS_EFER].vms_data & EFER_LME) 5117 ectls |= IA32_VMX_IA32E_MODE_GUEST; 5118 else 5119 ectls &= ~IA32_VMX_IA32E_MODE_GUEST; 5120 5121 if (vmwrite(VMCS_ENTRY_CTLS, ectls)) { 5122 printf("%s: can't write entry controls", __func__); 5123 return (EINVAL); 5124 } 5125 5126 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) { 5127 printf("%s: can't read guest cr4\n", __func__); 5128 return (EINVAL); 5129 } 5130 5131 /* Load PDPTEs if PAE guest enabling paging */ 5132 if (cr4 & CR4_PAE) { 5133 ret = vmx_load_pdptes(vcpu); 5134 5135 if (ret) { 5136 printf("%s: updating PDPTEs failed\n", __func__); 5137 return (ret); 5138 } 5139 } 5140 } 5141 5142 return (0); 5143 } 5144 5145 /* 5146 * vmx_handle_cr4_write 5147 * 5148 * Write handler for CR4. 
This function ensures valid values are written into 5149 * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc). 5150 * 5151 * Parameters 5152 * vcpu: The vcpu taking the cr4 write exit 5153 * r: The guest's desired (incoming) cr4 value 5154 * 5155 * Return values: 5156 * 0: if successful 5157 * EINVAL: if an error occurred 5158 */ 5159 int 5160 vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r) 5161 { 5162 uint64_t mask; 5163 5164 /* Check must-be-0 bits */ 5165 mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); 5166 if (r & mask) { 5167 /* Inject #GP, let the guest handle it */ 5168 DPRINTF("%s: guest set invalid bits in %%cr4. Zeros " 5169 "mask=0x%llx, data=0x%llx\n", __func__, 5170 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1, 5171 r); 5172 vmm_inject_gp(vcpu); 5173 return (0); 5174 } 5175 5176 /* Check must-be-1 bits */ 5177 mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0; 5178 if ((r & mask) != mask) { 5179 /* Inject #GP, let the guest handle it */ 5180 DPRINTF("%s: guest set invalid bits in %%cr4. Ones " 5181 "mask=0x%llx, data=0x%llx\n", __func__, 5182 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0, 5183 r); 5184 vmm_inject_gp(vcpu); 5185 return (0); 5186 } 5187 5188 /* CR4_VMXE must always be enabled */ 5189 r |= CR4_VMXE; 5190 5191 if (vmwrite(VMCS_GUEST_IA32_CR4, r)) { 5192 printf("%s: can't write guest cr4\n", __func__); 5193 return (EINVAL); 5194 } 5195 5196 return (0); 5197 } 5198 5199 /* 5200 * vmx_handle_cr 5201 * 5202 * Handle reads/writes to control registers (except CR3) 5203 */ 5204 int 5205 vmx_handle_cr(struct vcpu *vcpu) 5206 { 5207 uint64_t insn_length, exit_qual, r; 5208 uint8_t crnum, dir, reg; 5209 5210 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 5211 printf("%s: can't obtain instruction length\n", __func__); 5212 return (EINVAL); 5213 } 5214 5215 if (vmx_get_exit_qualification(&exit_qual)) { 5216 printf("%s: can't get exit qual\n", __func__); 5217 return (EINVAL); 5218 } 5219 5220 /* Low 4 bits of exit_qual represent the CR number */ 5221 crnum = exit_qual & 0xf; 5222 5223 /* 5224 * Bits 5:4 indicate the direction of operation (or special CR-modifying 5225 * instruction) 5226 */ 5227 dir = (exit_qual & 0x30) >> 4; 5228 5229 /* Bits 11:8 encode the source/target register */ 5230 reg = (exit_qual & 0xf00) >> 8; 5231 5232 switch (dir) { 5233 case CR_WRITE: 5234 if (crnum == 0 || crnum == 4) { 5235 switch (reg) { 5236 case 0: r = vcpu->vc_gueststate.vg_rax; break; 5237 case 1: r = vcpu->vc_gueststate.vg_rcx; break; 5238 case 2: r = vcpu->vc_gueststate.vg_rdx; break; 5239 case 3: r = vcpu->vc_gueststate.vg_rbx; break; 5240 case 4: if (vmread(VMCS_GUEST_IA32_RSP, &r)) { 5241 printf("%s: unable to read guest " 5242 "RSP\n", __func__); 5243 return (EINVAL); 5244 } 5245 break; 5246 case 5: r = vcpu->vc_gueststate.vg_rbp; break; 5247 case 6: r = vcpu->vc_gueststate.vg_rsi; break; 5248 case 7: r = vcpu->vc_gueststate.vg_rdi; break; 5249 case 8: r = vcpu->vc_gueststate.vg_r8; break; 5250 case 9: r = vcpu->vc_gueststate.vg_r9; break; 5251 case 10: r = vcpu->vc_gueststate.vg_r10; break; 5252 case 11: r = vcpu->vc_gueststate.vg_r11; break; 5253 case 12: r = vcpu->vc_gueststate.vg_r12; break; 5254 case 13: r = vcpu->vc_gueststate.vg_r13; break; 5255 case 14: r = vcpu->vc_gueststate.vg_r14; break; 5256 case 15: r = vcpu->vc_gueststate.vg_r15; break; 5257 } 5258 DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n", 5259 __func__, crnum, vcpu->vc_gueststate.vg_rip, r); 5260 } 5261 5262 if (crnum == 0) 5263 vmx_handle_cr0_write(vcpu, r); 5264 5265 if 
(crnum == 4) 5266 vmx_handle_cr4_write(vcpu, r); 5267 5268 break; 5269 case CR_READ: 5270 DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum, 5271 vcpu->vc_gueststate.vg_rip); 5272 break; 5273 case CR_CLTS: 5274 DPRINTF("%s: clts instruction @ %llx\n", __func__, 5275 vcpu->vc_gueststate.vg_rip); 5276 break; 5277 case CR_LMSW: 5278 DPRINTF("%s: lmsw instruction @ %llx\n", __func__, 5279 vcpu->vc_gueststate.vg_rip); 5280 break; 5281 default: 5282 DPRINTF("%s: unknown cr access @ %llx\n", __func__, 5283 vcpu->vc_gueststate.vg_rip); 5284 } 5285 5286 vcpu->vc_gueststate.vg_rip += insn_length; 5287 5288 return (0); 5289 } 5290 5291 /* 5292 * vmx_handle_rdmsr 5293 * 5294 * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access 5295 * and won't end up here. This handler is primarily intended to catch otherwise 5296 * unknown MSR access for possible later inclusion in the bitmap list. For 5297 * each MSR access that ends up here, we log the access (when VMM_DEBUG is 5298 * enabled) 5299 * 5300 * Parameters: 5301 * vcpu: vcpu structure containing instruction info causing the exit 5302 * 5303 * Return value: 5304 * 0: The operation was successful 5305 * EINVAL: An error occurred 5306 */ 5307 int 5308 vmx_handle_rdmsr(struct vcpu *vcpu) 5309 { 5310 uint64_t insn_length; 5311 uint64_t *rax, *rdx; 5312 uint64_t *rcx; 5313 int ret; 5314 5315 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 5316 printf("%s: can't obtain instruction length\n", __func__); 5317 return (EINVAL); 5318 } 5319 5320 if (insn_length != 2) { 5321 DPRINTF("%s: RDMSR with instruction length %lld not " 5322 "supported\n", __func__, insn_length); 5323 return (EINVAL); 5324 } 5325 5326 rax = &vcpu->vc_gueststate.vg_rax; 5327 rcx = &vcpu->vc_gueststate.vg_rcx; 5328 rdx = &vcpu->vc_gueststate.vg_rdx; 5329 5330 switch (*rcx) { 5331 case MSR_BIOS_SIGN: 5332 case MSR_PLATFORM_ID: 5333 /* Ignored */ 5334 *rax = 0; 5335 *rdx = 0; 5336 break; 5337 case MSR_CR_PAT: 5338 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL); 5339 *rdx = (vcpu->vc_shadow_pat >> 32); 5340 break; 5341 default: 5342 /* Unsupported MSRs causes #GP exception, don't advance %rip */ 5343 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n", 5344 __func__, *rcx); 5345 ret = vmm_inject_gp(vcpu); 5346 return (ret); 5347 } 5348 5349 vcpu->vc_gueststate.vg_rip += insn_length; 5350 5351 return (0); 5352 } 5353 5354 /* 5355 * vmx_handle_xsetbv 5356 * 5357 * VMX-specific part of the xsetbv instruction exit handler 5358 * 5359 * Parameters: 5360 * vcpu: vcpu structure containing instruction info causing the exit 5361 * 5362 * Return value: 5363 * 0: The operation was successful 5364 * EINVAL: An error occurred 5365 */ 5366 int 5367 vmx_handle_xsetbv(struct vcpu *vcpu) 5368 { 5369 uint64_t insn_length, *rax; 5370 int ret; 5371 5372 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 5373 printf("%s: can't obtain instruction length\n", __func__); 5374 return (EINVAL); 5375 } 5376 5377 /* All XSETBV instructions are 3 bytes */ 5378 if (insn_length != 3) { 5379 DPRINTF("%s: XSETBV with instruction length %lld not " 5380 "supported\n", __func__, insn_length); 5381 return (EINVAL); 5382 } 5383 5384 rax = &vcpu->vc_gueststate.vg_rax; 5385 5386 ret = vmm_handle_xsetbv(vcpu, rax); 5387 5388 vcpu->vc_gueststate.vg_rip += insn_length; 5389 5390 return ret; 5391 } 5392 5393 /* 5394 * svm_handle_xsetbv 5395 * 5396 * SVM-specific part of the xsetbv instruction exit handler 5397 * 5398 * Parameters: 5399 * vcpu: vcpu structure containing instruction info causing the 
exit 5400 * 5401 * Return value: 5402 * 0: The operation was successful 5403 * EINVAL: An error occurred 5404 */ 5405 int 5406 svm_handle_xsetbv(struct vcpu *vcpu) 5407 { 5408 uint64_t insn_length, *rax; 5409 int ret; 5410 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 5411 5412 /* All XSETBV instructions are 3 bytes */ 5413 insn_length = 3; 5414 5415 rax = &vmcb->v_rax; 5416 5417 ret = vmm_handle_xsetbv(vcpu, rax); 5418 5419 vcpu->vc_gueststate.vg_rip += insn_length; 5420 5421 return ret; 5422 } 5423 5424 /* 5425 * vmm_handle_xsetbv 5426 * 5427 * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values 5428 * limited to the xsave_mask in use in the host. 5429 * 5430 * Parameters: 5431 * vcpu: vcpu structure containing instruction info causing the exit 5432 * rax: pointer to guest %rax 5433 * 5434 * Return value: 5435 * 0: The operation was successful 5436 * EINVAL: An error occurred 5437 */ 5438 int 5439 vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax) 5440 { 5441 uint64_t *rdx, *rcx, val; 5442 5443 rcx = &vcpu->vc_gueststate.vg_rcx; 5444 rdx = &vcpu->vc_gueststate.vg_rdx; 5445 5446 if (vmm_get_guest_cpu_cpl(vcpu) != 0) { 5447 DPRINTF("%s: guest cpl not zero\n", __func__); 5448 return (vmm_inject_gp(vcpu)); 5449 } 5450 5451 if (*rcx != 0) { 5452 DPRINTF("%s: guest specified invalid xcr register number " 5453 "%lld\n", __func__, *rcx); 5454 return (vmm_inject_gp(vcpu)); 5455 } 5456 5457 val = *rax + (*rdx << 32); 5458 if (val & ~xsave_mask) { 5459 DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n", 5460 __func__, val); 5461 return (vmm_inject_gp(vcpu)); 5462 } 5463 5464 vcpu->vc_gueststate.vg_xcr0 = val; 5465 5466 return (0); 5467 } 5468 5469 /* 5470 * vmx_handle_misc_enable_msr 5471 * 5472 * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We 5473 * limit what the guest can write to this MSR (certain hardware-related 5474 * settings like speedstep, etc). 5475 * 5476 * Parameters: 5477 * vcpu: vcpu structure containing information about the wrmsr causing this 5478 * exit 5479 */ 5480 void 5481 vmx_handle_misc_enable_msr(struct vcpu *vcpu) 5482 { 5483 uint64_t *rax, *rdx; 5484 struct vmx_msr_store *msr_store; 5485 5486 rax = &vcpu->vc_gueststate.vg_rax; 5487 rdx = &vcpu->vc_gueststate.vg_rdx; 5488 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 5489 5490 /* Filter out guest writes to TCC, EIST, and xTPR */ 5491 *rax &= ~(MISC_ENABLE_TCC | MISC_ENABLE_EIST_ENABLED | 5492 MISC_ENABLE_xTPR_MESSAGE_DISABLE); 5493 5494 msr_store[VCPU_REGS_MISC_ENABLE].vms_data = *rax | (*rdx << 32); 5495 } 5496 5497 /* 5498 * vmx_handle_wrmsr 5499 * 5500 * Handler for wrmsr instructions. This handler logs the access, and discards 5501 * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end 5502 * up here (it will be whitelisted in the MSR bitmap). 
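 *
 * The 64-bit value is assembled as (%rdx << 32) | (%rax & 0xffffffff).
 * Example (illustrative): a guest wrmsr to MSR_CR_PAT with
 * %rax = %rdx = 0x00070406 stores the power-on default PAT value
 * 0x0007040600070406 in vc_shadow_pat once vmm_pat_is_valid() accepts it;
 * an invalid PAT or a write to MSR_SMM_MONITOR_CTL injects #GP without
 * advancing %rip.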
5503 * 5504 * Parameters: 5505 * vcpu: vcpu structure containing instruction info causing the exit 5506 * 5507 * Return value: 5508 * 0: The operation was successful 5509 * EINVAL: An error occurred 5510 */ 5511 int 5512 vmx_handle_wrmsr(struct vcpu *vcpu) 5513 { 5514 uint64_t insn_length, val; 5515 uint64_t *rax, *rdx, *rcx; 5516 int ret; 5517 5518 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 5519 printf("%s: can't obtain instruction length\n", __func__); 5520 return (EINVAL); 5521 } 5522 5523 if (insn_length != 2) { 5524 DPRINTF("%s: WRMSR with instruction length %lld not " 5525 "supported\n", __func__, insn_length); 5526 return (EINVAL); 5527 } 5528 5529 rax = &vcpu->vc_gueststate.vg_rax; 5530 rcx = &vcpu->vc_gueststate.vg_rcx; 5531 rdx = &vcpu->vc_gueststate.vg_rdx; 5532 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL); 5533 5534 switch (*rcx) { 5535 case MSR_CR_PAT: 5536 if (!vmm_pat_is_valid(val)) { 5537 ret = vmm_inject_gp(vcpu); 5538 return (ret); 5539 } 5540 vcpu->vc_shadow_pat = val; 5541 break; 5542 case MSR_MISC_ENABLE: 5543 vmx_handle_misc_enable_msr(vcpu); 5544 break; 5545 case MSR_SMM_MONITOR_CTL: 5546 /* 5547 * 34.15.5 - Enabling dual monitor treatment 5548 * 5549 * Unsupported, so inject #GP and return without 5550 * advancing %rip. 5551 */ 5552 ret = vmm_inject_gp(vcpu); 5553 return (ret); 5554 case KVM_MSR_SYSTEM_TIME: 5555 vmm_init_pvclock(vcpu, 5556 (*rax & 0xFFFFFFFFULL) | (*rdx << 32)); 5557 break; 5558 #ifdef VMM_DEBUG 5559 default: 5560 /* 5561 * Log the access, to be able to identify unknown MSRs 5562 */ 5563 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data " 5564 "written from guest=0x%llx:0x%llx\n", __func__, 5565 *rcx, *rdx, *rax); 5566 #endif /* VMM_DEBUG */ 5567 } 5568 5569 vcpu->vc_gueststate.vg_rip += insn_length; 5570 5571 return (0); 5572 } 5573 5574 /* 5575 * svm_handle_msr 5576 * 5577 * Handler for MSR instructions. 
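 *
 * The VMCB's exitinfo1 gives the direction (1 = WRMSR, 0 = RDMSR); %rcx
 * selects the MSR and the value is split across %rax/%rdx, with the guest
 * %rax taken from the VMCB. Example (illustrative): a guest rdmsr of
 * MSR_DE_CFG returns DE_CFG_SERIALIZE_LFENCE in %rax and 0 in %rdx, since
 * the host always sets the LFENCE-serializing bit.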
5578 * 5579 * Parameters: 5580 * vcpu: vcpu structure containing instruction info causing the exit 5581 * 5582 * Return value: 5583 * Always 0 (successful) 5584 */ 5585 int 5586 svm_handle_msr(struct vcpu *vcpu) 5587 { 5588 uint64_t insn_length, val; 5589 uint64_t *rax, *rcx, *rdx; 5590 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 5591 int ret; 5592 5593 /* XXX: Validate RDMSR / WRMSR insn_length */ 5594 insn_length = 2; 5595 5596 rax = &vmcb->v_rax; 5597 rcx = &vcpu->vc_gueststate.vg_rcx; 5598 rdx = &vcpu->vc_gueststate.vg_rdx; 5599 5600 if (vmcb->v_exitinfo1 == 1) { 5601 /* WRMSR */ 5602 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL); 5603 5604 switch (*rcx) { 5605 case MSR_CR_PAT: 5606 if (!vmm_pat_is_valid(val)) { 5607 ret = vmm_inject_gp(vcpu); 5608 return (ret); 5609 } 5610 vcpu->vc_shadow_pat = val; 5611 break; 5612 case MSR_EFER: 5613 vmcb->v_efer = *rax | EFER_SVME; 5614 break; 5615 case KVM_MSR_SYSTEM_TIME: 5616 vmm_init_pvclock(vcpu, 5617 (*rax & 0xFFFFFFFFULL) | (*rdx << 32)); 5618 break; 5619 default: 5620 /* Log the access, to be able to identify unknown MSRs */ 5621 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data " 5622 "written from guest=0x%llx:0x%llx\n", __func__, 5623 *rcx, *rdx, *rax); 5624 } 5625 } else { 5626 /* RDMSR */ 5627 switch (*rcx) { 5628 case MSR_BIOS_SIGN: 5629 case MSR_INT_PEN_MSG: 5630 case MSR_PLATFORM_ID: 5631 /* Ignored */ 5632 *rax = 0; 5633 *rdx = 0; 5634 break; 5635 case MSR_CR_PAT: 5636 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL); 5637 *rdx = (vcpu->vc_shadow_pat >> 32); 5638 break; 5639 case MSR_DE_CFG: 5640 /* LFENCE serializing bit is set by host */ 5641 *rax = DE_CFG_SERIALIZE_LFENCE; 5642 *rdx = 0; 5643 break; 5644 default: 5645 /* 5646 * Unsupported MSRs causes #GP exception, don't advance 5647 * %rip 5648 */ 5649 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), " 5650 "injecting #GP\n", __func__, *rcx); 5651 ret = vmm_inject_gp(vcpu); 5652 return (ret); 5653 } 5654 } 5655 5656 vcpu->vc_gueststate.vg_rip += insn_length; 5657 5658 return (0); 5659 } 5660 5661 /* Handle cpuid(0xd) and its subleafs */ 5662 static void 5663 vmm_handle_cpuid_0xd(struct vcpu *vcpu, uint32_t subleaf, uint64_t *rax, 5664 uint32_t eax, uint32_t ebx, uint32_t ecx, uint32_t edx) 5665 { 5666 if (subleaf == 0) { 5667 /* 5668 * CPUID(0xd.0) depends on the value in XCR0 and MSR_XSS. If 5669 * the guest XCR0 isn't the same as the host then set it, redo 5670 * the CPUID, and restore it. 5671 */ 5672 uint64_t xcr0 = vcpu->vc_gueststate.vg_xcr0; 5673 5674 /* 5675 * "ecx enumerates the size required ... for an area 5676 * containing all the ... components supported by this 5677 * processor" 5678 * "ebx enumerates the size required ... for an area 5679 * containing all the ... 
components corresponding to bits 5680 * currently set in xcr0" 5681 * So: since the VMM 'processor' is what our base kernel uses, 5682 * the VMM ecx is our ebx 5683 */ 5684 ecx = ebx; 5685 if (xcr0 != (xsave_mask & XFEATURE_XCR0_MASK)) { 5686 uint32_t dummy; 5687 xsetbv(0, xcr0); 5688 CPUID_LEAF(0xd, subleaf, eax, ebx, dummy, edx); 5689 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK); 5690 } 5691 eax = xsave_mask & XFEATURE_XCR0_MASK; 5692 edx = (xsave_mask & XFEATURE_XCR0_MASK) >> 32; 5693 } else if (subleaf == 1) { 5694 /* mask out XSAVEC, XSAVES, and XFD support */ 5695 eax &= XSAVE_XSAVEOPT | XSAVE_XGETBV1; 5696 ebx = 0; /* no xsavec or xsaves for now */ 5697 ecx = edx = 0; /* no xsaves for now */ 5698 } else if (subleaf >= 63 || 5699 ((1ULL << subleaf) & xsave_mask & XFEATURE_XCR0_MASK) == 0) { 5700 /* disclaim subleaves of features we don't expose */ 5701 eax = ebx = ecx = edx = 0; 5702 } else { 5703 /* disclaim compressed alignment or xfd support */ 5704 ecx = 0; 5705 } 5706 5707 *rax = eax; 5708 vcpu->vc_gueststate.vg_rbx = ebx; 5709 vcpu->vc_gueststate.vg_rcx = ecx; 5710 vcpu->vc_gueststate.vg_rdx = edx; 5711 } 5712 5713 /* 5714 * vmm_handle_cpuid 5715 * 5716 * Exit handler for CPUID instruction 5717 * 5718 * Parameters: 5719 * vcpu: vcpu causing the CPUID exit 5720 * 5721 * Return value: 5722 * 0: the exit was processed successfully 5723 * EINVAL: error occurred validating the CPUID instruction arguments 5724 */ 5725 int 5726 vmm_handle_cpuid(struct vcpu *vcpu) 5727 { 5728 uint64_t insn_length, cr4; 5729 uint64_t *rax, *rbx, *rcx, *rdx; 5730 struct vmcb *vmcb; 5731 uint32_t leaf, subleaf, eax, ebx, ecx, edx; 5732 struct vmx_msr_store *msr_store; 5733 int vmm_cpuid_level; 5734 5735 /* what's the cpuid level we support/advertise? */ 5736 vmm_cpuid_level = cpuid_level; 5737 if (vmm_cpuid_level < 0x15 && tsc_is_invariant) 5738 vmm_cpuid_level = 0x15; 5739 5740 if (vmm_softc->mode == VMM_MODE_EPT) { 5741 if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { 5742 DPRINTF("%s: can't obtain instruction length\n", 5743 __func__); 5744 return (EINVAL); 5745 } 5746 5747 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) { 5748 DPRINTF("%s: can't obtain cr4\n", __func__); 5749 return (EINVAL); 5750 } 5751 5752 rax = &vcpu->vc_gueststate.vg_rax; 5753 5754 /* 5755 * "CPUID leaves above 02H and below 80000000H are only 5756 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its 5757 * default value 0" 5758 */ 5759 msr_store = 5760 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va; 5761 if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data & 5762 MISC_ENABLE_LIMIT_CPUID_MAXVAL) 5763 vmm_cpuid_level = 0x02; 5764 } else { 5765 /* XXX: validate insn_length 2 */ 5766 insn_length = 2; 5767 vmcb = (struct vmcb *)vcpu->vc_control_va; 5768 rax = &vmcb->v_rax; 5769 cr4 = vmcb->v_cr4; 5770 } 5771 5772 rbx = &vcpu->vc_gueststate.vg_rbx; 5773 rcx = &vcpu->vc_gueststate.vg_rcx; 5774 rdx = &vcpu->vc_gueststate.vg_rdx; 5775 vcpu->vc_gueststate.vg_rip += insn_length; 5776 5777 leaf = *rax; 5778 subleaf = *rcx; 5779 5780 /* 5781 * "If a value entered for CPUID.EAX is higher than the maximum input 5782 * value for basic or extended function for that processor then the 5783 * data for the highest basic information leaf is returned." 5784 * 5785 * "When CPUID returns the highest basic leaf information as a result 5786 * of an invalid input EAX value, any dependence on input ECX value 5787 * in the basic leaf is honored." 
5788 * 5789 * This means if leaf is between vmm_cpuid_level and 0x40000000 (the start 5790 * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without 5791 * altering subleaf. Also, if leaf is greater than the extended function 5792 * info, clamp also to vmm_cpuid_level. 5793 */ 5794 if ((leaf > vmm_cpuid_level && leaf < 0x40000000) || 5795 (leaf > curcpu()->ci_pnfeatset)) { 5796 DPRINTF("%s: invalid cpuid input leaf 0x%x, guest rip=" 5797 "0x%llx - resetting to 0x%x\n", __func__, leaf, 5798 vcpu->vc_gueststate.vg_rip - insn_length, 5799 vmm_cpuid_level); 5800 leaf = vmm_cpuid_level; 5801 } 5802 5803 /* we fake up values in the range (cpuid_level, vmm_cpuid_level] */ 5804 if (leaf <= cpuid_level || leaf > 0x80000000) 5805 CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx); 5806 else 5807 eax = ebx = ecx = edx = 0; 5808 5809 switch (leaf) { 5810 case 0x00: /* Max level and vendor ID */ 5811 *rax = vmm_cpuid_level; 5812 *rbx = *((uint32_t *)&cpu_vendor); 5813 *rdx = *((uint32_t *)&cpu_vendor + 1); 5814 *rcx = *((uint32_t *)&cpu_vendor + 2); 5815 break; 5816 case 0x01: /* Version, brand, feature info */ 5817 *rax = cpu_id; 5818 /* mask off host's APIC ID, reset to vcpu id */ 5819 *rbx = cpu_ebxfeature & 0x0000FFFF; 5820 *rbx |= (vcpu->vc_id & 0xFF) << 24; 5821 *rcx = (cpu_ecxfeature | CPUIDECX_HV) & VMM_CPUIDECX_MASK; 5822 5823 /* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */ 5824 if (cr4 & CR4_OSXSAVE) 5825 *rcx |= CPUIDECX_OSXSAVE; 5826 else 5827 *rcx &= ~CPUIDECX_OSXSAVE; 5828 5829 *rdx = curcpu()->ci_feature_flags & VMM_CPUIDEDX_MASK; 5830 break; 5831 case 0x02: /* Cache and TLB information */ 5832 *rax = eax; 5833 *rbx = ebx; 5834 *rcx = ecx; 5835 *rdx = edx; 5836 break; 5837 case 0x03: /* Processor serial number (not supported) */ 5838 DPRINTF("%s: function 0x03 (processor serial number) not " 5839 "supported\n", __func__); 5840 *rax = 0; 5841 *rbx = 0; 5842 *rcx = 0; 5843 *rdx = 0; 5844 break; 5845 case 0x04: /* Deterministic cache info */ 5846 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK; 5847 *rbx = ebx; 5848 *rcx = ecx; 5849 *rdx = edx; 5850 break; 5851 case 0x05: /* MONITOR/MWAIT (not supported) */ 5852 DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n", 5853 __func__); 5854 *rax = 0; 5855 *rbx = 0; 5856 *rcx = 0; 5857 *rdx = 0; 5858 break; 5859 case 0x06: /* Thermal / Power management (not supported) */ 5860 DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n", 5861 __func__); 5862 *rax = 0; 5863 *rbx = 0; 5864 *rcx = 0; 5865 *rdx = 0; 5866 break; 5867 case 0x07: /* SEFF */ 5868 if (subleaf == 0) { 5869 *rax = 0; /* Highest subleaf supported */ 5870 *rbx = curcpu()->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK; 5871 *rcx = curcpu()->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK; 5872 *rdx = curcpu()->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK; 5873 /* 5874 * Only expose PKU support if we've detected it in use 5875 * on the host. 5876 */ 5877 if (vmm_softc->sc_md.pkru_enabled) 5878 *rcx |= SEFF0ECX_PKU; 5879 else 5880 *rcx &= ~SEFF0ECX_PKU; 5881 5882 /* Expose IBT bit if we've enabled CET on the host. 
*/ 5883 if (rcr4() & CR4_CET) 5884 *rdx |= SEFF0EDX_IBT; 5885 else 5886 *rdx &= ~SEFF0EDX_IBT; 5887 5888 } else { 5889 /* Unsupported subleaf */ 5890 DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf " 5891 "0x%x not supported\n", __func__, subleaf); 5892 *rax = 0; 5893 *rbx = 0; 5894 *rcx = 0; 5895 *rdx = 0; 5896 } 5897 break; 5898 case 0x09: /* Direct Cache Access (not supported) */ 5899 DPRINTF("%s: function 0x09 (direct cache access) not " 5900 "supported\n", __func__); 5901 *rax = 0; 5902 *rbx = 0; 5903 *rcx = 0; 5904 *rdx = 0; 5905 break; 5906 case 0x0a: /* Architectural perf monitoring (not supported) */ 5907 DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n", 5908 __func__); 5909 *rax = 0; 5910 *rbx = 0; 5911 *rcx = 0; 5912 *rdx = 0; 5913 break; 5914 case 0x0b: /* Extended topology enumeration (not supported) */ 5915 DPRINTF("%s: function 0x0b (topology enumeration) not " 5916 "supported\n", __func__); 5917 *rax = 0; 5918 *rbx = 0; 5919 *rcx = 0; 5920 *rdx = 0; 5921 break; 5922 case 0x0d: /* Processor ext. state information */ 5923 vmm_handle_cpuid_0xd(vcpu, subleaf, rax, eax, ebx, ecx, edx); 5924 break; 5925 case 0x0f: /* QoS info (not supported) */ 5926 DPRINTF("%s: function 0x0f (QoS info) not supported\n", 5927 __func__); 5928 *rax = 0; 5929 *rbx = 0; 5930 *rcx = 0; 5931 *rdx = 0; 5932 break; 5933 case 0x14: /* Processor Trace info (not supported) */ 5934 DPRINTF("%s: function 0x14 (processor trace info) not " 5935 "supported\n", __func__); 5936 *rax = 0; 5937 *rbx = 0; 5938 *rcx = 0; 5939 *rdx = 0; 5940 break; 5941 case 0x15: 5942 if (cpuid_level >= 0x15) { 5943 *rax = eax; 5944 *rbx = ebx; 5945 *rcx = ecx; 5946 *rdx = edx; 5947 } else { 5948 KASSERT(tsc_is_invariant); 5949 *rax = 1; 5950 *rbx = 100; 5951 *rcx = tsc_frequency / 100; 5952 *rdx = 0; 5953 } 5954 break; 5955 case 0x16: /* Processor frequency info */ 5956 *rax = eax; 5957 *rbx = ebx; 5958 *rcx = ecx; 5959 *rdx = edx; 5960 break; 5961 case 0x40000000: /* Hypervisor information */ 5962 *rax = 0; 5963 *rbx = *((uint32_t *)&vmm_hv_signature[0]); 5964 *rcx = *((uint32_t *)&vmm_hv_signature[4]); 5965 *rdx = *((uint32_t *)&vmm_hv_signature[8]); 5966 break; 5967 case 0x40000001: /* KVM hypervisor features */ 5968 *rax = (1 << KVM_FEATURE_CLOCKSOURCE2) | 5969 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 5970 *rbx = 0; 5971 *rcx = 0; 5972 *rdx = 0; 5973 break; 5974 case 0x80000000: /* Extended function level */ 5975 /* We don't emulate past 0x8000001f currently. 
*/ 5976 *rax = min(curcpu()->ci_pnfeatset, 0x8000001f); 5977 *rbx = 0; 5978 *rcx = 0; 5979 *rdx = 0; 5980 break; 5981 case 0x80000001: /* Extended function info */ 5982 *rax = curcpu()->ci_efeature_eax; 5983 *rbx = 0; /* Reserved */ 5984 *rcx = curcpu()->ci_efeature_ecx & VMM_ECPUIDECX_MASK; 5985 *rdx = curcpu()->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK; 5986 break; 5987 case 0x80000002: /* Brand string */ 5988 *rax = curcpu()->ci_brand[0]; 5989 *rbx = curcpu()->ci_brand[1]; 5990 *rcx = curcpu()->ci_brand[2]; 5991 *rdx = curcpu()->ci_brand[3]; 5992 break; 5993 case 0x80000003: /* Brand string */ 5994 *rax = curcpu()->ci_brand[4]; 5995 *rbx = curcpu()->ci_brand[5]; 5996 *rcx = curcpu()->ci_brand[6]; 5997 *rdx = curcpu()->ci_brand[7]; 5998 break; 5999 case 0x80000004: /* Brand string */ 6000 *rax = curcpu()->ci_brand[8]; 6001 *rbx = curcpu()->ci_brand[9]; 6002 *rcx = curcpu()->ci_brand[10]; 6003 *rdx = curcpu()->ci_brand[11]; 6004 break; 6005 case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */ 6006 *rax = eax; 6007 *rbx = ebx; 6008 *rcx = ecx; 6009 *rdx = edx; 6010 break; 6011 case 0x80000006: /* ext. cache info */ 6012 *rax = eax; 6013 *rbx = ebx; 6014 *rcx = ecx; 6015 *rdx = edx; 6016 break; 6017 case 0x80000007: /* apmi */ 6018 *rax = eax; 6019 *rbx = ebx; 6020 *rcx = ecx; 6021 *rdx = edx & VMM_APMI_EDX_INCLUDE_MASK; 6022 break; 6023 case 0x80000008: /* Phys bits info and topology (AMD) */ 6024 *rax = eax; 6025 *rbx = ebx & VMM_AMDSPEC_EBX_MASK; 6026 /* Reset %rcx (topology) */ 6027 *rcx = 0; 6028 *rdx = edx; 6029 break; 6030 case 0x8000001d: /* cache topology (AMD) */ 6031 *rax = eax; 6032 *rbx = ebx; 6033 *rcx = ecx; 6034 *rdx = edx; 6035 break; 6036 case 0x8000001f: /* encryption features (AMD) */ 6037 *rax = eax; 6038 *rbx = ebx; 6039 *rcx = ecx; 6040 *rdx = edx; 6041 break; 6042 default: 6043 DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax); 6044 *rax = 0; 6045 *rbx = 0; 6046 *rcx = 0; 6047 *rdx = 0; 6048 } 6049 6050 6051 if (vmm_softc->mode == VMM_MODE_RVI) { 6052 /* 6053 * update %rax. the rest of the registers get updated in 6054 * svm_enter_guest 6055 */ 6056 vmcb->v_rax = *rax; 6057 } 6058 6059 return (0); 6060 } 6061 6062 /* 6063 * vcpu_run_svm 6064 * 6065 * SVM main loop used to run a VCPU. 6066 * 6067 * Parameters: 6068 * vcpu: The VCPU to run 6069 * vrp: run parameters 6070 * 6071 * Return values: 6072 * 0: The run loop exited and no help is needed from vmd 6073 * EAGAIN: The run loop exited and help from vmd is needed 6074 * EINVAL: an error occurred 6075 */ 6076 int 6077 vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp) 6078 { 6079 int ret = 0; 6080 struct region_descriptor gdt; 6081 struct cpu_info *ci = NULL; 6082 uint64_t exit_reason; 6083 struct schedstate_percpu *spc; 6084 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; 6085 6086 if (vrp->vrp_intr_pending) 6087 vcpu->vc_intr = 1; 6088 else 6089 vcpu->vc_intr = 0; 6090 6091 /* 6092 * If we are returning from userspace (vmd) because we exited 6093 * last time, fix up any needed vcpu state first. Which state 6094 * needs to be fixed up depends on what vmd populated in the 6095 * exit data structure. 
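 * For example, for an emulated 'in' the data vmd returned in vei_data is
 * copied into guest %rax below, and for any I/O exit %rip is restored from
 * the register state vmd passed back; after a nested page fault handled by
 * vmd, the full set of guest GPRs it supplies is written back via
 * vcpu_writeregs_svm().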
6096 */ 6097 switch (vcpu->vc_gueststate.vg_exit_reason) { 6098 case SVM_VMEXIT_IOIO: 6099 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) { 6100 vcpu->vc_gueststate.vg_rax = 6101 vcpu->vc_exit.vei.vei_data; 6102 vmcb->v_rax = vcpu->vc_gueststate.vg_rax; 6103 } 6104 vcpu->vc_gueststate.vg_rip = 6105 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP]; 6106 vmcb->v_rip = vcpu->vc_gueststate.vg_rip; 6107 break; 6108 case SVM_VMEXIT_NPF: 6109 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS, 6110 &vcpu->vc_exit.vrs); 6111 if (ret) { 6112 printf("%s: vm %d vcpu %d failed to update " 6113 "registers\n", __func__, 6114 vcpu->vc_parent->vm_id, vcpu->vc_id); 6115 return (EINVAL); 6116 } 6117 break; 6118 } 6119 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit)); 6120 6121 while (ret == 0) { 6122 vmm_update_pvclock(vcpu); 6123 if (ci != curcpu()) { 6124 /* 6125 * We are launching for the first time, or we are 6126 * resuming from a different pcpu, so we need to 6127 * reset certain pcpu-specific values. 6128 */ 6129 ci = curcpu(); 6130 setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1); 6131 6132 if (ci != vcpu->vc_last_pcpu) { 6133 /* 6134 * Flush TLB by guest ASID if feature 6135 * available, flush entire TLB if not. 6136 */ 6137 if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid) 6138 vmcb->v_tlb_control = 6139 SVM_TLB_CONTROL_FLUSH_ASID; 6140 else 6141 vmcb->v_tlb_control = 6142 SVM_TLB_CONTROL_FLUSH_ALL; 6143 6144 svm_set_dirty(vcpu, SVM_CLEANBITS_ALL); 6145 } 6146 6147 vcpu->vc_last_pcpu = ci; 6148 6149 if (gdt.rd_base == 0) { 6150 ret = EINVAL; 6151 break; 6152 } 6153 } 6154 6155 /* Handle vmd(8) injected interrupts */ 6156 /* Is there an interrupt pending injection? */ 6157 if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR && 6158 vcpu->vc_irqready) { 6159 vmcb->v_eventinj = vcpu->vc_inject.vie_vector | 6160 (1U << 31); 6161 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE; 6162 } 6163 6164 /* Inject event if present */ 6165 if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) { 6166 vmcb->v_eventinj = vcpu->vc_inject.vie_vector; 6167 6168 /* Set the "Event Valid" flag for certain vectors */ 6169 switch (vcpu->vc_inject.vie_vector) { 6170 case VMM_EX_BP: 6171 case VMM_EX_OF: 6172 case VMM_EX_DB: 6173 /* 6174 * Software exception. 6175 * XXX check nRIP support. 6176 */ 6177 vmcb->v_eventinj |= (4ULL << 8); 6178 break; 6179 case VMM_EX_AC: 6180 vcpu->vc_inject.vie_errorcode = 0; 6181 /* fallthrough */ 6182 case VMM_EX_DF: 6183 case VMM_EX_TS: 6184 case VMM_EX_NP: 6185 case VMM_EX_SS: 6186 case VMM_EX_GP: 6187 case VMM_EX_PF: 6188 /* Hardware exception. */ 6189 vmcb->v_eventinj |= (3ULL << 8); 6190 6191 if (vmcb->v_cr0 & CR0_PE) { 6192 /* Error code valid. */ 6193 vmcb->v_eventinj |= (1ULL << 11); 6194 vmcb->v_eventinj |= (uint64_t) 6195 vcpu->vc_inject.vie_errorcode << 32; 6196 } 6197 break; 6198 default: 6199 printf("%s: unsupported exception vector %u\n", 6200 __func__, vcpu->vc_inject.vie_vector); 6201 ret = EINVAL; 6202 } /* switch */ 6203 if (ret == EINVAL) 6204 break; 6205 6206 /* Event is valid. */ 6207 vmcb->v_eventinj |= (1U << 31); 6208 vcpu->vc_inject.vie_type = VCPU_INJECT_NONE; 6209 } 6210 6211 TRACEPOINT(vmm, guest_enter, vcpu, vrp); 6212 6213 /* Start / resume the VCPU */ 6214 /* Disable interrupts and save the current host FPU state. */ 6215 clgi(); 6216 if ((ret = vmm_fpurestore(vcpu))) { 6217 stgi(); 6218 break; 6219 } 6220 6221 /* 6222 * If we're resuming to a different VCPU and have IBPB, 6223 * then use it to prevent cross-VM branch-target injection. 
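 * Writing PRED_CMD_IBPB to MSR_PRED_CMD acts as an indirect branch
 * prediction barrier: branch targets trained while the previous guest ran
 * cannot be used to steer indirect branches in the guest we are about to
 * enter.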
6224 */ 6225 if (ci->ci_guest_vcpu != vcpu && 6226 (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBPB)) { 6227 wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB); 6228 ci->ci_guest_vcpu = vcpu; 6229 } 6230 6231 /* Restore any guest PKRU state. */ 6232 if (vmm_softc->sc_md.pkru_enabled) 6233 wrpkru(0, vcpu->vc_pkru); 6234 6235 KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR); 6236 wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa); 6237 6238 ret = svm_enter_guest(vcpu->vc_control_pa, 6239 &vcpu->vc_gueststate, &gdt); 6240 6241 /* Restore host PKRU state. */ 6242 if (vmm_softc->sc_md.pkru_enabled) { 6243 vcpu->vc_pkru = rdpkru(0); 6244 wrpkru(0, PGK_VALUE); 6245 } 6246 6247 /* 6248 * On exit, interrupts are disabled, and we are running with 6249 * the guest FPU state still possibly on the CPU. Save the FPU 6250 * state before re-enabling interrupts. 6251 */ 6252 vmm_fpusave(vcpu); 6253 6254 /* 6255 * Enable interrupts now. Note that if the exit was due to INTR 6256 * (external interrupt), the interrupt will be processed now. 6257 */ 6258 stgi(); 6259 6260 vcpu->vc_gueststate.vg_rip = vmcb->v_rip; 6261 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE; 6262 svm_set_clean(vcpu, SVM_CLEANBITS_ALL); 6263 6264 /* If we exited successfully ... */ 6265 if (ret == 0) { 6266 exit_reason = vmcb->v_exitcode; 6267 vcpu->vc_gueststate.vg_exit_reason = exit_reason; 6268 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason); 6269 6270 vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags; 6271 6272 /* 6273 * Handle the exit. This will alter "ret" to EAGAIN if 6274 * the exit handler determines help from vmd is needed. 6275 */ 6276 ret = svm_handle_exit(vcpu); 6277 6278 if (vcpu->vc_gueststate.vg_rflags & PSL_I) 6279 vcpu->vc_irqready = 1; 6280 else 6281 vcpu->vc_irqready = 0; 6282 6283 /* 6284 * If not ready for interrupts, but interrupts pending, 6285 * enable interrupt window exiting. 6286 */ 6287 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) { 6288 vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR; 6289 vmcb->v_irq = 1; 6290 vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR; 6291 vmcb->v_intr_vector = 0; 6292 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR | 6293 SVM_CLEANBITS_I); 6294 } 6295 6296 /* 6297 * Exit to vmd if we are terminating, failed to enter, 6298 * or need help (device I/O) 6299 */ 6300 if (ret || vcpu_must_stop(vcpu)) 6301 break; 6302 6303 if (vcpu->vc_intr && vcpu->vc_irqready) { 6304 ret = EAGAIN; 6305 break; 6306 } 6307 6308 /* Check if we should yield - don't hog the cpu */ 6309 spc = &ci->ci_schedstate; 6310 if (spc->spc_schedflags & SPCF_SHOULDYIELD) 6311 break; 6312 } 6313 } 6314 6315 /* 6316 * We are heading back to userspace (vmd), either because we need help 6317 * handling an exit, a guest interrupt is pending, or we failed in some 6318 * way to enter the guest. Copy the guest registers to the exit struct 6319 * and return to vmd. 6320 */ 6321 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs)) 6322 ret = EINVAL; 6323 6324 return (ret); 6325 } 6326 6327 /* 6328 * vmm_alloc_vpid 6329 * 6330 * Sets the memory location pointed to by "vpid" to the next available VPID 6331 * or ASID. 6332 * 6333 * Parameters: 6334 * vpid: Pointer to location to receive the next VPID/ASID 6335 * 6336 * Return Values: 6337 * 0: The operation completed successfully 6338 * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged. 
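 * The allocator is a simple bitmap scan: ID 0 is never handed out (the
 * search starts at 1) and ID i occupies bit (i % 8) of vpids[i / 8], so
 * e.g. ID 10 is bit 2 of vpids[1].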
6339 */ 6340 int 6341 vmm_alloc_vpid(uint16_t *vpid) 6342 { 6343 uint16_t i; 6344 uint8_t idx, bit; 6345 struct vmm_softc *sc = vmm_softc; 6346 6347 rw_enter_write(&vmm_softc->vpid_lock); 6348 for (i = 1; i <= sc->max_vpid; i++) { 6349 idx = i / 8; 6350 bit = i - (idx * 8); 6351 6352 if (!(sc->vpids[idx] & (1 << bit))) { 6353 sc->vpids[idx] |= (1 << bit); 6354 *vpid = i; 6355 DPRINTF("%s: allocated VPID/ASID %d\n", __func__, 6356 i); 6357 rw_exit_write(&vmm_softc->vpid_lock); 6358 return 0; 6359 } 6360 } 6361 6362 printf("%s: no available %ss\n", __func__, 6363 (sc->mode == VMM_MODE_EPT) ? "VPID" : 6364 "ASID"); 6365 6366 rw_exit_write(&vmm_softc->vpid_lock); 6367 return ENOMEM; 6368 } 6369 6370 /* 6371 * vmm_free_vpid 6372 * 6373 * Frees the VPID/ASID id supplied in "vpid". 6374 * 6375 * Parameters: 6376 * vpid: VPID/ASID to free. 6377 */ 6378 void 6379 vmm_free_vpid(uint16_t vpid) 6380 { 6381 uint8_t idx, bit; 6382 struct vmm_softc *sc = vmm_softc; 6383 6384 rw_enter_write(&vmm_softc->vpid_lock); 6385 idx = vpid / 8; 6386 bit = vpid - (idx * 8); 6387 sc->vpids[idx] &= ~(1 << bit); 6388 6389 DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid); 6390 rw_exit_write(&vmm_softc->vpid_lock); 6391 } 6392 6393 6394 /* vmm_gpa_is_valid 6395 * 6396 * Check if the given gpa is within guest memory space. 6397 * 6398 * Parameters: 6399 * vcpu: The virtual cpu we are running on. 6400 * gpa: The address to check. 6401 * obj_size: The size of the object assigned to gpa 6402 * 6403 * Return values: 6404 * 1: gpa is within the memory ranges allocated for the vcpu 6405 * 0: otherwise 6406 */ 6407 int 6408 vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size) 6409 { 6410 struct vm *vm = vcpu->vc_parent; 6411 struct vm_mem_range *vmr; 6412 size_t i; 6413 6414 for (i = 0; i < vm->vm_nmemranges; ++i) { 6415 vmr = &vm->vm_memranges[i]; 6416 if (vmr->vmr_size >= obj_size && 6417 vmr->vmr_gpa <= gpa && 6418 gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) { 6419 return 1; 6420 } 6421 } 6422 return 0; 6423 } 6424 6425 void 6426 vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa) 6427 { 6428 paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0; 6429 if (!vmm_gpa_is_valid(vcpu, pvclock_gpa, 6430 sizeof(struct pvclock_time_info))) { 6431 /* XXX: Kill guest? 
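 * For now an invalid address is refused by injecting #GP into the
 * guest rather than terminating the VM.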
*/ 6432 vmm_inject_gp(vcpu); 6433 return; 6434 } 6435 6436 /* XXX: handle case when this struct goes over page boundaries */ 6437 if ((pvclock_gpa & PAGE_MASK) + sizeof(struct pvclock_time_info) > 6438 PAGE_SIZE) { 6439 vmm_inject_gp(vcpu); 6440 return; 6441 } 6442 6443 vcpu->vc_pvclock_system_gpa = gpa; 6444 if (tsc_frequency > 0) 6445 vcpu->vc_pvclock_system_tsc_mul = 6446 (int) ((1000000000L << 20) / tsc_frequency); 6447 else 6448 vcpu->vc_pvclock_system_tsc_mul = 0; 6449 vmm_update_pvclock(vcpu); 6450 } 6451 6452 int 6453 vmm_update_pvclock(struct vcpu *vcpu) 6454 { 6455 struct pvclock_time_info *pvclock_ti; 6456 struct timespec tv; 6457 struct vm *vm = vcpu->vc_parent; 6458 paddr_t pvclock_hpa, pvclock_gpa; 6459 6460 if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE) { 6461 pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0; 6462 if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa)) 6463 return (EINVAL); 6464 pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa); 6465 6466 /* START next cycle (must be odd) */ 6467 pvclock_ti->ti_version = 6468 (++vcpu->vc_pvclock_version << 1) | 0x1; 6469 6470 pvclock_ti->ti_tsc_timestamp = rdtsc(); 6471 nanotime(&tv); 6472 pvclock_ti->ti_system_time = 6473 tv.tv_sec * 1000000000L + tv.tv_nsec; 6474 pvclock_ti->ti_tsc_shift = 12; 6475 pvclock_ti->ti_tsc_to_system_mul = 6476 vcpu->vc_pvclock_system_tsc_mul; 6477 pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE; 6478 6479 /* END (must be even) */ 6480 pvclock_ti->ti_version &= ~0x1; 6481 } 6482 return (0); 6483 } 6484 6485 int 6486 vmm_pat_is_valid(uint64_t pat) 6487 { 6488 int i; 6489 uint8_t *byte = (uint8_t *)&pat; 6490 6491 /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */ 6492 for (i = 0; i < 8; i++) { 6493 if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) { 6494 DPRINTF("%s: invalid pat %llx\n", __func__, pat); 6495 return 0; 6496 } 6497 } 6498 6499 return 1; 6500 } 6501 6502 /* 6503 * vmx_exit_reason_decode 6504 * 6505 * Returns a human readable string describing exit type 'code' 6506 */ 6507 const char * 6508 vmx_exit_reason_decode(uint32_t code) 6509 { 6510 switch (code) { 6511 case VMX_EXIT_NMI: return "NMI"; 6512 case VMX_EXIT_EXTINT: return "External interrupt"; 6513 case VMX_EXIT_TRIPLE_FAULT: return "Triple fault"; 6514 case VMX_EXIT_INIT: return "INIT signal"; 6515 case VMX_EXIT_SIPI: return "SIPI signal"; 6516 case VMX_EXIT_IO_SMI: return "I/O SMI"; 6517 case VMX_EXIT_OTHER_SMI: return "other SMI"; 6518 case VMX_EXIT_INT_WINDOW: return "Interrupt window"; 6519 case VMX_EXIT_NMI_WINDOW: return "NMI window"; 6520 case VMX_EXIT_TASK_SWITCH: return "Task switch"; 6521 case VMX_EXIT_CPUID: return "CPUID instruction"; 6522 case VMX_EXIT_GETSEC: return "GETSEC instruction"; 6523 case VMX_EXIT_HLT: return "HLT instruction"; 6524 case VMX_EXIT_INVD: return "INVD instruction"; 6525 case VMX_EXIT_INVLPG: return "INVLPG instruction"; 6526 case VMX_EXIT_RDPMC: return "RDPMC instruction"; 6527 case VMX_EXIT_RDTSC: return "RDTSC instruction"; 6528 case VMX_EXIT_RSM: return "RSM instruction"; 6529 case VMX_EXIT_VMCALL: return "VMCALL instruction"; 6530 case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction"; 6531 case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction"; 6532 case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction"; 6533 case VMX_EXIT_VMPTRST: return "VMPTRST instruction"; 6534 case VMX_EXIT_VMREAD: return "VMREAD instruction"; 6535 case VMX_EXIT_VMRESUME: return "VMRESUME instruction"; 6536 case VMX_EXIT_VMWRITE: return "VMWRITE 
instruction"; 6537 case VMX_EXIT_VMXOFF: return "VMXOFF instruction"; 6538 case VMX_EXIT_VMXON: return "VMXON instruction"; 6539 case VMX_EXIT_CR_ACCESS: return "CR access"; 6540 case VMX_EXIT_MOV_DR: return "MOV DR instruction"; 6541 case VMX_EXIT_IO: return "I/O instruction"; 6542 case VMX_EXIT_RDMSR: return "RDMSR instruction"; 6543 case VMX_EXIT_WRMSR: return "WRMSR instruction"; 6544 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid"; 6545 case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed"; 6546 case VMX_EXIT_MWAIT: return "MWAIT instruction"; 6547 case VMX_EXIT_MTF: return "monitor trap flag"; 6548 case VMX_EXIT_MONITOR: return "MONITOR instruction"; 6549 case VMX_EXIT_PAUSE: return "PAUSE instruction"; 6550 case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry"; 6551 case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold"; 6552 case VMX_EXIT_APIC_ACCESS: return "APIC access"; 6553 case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI"; 6554 case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access"; 6555 case VMX_EXIT_LDTR_TR: return "LDTR/TR access"; 6556 case VMX_EXIT_EPT_VIOLATION: return "EPT violation"; 6557 case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration"; 6558 case VMX_EXIT_INVEPT: return "INVEPT instruction"; 6559 case VMX_EXIT_RDTSCP: return "RDTSCP instruction"; 6560 case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED: 6561 return "preemption timer expired"; 6562 case VMX_EXIT_INVVPID: return "INVVPID instruction"; 6563 case VMX_EXIT_WBINVD: return "WBINVD instruction"; 6564 case VMX_EXIT_XSETBV: return "XSETBV instruction"; 6565 case VMX_EXIT_APIC_WRITE: return "APIC write"; 6566 case VMX_EXIT_RDRAND: return "RDRAND instruction"; 6567 case VMX_EXIT_INVPCID: return "INVPCID instruction"; 6568 case VMX_EXIT_VMFUNC: return "VMFUNC instruction"; 6569 case VMX_EXIT_RDSEED: return "RDSEED instruction"; 6570 case VMX_EXIT_XSAVES: return "XSAVES instruction"; 6571 case VMX_EXIT_XRSTORS: return "XRSTORS instruction"; 6572 default: return "unknown"; 6573 } 6574 } 6575 6576 /* 6577 * svm_exit_reason_decode 6578 * 6579 * Returns a human readable string describing exit type 'code' 6580 */ 6581 const char * 6582 svm_exit_reason_decode(uint32_t code) 6583 { 6584 switch (code) { 6585 case SVM_VMEXIT_CR0_READ: return "CR0 read"; /* 0x00 */ 6586 case SVM_VMEXIT_CR1_READ: return "CR1 read"; /* 0x01 */ 6587 case SVM_VMEXIT_CR2_READ: return "CR2 read"; /* 0x02 */ 6588 case SVM_VMEXIT_CR3_READ: return "CR3 read"; /* 0x03 */ 6589 case SVM_VMEXIT_CR4_READ: return "CR4 read"; /* 0x04 */ 6590 case SVM_VMEXIT_CR5_READ: return "CR5 read"; /* 0x05 */ 6591 case SVM_VMEXIT_CR6_READ: return "CR6 read"; /* 0x06 */ 6592 case SVM_VMEXIT_CR7_READ: return "CR7 read"; /* 0x07 */ 6593 case SVM_VMEXIT_CR8_READ: return "CR8 read"; /* 0x08 */ 6594 case SVM_VMEXIT_CR9_READ: return "CR9 read"; /* 0x09 */ 6595 case SVM_VMEXIT_CR10_READ: return "CR10 read"; /* 0x0A */ 6596 case SVM_VMEXIT_CR11_READ: return "CR11 read"; /* 0x0B */ 6597 case SVM_VMEXIT_CR12_READ: return "CR12 read"; /* 0x0C */ 6598 case SVM_VMEXIT_CR13_READ: return "CR13 read"; /* 0x0D */ 6599 case SVM_VMEXIT_CR14_READ: return "CR14 read"; /* 0x0E */ 6600 case SVM_VMEXIT_CR15_READ: return "CR15 read"; /* 0x0F */ 6601 case SVM_VMEXIT_CR0_WRITE: return "CR0 write"; /* 0x10 */ 6602 case SVM_VMEXIT_CR1_WRITE: return "CR1 write"; /* 0x11 */ 6603 case SVM_VMEXIT_CR2_WRITE: return "CR2 write"; /* 0x12 */ 6604 case SVM_VMEXIT_CR3_WRITE: return "CR3 write"; /* 0x13 */ 6605 case SVM_VMEXIT_CR4_WRITE: return "CR4 write"; 
/* 0x14 */ 6606 case SVM_VMEXIT_CR5_WRITE: return "CR5 write"; /* 0x15 */ 6607 case SVM_VMEXIT_CR6_WRITE: return "CR6 write"; /* 0x16 */ 6608 case SVM_VMEXIT_CR7_WRITE: return "CR7 write"; /* 0x17 */ 6609 case SVM_VMEXIT_CR8_WRITE: return "CR8 write"; /* 0x18 */ 6610 case SVM_VMEXIT_CR9_WRITE: return "CR9 write"; /* 0x19 */ 6611 case SVM_VMEXIT_CR10_WRITE: return "CR10 write"; /* 0x1A */ 6612 case SVM_VMEXIT_CR11_WRITE: return "CR11 write"; /* 0x1B */ 6613 case SVM_VMEXIT_CR12_WRITE: return "CR12 write"; /* 0x1C */ 6614 case SVM_VMEXIT_CR13_WRITE: return "CR13 write"; /* 0x1D */ 6615 case SVM_VMEXIT_CR14_WRITE: return "CR14 write"; /* 0x1E */ 6616 case SVM_VMEXIT_CR15_WRITE: return "CR15 write"; /* 0x1F */ 6617 case SVM_VMEXIT_DR0_READ: return "DR0 read"; /* 0x20 */ 6618 case SVM_VMEXIT_DR1_READ: return "DR1 read"; /* 0x21 */ 6619 case SVM_VMEXIT_DR2_READ: return "DR2 read"; /* 0x22 */ 6620 case SVM_VMEXIT_DR3_READ: return "DR3 read"; /* 0x23 */ 6621 case SVM_VMEXIT_DR4_READ: return "DR4 read"; /* 0x24 */ 6622 case SVM_VMEXIT_DR5_READ: return "DR5 read"; /* 0x25 */ 6623 case SVM_VMEXIT_DR6_READ: return "DR6 read"; /* 0x26 */ 6624 case SVM_VMEXIT_DR7_READ: return "DR7 read"; /* 0x27 */ 6625 case SVM_VMEXIT_DR8_READ: return "DR8 read"; /* 0x28 */ 6626 case SVM_VMEXIT_DR9_READ: return "DR9 read"; /* 0x29 */ 6627 case SVM_VMEXIT_DR10_READ: return "DR10 read"; /* 0x2A */ 6628 case SVM_VMEXIT_DR11_READ: return "DR11 read"; /* 0x2B */ 6629 case SVM_VMEXIT_DR12_READ: return "DR12 read"; /* 0x2C */ 6630 case SVM_VMEXIT_DR13_READ: return "DR13 read"; /* 0x2D */ 6631 case SVM_VMEXIT_DR14_READ: return "DR14 read"; /* 0x2E */ 6632 case SVM_VMEXIT_DR15_READ: return "DR15 read"; /* 0x2F */ 6633 case SVM_VMEXIT_DR0_WRITE: return "DR0 write"; /* 0x30 */ 6634 case SVM_VMEXIT_DR1_WRITE: return "DR1 write"; /* 0x31 */ 6635 case SVM_VMEXIT_DR2_WRITE: return "DR2 write"; /* 0x32 */ 6636 case SVM_VMEXIT_DR3_WRITE: return "DR3 write"; /* 0x33 */ 6637 case SVM_VMEXIT_DR4_WRITE: return "DR4 write"; /* 0x34 */ 6638 case SVM_VMEXIT_DR5_WRITE: return "DR5 write"; /* 0x35 */ 6639 case SVM_VMEXIT_DR6_WRITE: return "DR6 write"; /* 0x36 */ 6640 case SVM_VMEXIT_DR7_WRITE: return "DR7 write"; /* 0x37 */ 6641 case SVM_VMEXIT_DR8_WRITE: return "DR8 write"; /* 0x38 */ 6642 case SVM_VMEXIT_DR9_WRITE: return "DR9 write"; /* 0x39 */ 6643 case SVM_VMEXIT_DR10_WRITE: return "DR10 write"; /* 0x3A */ 6644 case SVM_VMEXIT_DR11_WRITE: return "DR11 write"; /* 0x3B */ 6645 case SVM_VMEXIT_DR12_WRITE: return "DR12 write"; /* 0x3C */ 6646 case SVM_VMEXIT_DR13_WRITE: return "DR13 write"; /* 0x3D */ 6647 case SVM_VMEXIT_DR14_WRITE: return "DR14 write"; /* 0x3E */ 6648 case SVM_VMEXIT_DR15_WRITE: return "DR15 write"; /* 0x3F */ 6649 case SVM_VMEXIT_EXCP0: return "Exception 0x00"; /* 0x40 */ 6650 case SVM_VMEXIT_EXCP1: return "Exception 0x01"; /* 0x41 */ 6651 case SVM_VMEXIT_EXCP2: return "Exception 0x02"; /* 0x42 */ 6652 case SVM_VMEXIT_EXCP3: return "Exception 0x03"; /* 0x43 */ 6653 case SVM_VMEXIT_EXCP4: return "Exception 0x04"; /* 0x44 */ 6654 case SVM_VMEXIT_EXCP5: return "Exception 0x05"; /* 0x45 */ 6655 case SVM_VMEXIT_EXCP6: return "Exception 0x06"; /* 0x46 */ 6656 case SVM_VMEXIT_EXCP7: return "Exception 0x07"; /* 0x47 */ 6657 case SVM_VMEXIT_EXCP8: return "Exception 0x08"; /* 0x48 */ 6658 case SVM_VMEXIT_EXCP9: return "Exception 0x09"; /* 0x49 */ 6659 case SVM_VMEXIT_EXCP10: return "Exception 0x0A"; /* 0x4A */ 6660 case SVM_VMEXIT_EXCP11: return "Exception 0x0B"; /* 0x4B */ 6661 case SVM_VMEXIT_EXCP12: return "Exception 0x0C"; /* 
0x4C */ 6662 case SVM_VMEXIT_EXCP13: return "Exception 0x0D"; /* 0x4D */ 6663 case SVM_VMEXIT_EXCP14: return "Exception 0x0E"; /* 0x4E */ 6664 case SVM_VMEXIT_EXCP15: return "Exception 0x0F"; /* 0x4F */ 6665 case SVM_VMEXIT_EXCP16: return "Exception 0x10"; /* 0x50 */ 6666 case SVM_VMEXIT_EXCP17: return "Exception 0x11"; /* 0x51 */ 6667 case SVM_VMEXIT_EXCP18: return "Exception 0x12"; /* 0x52 */ 6668 case SVM_VMEXIT_EXCP19: return "Exception 0x13"; /* 0x53 */ 6669 case SVM_VMEXIT_EXCP20: return "Exception 0x14"; /* 0x54 */ 6670 case SVM_VMEXIT_EXCP21: return "Exception 0x15"; /* 0x55 */ 6671 case SVM_VMEXIT_EXCP22: return "Exception 0x16"; /* 0x56 */ 6672 case SVM_VMEXIT_EXCP23: return "Exception 0x17"; /* 0x57 */ 6673 case SVM_VMEXIT_EXCP24: return "Exception 0x18"; /* 0x58 */ 6674 case SVM_VMEXIT_EXCP25: return "Exception 0x19"; /* 0x59 */ 6675 case SVM_VMEXIT_EXCP26: return "Exception 0x1A"; /* 0x5A */ 6676 case SVM_VMEXIT_EXCP27: return "Exception 0x1B"; /* 0x5B */ 6677 case SVM_VMEXIT_EXCP28: return "Exception 0x1C"; /* 0x5C */ 6678 case SVM_VMEXIT_EXCP29: return "Exception 0x1D"; /* 0x5D */ 6679 case SVM_VMEXIT_EXCP30: return "Exception 0x1E"; /* 0x5E */ 6680 case SVM_VMEXIT_EXCP31: return "Exception 0x1F"; /* 0x5F */ 6681 case SVM_VMEXIT_INTR: return "External interrupt"; /* 0x60 */ 6682 case SVM_VMEXIT_NMI: return "NMI"; /* 0x61 */ 6683 case SVM_VMEXIT_SMI: return "SMI"; /* 0x62 */ 6684 case SVM_VMEXIT_INIT: return "INIT"; /* 0x63 */ 6685 case SVM_VMEXIT_VINTR: return "Interrupt window"; /* 0x64 */ 6686 case SVM_VMEXIT_CR0_SEL_WRITE: return "Sel CR0 write"; /* 0x65 */ 6687 case SVM_VMEXIT_IDTR_READ: return "IDTR read"; /* 0x66 */ 6688 case SVM_VMEXIT_GDTR_READ: return "GDTR read"; /* 0x67 */ 6689 case SVM_VMEXIT_LDTR_READ: return "LDTR read"; /* 0x68 */ 6690 case SVM_VMEXIT_TR_READ: return "TR read"; /* 0x69 */ 6691 case SVM_VMEXIT_IDTR_WRITE: return "IDTR write"; /* 0x6A */ 6692 case SVM_VMEXIT_GDTR_WRITE: return "GDTR write"; /* 0x6B */ 6693 case SVM_VMEXIT_LDTR_WRITE: return "LDTR write"; /* 0x6C */ 6694 case SVM_VMEXIT_TR_WRITE: return "TR write"; /* 0x6D */ 6695 case SVM_VMEXIT_RDTSC: return "RDTSC instruction"; /* 0x6E */ 6696 case SVM_VMEXIT_RDPMC: return "RDPMC instruction"; /* 0x6F */ 6697 case SVM_VMEXIT_PUSHF: return "PUSHF instruction"; /* 0x70 */ 6698 case SVM_VMEXIT_POPF: return "POPF instruction"; /* 0x71 */ 6699 case SVM_VMEXIT_CPUID: return "CPUID instruction"; /* 0x72 */ 6700 case SVM_VMEXIT_RSM: return "RSM instruction"; /* 0x73 */ 6701 case SVM_VMEXIT_IRET: return "IRET instruction"; /* 0x74 */ 6702 case SVM_VMEXIT_SWINT: return "SWINT instruction"; /* 0x75 */ 6703 case SVM_VMEXIT_INVD: return "INVD instruction"; /* 0x76 */ 6704 case SVM_VMEXIT_PAUSE: return "PAUSE instruction"; /* 0x77 */ 6705 case SVM_VMEXIT_HLT: return "HLT instruction"; /* 0x78 */ 6706 case SVM_VMEXIT_INVLPG: return "INVLPG instruction"; /* 0x79 */ 6707 case SVM_VMEXIT_INVLPGA: return "INVLPGA instruction"; /* 0x7A */ 6708 case SVM_VMEXIT_IOIO: return "I/O instruction"; /* 0x7B */ 6709 case SVM_VMEXIT_MSR: return "RDMSR/WRMSR instruction"; /* 0x7C */ 6710 case SVM_VMEXIT_TASK_SWITCH: return "Task switch"; /* 0x7D */ 6711 case SVM_VMEXIT_FERR_FREEZE: return "FERR_FREEZE"; /* 0x7E */ 6712 case SVM_VMEXIT_SHUTDOWN: return "Triple fault"; /* 0x7F */ 6713 case SVM_VMEXIT_VMRUN: return "VMRUN instruction"; /* 0x80 */ 6714 case SVM_VMEXIT_VMMCALL: return "VMMCALL instruction"; /* 0x81 */ 6715 case SVM_VMEXIT_VMLOAD: return "VMLOAD instruction"; /* 0x82 */ 6716 case SVM_VMEXIT_VMSAVE: return "VMSAVE 
instruction"; /* 0x83 */ 6717 case SVM_VMEXIT_STGI: return "STGI instruction"; /* 0x84 */ 6718 case SVM_VMEXIT_CLGI: return "CLGI instruction"; /* 0x85 */ 6719 case SVM_VMEXIT_SKINIT: return "SKINIT instruction"; /* 0x86 */ 6720 case SVM_VMEXIT_RDTSCP: return "RDTSCP instruction"; /* 0x87 */ 6721 case SVM_VMEXIT_ICEBP: return "ICEBP instruction"; /* 0x88 */ 6722 case SVM_VMEXIT_WBINVD: return "WBINVD instruction"; /* 0x89 */ 6723 case SVM_VMEXIT_MONITOR: return "MONITOR instruction"; /* 0x8A */ 6724 case SVM_VMEXIT_MWAIT: return "MWAIT instruction"; /* 0x8B */ 6725 case SVM_VMEXIT_MWAIT_CONDITIONAL: return "Cond MWAIT"; /* 0x8C */ 6726 case SVM_VMEXIT_NPF: return "NPT violation"; /* 0x400 */ 6727 default: return "unknown"; 6728 } 6729 } 6730 6731 /* 6732 * vmx_instruction_error_decode 6733 * 6734 * Returns a human readable string describing the instruction error in 'code' 6735 */ 6736 const char * 6737 vmx_instruction_error_decode(uint32_t code) 6738 { 6739 switch (code) { 6740 case 1: return "VMCALL: unsupported in VMX root"; 6741 case 2: return "VMCLEAR: invalid paddr"; 6742 case 3: return "VMCLEAR: VMXON pointer"; 6743 case 4: return "VMLAUNCH: non-clear VMCS"; 6744 case 5: return "VMRESUME: non-launched VMCS"; 6745 case 6: return "VMRESUME: executed after VMXOFF"; 6746 case 7: return "VM entry: invalid control field(s)"; 6747 case 8: return "VM entry: invalid host state field(s)"; 6748 case 9: return "VMPTRLD: invalid paddr"; 6749 case 10: return "VMPTRLD: VMXON pointer"; 6750 case 11: return "VMPTRLD: incorrect VMCS revid"; 6751 case 12: return "VMREAD/VMWRITE: unsupported VMCS field"; 6752 case 13: return "VMWRITE: RO VMCS field"; 6753 case 15: return "VMXON: unsupported in VMX root"; 6754 case 20: return "VMCALL: invalid VM exit control fields"; 6755 case 26: return "VM entry: blocked by MOV SS"; 6756 case 28: return "Invalid operand to INVEPT/INVVPID"; 6757 case 0x80000021: return "VM entry: invalid guest state"; 6758 case 0x80000022: return "VM entry: failure due to MSR loading"; 6759 case 0x80000029: return "VM entry: machine-check event"; 6760 default: return "unknown"; 6761 } 6762 } 6763 6764 /* 6765 * vcpu_state_decode 6766 * 6767 * Returns a human readable string describing the vcpu state in 'state'. 
6768 */ 6769 const char * 6770 vcpu_state_decode(u_int state) 6771 { 6772 switch (state) { 6773 case VCPU_STATE_STOPPED: return "stopped"; 6774 case VCPU_STATE_RUNNING: return "running"; 6775 case VCPU_STATE_REQTERM: return "requesting termination"; 6776 case VCPU_STATE_TERMINATED: return "terminated"; 6777 case VCPU_STATE_UNKNOWN: return "unknown"; 6778 default: return "invalid"; 6779 } 6780 } 6781 6782 #ifdef VMM_DEBUG 6783 /* 6784 * dump_vcpu 6785 * 6786 * Dumps the VMX capabilities of vcpu 'vcpu' 6787 */ 6788 void 6789 dump_vcpu(struct vcpu *vcpu) 6790 { 6791 printf("vcpu @ %p\n", vcpu); 6792 printf(" parent vm @ %p\n", vcpu->vc_parent); 6793 printf(" mode: "); 6794 if (vcpu->vc_virt_mode == VMM_MODE_EPT) { 6795 printf("VMX\n"); 6796 printf(" pinbased ctls: 0x%llx\n", 6797 vcpu->vc_vmx_pinbased_ctls); 6798 printf(" true pinbased ctls: 0x%llx\n", 6799 vcpu->vc_vmx_true_pinbased_ctls); 6800 CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING); 6801 CTRL_DUMP(vcpu, PINBASED, NMI_EXITING); 6802 CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS); 6803 CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER); 6804 CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS); 6805 printf(" procbased ctls: 0x%llx\n", 6806 vcpu->vc_vmx_procbased_ctls); 6807 printf(" true procbased ctls: 0x%llx\n", 6808 vcpu->vc_vmx_true_procbased_ctls); 6809 CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING); 6810 CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING); 6811 CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING); 6812 CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING); 6813 CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING); 6814 CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING); 6815 CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING); 6816 CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING); 6817 CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING); 6818 CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING); 6819 CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING); 6820 CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW); 6821 CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING); 6822 CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING); 6823 CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING); 6824 CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS); 6825 CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG); 6826 CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS); 6827 CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING); 6828 CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING); 6829 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 6830 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { 6831 printf(" procbased2 ctls: 0x%llx\n", 6832 vcpu->vc_vmx_procbased2_ctls); 6833 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC); 6834 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT); 6835 CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING); 6836 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP); 6837 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE); 6838 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID); 6839 CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING); 6840 CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST); 6841 CTRL_DUMP(vcpu, PROCBASED2, 6842 APIC_REGISTER_VIRTUALIZATION); 6843 CTRL_DUMP(vcpu, PROCBASED2, 6844 VIRTUAL_INTERRUPT_DELIVERY); 6845 CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING); 6846 CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING); 6847 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID); 6848 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS); 6849 CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING); 6850 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING); 6851 CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING); 6852 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML); 6853 CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE); 6854 
CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT); 6855 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS); 6856 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING); 6857 } 6858 printf(" entry ctls: 0x%llx\n", 6859 vcpu->vc_vmx_entry_ctls); 6860 printf(" true entry ctls: 0x%llx\n", 6861 vcpu->vc_vmx_true_entry_ctls); 6862 CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS); 6863 CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST); 6864 CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM); 6865 CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT); 6866 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY); 6867 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY); 6868 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY); 6869 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY); 6870 CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT); 6871 printf(" exit ctls: 0x%llx\n", 6872 vcpu->vc_vmx_exit_ctls); 6873 printf(" true exit ctls: 0x%llx\n", 6874 vcpu->vc_vmx_true_exit_ctls); 6875 CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS); 6876 CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE); 6877 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT); 6878 CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT); 6879 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT); 6880 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT); 6881 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT); 6882 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT); 6883 CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER); 6884 CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT); 6885 CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT); 6886 } 6887 } 6888 6889 /* 6890 * vmx_dump_vmcs_field 6891 * 6892 * Debug function to dump the contents of a single VMCS field 6893 * 6894 * Parameters: 6895 * fieldid: VMCS Field ID 6896 * msg: string to display 6897 */ 6898 void 6899 vmx_dump_vmcs_field(uint16_t fieldid, const char *msg) 6900 { 6901 uint8_t width; 6902 uint64_t val; 6903 6904 6905 DPRINTF("%s (0x%04x): ", msg, fieldid); 6906 if (vmread(fieldid, &val)) 6907 DPRINTF("???? "); 6908 else { 6909 /* 6910 * Field width encoding : bits 13:14 6911 * 6912 * 0: 16-bit 6913 * 1: 64-bit 6914 * 2: 32-bit 6915 * 3: natural width 6916 */ 6917 width = (fieldid >> 13) & 0x3; 6918 switch (width) { 6919 case 0: DPRINTF("0x%04llx ", val); break; 6920 case 1: 6921 case 3: DPRINTF("0x%016llx ", val); break; 6922 case 2: DPRINTF("0x%08llx ", val); 6923 } 6924 } 6925 } 6926 6927 /* 6928 * vmx_dump_vmcs 6929 * 6930 * Debug function to dump the contents of the current VMCS. 6931 */ 6932 void 6933 vmx_dump_vmcs(struct vcpu *vcpu) 6934 { 6935 int has_sec, i; 6936 uint32_t cr3_tgt_ct; 6937 6938 /* XXX save and load new vmcs, restore at end */ 6939 6940 DPRINTF("--CURRENT VMCS STATE--\n"); 6941 printf("VMCS launched: %s\n", 6942 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? 
"Yes" : "No"); 6943 DPRINTF("VMXON revision : 0x%x\n", 6944 curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision); 6945 DPRINTF("CR0 fixed0: 0x%llx\n", 6946 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0); 6947 DPRINTF("CR0 fixed1: 0x%llx\n", 6948 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1); 6949 DPRINTF("CR4 fixed0: 0x%llx\n", 6950 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0); 6951 DPRINTF("CR4 fixed1: 0x%llx\n", 6952 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1); 6953 DPRINTF("MSR table size: 0x%x\n", 6954 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1)); 6955 6956 has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 6957 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1); 6958 6959 if (has_sec) { 6960 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 6961 IA32_VMX_ENABLE_VPID, 1)) { 6962 vmx_dump_vmcs_field(VMCS_GUEST_VPID, "VPID"); 6963 } 6964 } 6965 6966 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, 6967 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) { 6968 vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR, 6969 "Posted Int Notif Vec"); 6970 } 6971 6972 if (has_sec) { 6973 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 6974 IA32_VMX_EPT_VIOLATION_VE, 1)) { 6975 vmx_dump_vmcs_field(VMCS_EPTP_INDEX, "EPTP idx"); 6976 } 6977 } 6978 6979 DPRINTF("\n"); 6980 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL, "G.ES"); 6981 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL, "G.CS"); 6982 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL, "G.SS"); 6983 DPRINTF("\n"); 6984 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL, "G.DS"); 6985 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL, "G.FS"); 6986 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL, "G.GS"); 6987 DPRINTF("\n"); 6988 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL, "LDTR"); 6989 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL, "G.TR"); 6990 6991 if (has_sec) { 6992 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 6993 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) { 6994 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS, 6995 "Int sts"); 6996 } 6997 6998 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 6999 IA32_VMX_ENABLE_PML, 1)) { 7000 vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX, "PML Idx"); 7001 } 7002 } 7003 7004 DPRINTF("\n"); 7005 vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL, "H.ES"); 7006 vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL, "H.CS"); 7007 vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL, "H.SS"); 7008 DPRINTF("\n"); 7009 vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL, "H.DS"); 7010 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL, "H.FS"); 7011 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL, "H.GS"); 7012 DPRINTF("\n"); 7013 7014 vmx_dump_vmcs_field(VMCS_IO_BITMAP_A, "I/O Bitmap A"); 7015 DPRINTF("\n"); 7016 vmx_dump_vmcs_field(VMCS_IO_BITMAP_B, "I/O Bitmap B"); 7017 DPRINTF("\n"); 7018 7019 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 7020 IA32_VMX_USE_MSR_BITMAPS, 1)) { 7021 vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS, "MSR Bitmap"); 7022 DPRINTF("\n"); 7023 } 7024 7025 vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS, "Exit Store MSRs"); 7026 DPRINTF("\n"); 7027 vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS, "Exit Load MSRs"); 7028 DPRINTF("\n"); 7029 vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS, "Entry Load MSRs"); 7030 DPRINTF("\n"); 7031 vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER, "Exec VMCS Ptr"); 7032 DPRINTF("\n"); 7033 7034 if (has_sec) { 7035 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7036 IA32_VMX_ENABLE_PML, 1)) { 7037 vmx_dump_vmcs_field(VMCS_PML_ADDRESS, "PML Addr"); 7038 DPRINTF("\n"); 7039 } 7040 } 7041 7042 
vmx_dump_vmcs_field(VMCS_TSC_OFFSET, "TSC Offset"); 7043 DPRINTF("\n"); 7044 7045 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 7046 IA32_VMX_USE_TPR_SHADOW, 1)) { 7047 vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS, 7048 "Virtual APIC Addr"); 7049 DPRINTF("\n"); 7050 } 7051 7052 if (has_sec) { 7053 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7054 IA32_VMX_VIRTUALIZE_APIC, 1)) { 7055 vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS, 7056 "APIC Access Addr"); 7057 DPRINTF("\n"); 7058 } 7059 } 7060 7061 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, 7062 IA32_VMX_PROCESS_POSTED_INTERRUPTS, 1)) { 7063 vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC, 7064 "Posted Int Desc Addr"); 7065 DPRINTF("\n"); 7066 } 7067 7068 if (has_sec) { 7069 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7070 IA32_VMX_ENABLE_VM_FUNCTIONS, 1)) { 7071 vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS, 7072 "VM Function Controls"); 7073 DPRINTF("\n"); 7074 } 7075 7076 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7077 IA32_VMX_ENABLE_EPT, 1)) { 7078 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP, 7079 "EPT Pointer"); 7080 DPRINTF("\n"); 7081 } 7082 7083 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7084 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY, 1)) { 7085 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_0, 7086 "EOI Exit Bitmap 0"); 7087 DPRINTF("\n"); 7088 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_1, 7089 "EOI Exit Bitmap 1"); 7090 DPRINTF("\n"); 7091 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_2, 7092 "EOI Exit Bitmap 2"); 7093 DPRINTF("\n"); 7094 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_3, 7095 "EOI Exit Bitmap 3"); 7096 DPRINTF("\n"); 7097 } 7098 7099 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7100 IA32_VMX_VMCS_SHADOWING, 1)) { 7101 vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS, 7102 "VMREAD Bitmap Addr"); 7103 DPRINTF("\n"); 7104 vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS, 7105 "VMWRITE Bitmap Addr"); 7106 DPRINTF("\n"); 7107 } 7108 7109 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7110 IA32_VMX_EPT_VIOLATION_VE, 1)) { 7111 vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS, 7112 "#VE Addr"); 7113 DPRINTF("\n"); 7114 } 7115 7116 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7117 IA32_VMX_ENABLE_XSAVES_XRSTORS, 1)) { 7118 vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP, 7119 "XSS exiting bitmap addr"); 7120 DPRINTF("\n"); 7121 } 7122 7123 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7124 IA32_VMX_ENABLE_ENCLS_EXITING, 1)) { 7125 vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP, 7126 "Encls exiting bitmap addr"); 7127 DPRINTF("\n"); 7128 } 7129 7130 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7131 IA32_VMX_ENABLE_TSC_SCALING, 1)) { 7132 vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER, 7133 "TSC scaling factor"); 7134 DPRINTF("\n"); 7135 } 7136 7137 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7138 IA32_VMX_ENABLE_EPT, 1)) { 7139 vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS, 7140 "Guest PA"); 7141 DPRINTF("\n"); 7142 } 7143 } 7144 7145 vmx_dump_vmcs_field(VMCS_LINK_POINTER, "VMCS Link Pointer"); 7146 DPRINTF("\n"); 7147 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL, "Guest DEBUGCTL"); 7148 DPRINTF("\n"); 7149 7150 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, 7151 IA32_VMX_LOAD_IA32_PAT_ON_ENTRY, 1) || 7152 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7153 IA32_VMX_SAVE_IA32_PAT_ON_EXIT, 1)) { 7154 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT, 7155 "Guest PAT"); 7156 DPRINTF("\n"); 7157 } 7158 7159 if (vcpu_vmx_check_cap(vcpu, 
IA32_VMX_ENTRY_CTLS, 7160 IA32_VMX_LOAD_IA32_EFER_ON_ENTRY, 1) || 7161 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7162 IA32_VMX_SAVE_IA32_EFER_ON_EXIT, 1)) { 7163 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER, 7164 "Guest EFER"); 7165 DPRINTF("\n"); 7166 } 7167 7168 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, 7169 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY, 1)) { 7170 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL, 7171 "Guest Perf Global Ctrl"); 7172 DPRINTF("\n"); 7173 } 7174 7175 if (has_sec) { 7176 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7177 IA32_VMX_ENABLE_EPT, 1)) { 7178 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE0, "Guest PDPTE0"); 7179 DPRINTF("\n"); 7180 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE1, "Guest PDPTE1"); 7181 DPRINTF("\n"); 7182 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE2, "Guest PDPTE2"); 7183 DPRINTF("\n"); 7184 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE3, "Guest PDPTE3"); 7185 DPRINTF("\n"); 7186 } 7187 } 7188 7189 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS, 7190 IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY, 1) || 7191 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7192 IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT, 1)) { 7193 vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS, 7194 "Guest BNDCFGS"); 7195 DPRINTF("\n"); 7196 } 7197 7198 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7199 IA32_VMX_LOAD_IA32_PAT_ON_EXIT, 1)) { 7200 vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT, 7201 "Host PAT"); 7202 DPRINTF("\n"); 7203 } 7204 7205 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7206 IA32_VMX_LOAD_IA32_EFER_ON_EXIT, 1)) { 7207 vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER, 7208 "Host EFER"); 7209 DPRINTF("\n"); 7210 } 7211 7212 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS, 7213 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT, 1)) { 7214 vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL, 7215 "Host Perf Global Ctrl"); 7216 DPRINTF("\n"); 7217 } 7218 7219 vmx_dump_vmcs_field(VMCS_PINBASED_CTLS, "Pinbased Ctrls"); 7220 vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS, "Procbased Ctrls"); 7221 DPRINTF("\n"); 7222 vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP, "Exception Bitmap"); 7223 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK, "#PF Err Code Mask"); 7224 DPRINTF("\n"); 7225 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH, "#PF Err Code Match"); 7226 vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT, "CR3 Tgt Count"); 7227 DPRINTF("\n"); 7228 vmx_dump_vmcs_field(VMCS_EXIT_CTLS, "Exit Ctrls"); 7229 vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT, "Exit MSR Store Ct"); 7230 DPRINTF("\n"); 7231 vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT, "Exit MSR Load Ct"); 7232 vmx_dump_vmcs_field(VMCS_ENTRY_CTLS, "Entry Ctrls"); 7233 DPRINTF("\n"); 7234 vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT, "Entry MSR Load Ct"); 7235 vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO, "Entry Int. Info"); 7236 DPRINTF("\n"); 7237 vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 7238 "Entry Ex. 
Err Code"); 7239 vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH, "Entry Insn Len"); 7240 DPRINTF("\n"); 7241 7242 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS, 7243 IA32_VMX_USE_TPR_SHADOW, 1)) { 7244 vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD, "TPR Threshold"); 7245 DPRINTF("\n"); 7246 } 7247 7248 if (has_sec) { 7249 vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS, "2ndary Ctrls"); 7250 DPRINTF("\n"); 7251 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, 7252 IA32_VMX_PAUSE_LOOP_EXITING, 1)) { 7253 vmx_dump_vmcs_field(VMCS_PLE_GAP, "PLE Gap"); 7254 vmx_dump_vmcs_field(VMCS_PLE_WINDOW, "PLE Window"); 7255 } 7256 DPRINTF("\n"); 7257 } 7258 7259 vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR, "Insn Error"); 7260 vmx_dump_vmcs_field(VMCS_EXIT_REASON, "Exit Reason"); 7261 DPRINTF("\n"); 7262 7263 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO, "Exit Int. Info"); 7264 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE, 7265 "Exit Int. Err Code"); 7266 DPRINTF("\n"); 7267 7268 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO, "IDT vect info"); 7269 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE, 7270 "IDT vect err code"); 7271 DPRINTF("\n"); 7272 7273 vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH, "Insn Len"); 7274 vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO, "Exit Insn Info"); 7275 DPRINTF("\n"); 7276 7277 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT, "G. ES Lim"); 7278 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT, "G. CS Lim"); 7279 DPRINTF("\n"); 7280 7281 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT, "G. SS Lim"); 7282 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT, "G. DS Lim"); 7283 DPRINTF("\n"); 7284 7285 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT, "G. FS Lim"); 7286 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT, "G. GS Lim"); 7287 DPRINTF("\n"); 7288 7289 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT, "G. LDTR Lim"); 7290 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT, "G. TR Lim"); 7291 DPRINTF("\n"); 7292 7293 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT, "G. GDTR Lim"); 7294 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT, "G. IDTR Lim"); 7295 DPRINTF("\n"); 7296 7297 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR, "G. ES AR"); 7298 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR, "G. CS AR"); 7299 DPRINTF("\n"); 7300 7301 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR, "G. SS AR"); 7302 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR, "G. DS AR"); 7303 DPRINTF("\n"); 7304 7305 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR, "G. FS AR"); 7306 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR, "G. GS AR"); 7307 DPRINTF("\n"); 7308 7309 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR, "G. LDTR AR"); 7310 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR, "G. TR AR"); 7311 DPRINTF("\n"); 7312 7313 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST, "G. Int St."); 7314 vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE, "G. Act St."); 7315 DPRINTF("\n"); 7316 7317 vmx_dump_vmcs_field(VMCS_GUEST_SMBASE, "G. SMBASE"); 7318 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS, "G. SYSENTER CS"); 7319 DPRINTF("\n"); 7320 7321 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS, 7322 IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER, 1)) { 7323 vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL, 7324 "VMX Preempt Timer"); 7325 DPRINTF("\n"); 7326 } 7327 7328 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS, "H. 
SYSENTER CS"); 7329 DPRINTF("\n"); 7330 7331 vmx_dump_vmcs_field(VMCS_CR0_MASK, "CR0 Mask"); 7332 DPRINTF("\n"); 7333 vmx_dump_vmcs_field(VMCS_CR4_MASK, "CR4 Mask"); 7334 DPRINTF("\n"); 7335 7336 vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW, "CR0 RD Shadow"); 7337 DPRINTF("\n"); 7338 vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW, "CR4 RD Shadow"); 7339 DPRINTF("\n"); 7340 7341 /* We assume all CPUs have the same max CR3 target ct */ 7342 cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count; 7343 DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct); 7344 if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) { 7345 for (i = 0 ; i < cr3_tgt_ct; i++) { 7346 vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i), 7347 "CR3 Target"); 7348 DPRINTF("\n"); 7349 } 7350 } else { 7351 DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS); 7352 } 7353 7354 vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION, "G. Exit Qual"); 7355 DPRINTF("\n"); 7356 vmx_dump_vmcs_field(VMCS_IO_RCX, "I/O RCX"); 7357 DPRINTF("\n"); 7358 vmx_dump_vmcs_field(VMCS_IO_RSI, "I/O RSI"); 7359 DPRINTF("\n"); 7360 vmx_dump_vmcs_field(VMCS_IO_RDI, "I/O RDI"); 7361 DPRINTF("\n"); 7362 vmx_dump_vmcs_field(VMCS_IO_RIP, "I/O RIP"); 7363 DPRINTF("\n"); 7364 vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS, "G. Lin Addr"); 7365 DPRINTF("\n"); 7366 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR0, "G. CR0"); 7367 DPRINTF("\n"); 7368 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR3, "G. CR3"); 7369 DPRINTF("\n"); 7370 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR4, "G. CR4"); 7371 DPRINTF("\n"); 7372 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE, "G. ES Base"); 7373 DPRINTF("\n"); 7374 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE, "G. CS Base"); 7375 DPRINTF("\n"); 7376 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE, "G. SS Base"); 7377 DPRINTF("\n"); 7378 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE, "G. DS Base"); 7379 DPRINTF("\n"); 7380 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE, "G. FS Base"); 7381 DPRINTF("\n"); 7382 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE, "G. GS Base"); 7383 DPRINTF("\n"); 7384 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE, "G. LDTR Base"); 7385 DPRINTF("\n"); 7386 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE, "G. TR Base"); 7387 DPRINTF("\n"); 7388 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE, "G. GDTR Base"); 7389 DPRINTF("\n"); 7390 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE, "G. IDTR Base"); 7391 DPRINTF("\n"); 7392 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR7, "G. DR7"); 7393 DPRINTF("\n"); 7394 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP, "G. RSP"); 7395 DPRINTF("\n"); 7396 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP, "G. RIP"); 7397 DPRINTF("\n"); 7398 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS, "G. RFLAGS"); 7399 DPRINTF("\n"); 7400 vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC, "G. Pend Dbg Exc"); 7401 DPRINTF("\n"); 7402 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP, "G. SYSENTER ESP"); 7403 DPRINTF("\n"); 7404 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP, "G. SYSENTER EIP"); 7405 DPRINTF("\n"); 7406 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR0, "H. CR0"); 7407 DPRINTF("\n"); 7408 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR3, "H. CR3"); 7409 DPRINTF("\n"); 7410 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR4, "H. CR4"); 7411 DPRINTF("\n"); 7412 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE, "H. FS Base"); 7413 DPRINTF("\n"); 7414 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE, "H. GS Base"); 7415 DPRINTF("\n"); 7416 vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE, "H. TR Base"); 7417 DPRINTF("\n"); 7418 vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE, "H. 
GDTR Base"); 7419 DPRINTF("\n"); 7420 vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE, "H. IDTR Base"); 7421 DPRINTF("\n"); 7422 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP, "H. SYSENTER ESP"); 7423 DPRINTF("\n"); 7424 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP, "H. SYSENTER EIP"); 7425 DPRINTF("\n"); 7426 vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP, "H. RSP"); 7427 DPRINTF("\n"); 7428 vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP, "H. RIP"); 7429 DPRINTF("\n"); 7430 } 7431 7432 /* 7433 * vmx_vcpu_dump_regs 7434 * 7435 * Debug function to print vcpu regs from the current vcpu 7436 * note - vmcs for 'vcpu' must be on this pcpu. 7437 * 7438 * Parameters: 7439 * vcpu - vcpu whose registers should be dumped 7440 */ 7441 void 7442 vmx_vcpu_dump_regs(struct vcpu *vcpu) 7443 { 7444 uint64_t r; 7445 int i; 7446 struct vmx_msr_store *msr_store; 7447 7448 /* XXX reformat this for 32 bit guest as needed */ 7449 DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu)); 7450 i = vmm_get_guest_cpu_cpl(vcpu); 7451 if (i == -1) 7452 DPRINTF(" CPL=unknown\n"); 7453 else 7454 DPRINTF(" CPL=%d\n", i); 7455 DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n", 7456 vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx, 7457 vcpu->vc_gueststate.vg_rcx); 7458 DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n", 7459 vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp, 7460 vcpu->vc_gueststate.vg_rdi); 7461 DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n", 7462 vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8, 7463 vcpu->vc_gueststate.vg_r9); 7464 DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n", 7465 vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11, 7466 vcpu->vc_gueststate.vg_r12); 7467 DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n", 7468 vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14, 7469 vcpu->vc_gueststate.vg_r15); 7470 7471 DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip); 7472 if (vmread(VMCS_GUEST_IA32_RSP, &r)) 7473 DPRINTF("(error reading)\n"); 7474 else 7475 DPRINTF("0x%016llx\n", r); 7476 7477 DPRINTF(" rflags="); 7478 if (vmread(VMCS_GUEST_IA32_RFLAGS, &r)) 7479 DPRINTF("(error reading)\n"); 7480 else { 7481 DPRINTF("0x%016llx ", r); 7482 vmm_decode_rflags(r); 7483 } 7484 7485 DPRINTF(" cr0="); 7486 if (vmread(VMCS_GUEST_IA32_CR0, &r)) 7487 DPRINTF("(error reading)\n"); 7488 else { 7489 DPRINTF("0x%016llx ", r); 7490 vmm_decode_cr0(r); 7491 } 7492 7493 DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2); 7494 7495 DPRINTF(" cr3="); 7496 if (vmread(VMCS_GUEST_IA32_CR3, &r)) 7497 DPRINTF("(error reading)\n"); 7498 else { 7499 DPRINTF("0x%016llx ", r); 7500 vmm_decode_cr3(r); 7501 } 7502 7503 DPRINTF(" cr4="); 7504 if (vmread(VMCS_GUEST_IA32_CR4, &r)) 7505 DPRINTF("(error reading)\n"); 7506 else { 7507 DPRINTF("0x%016llx ", r); 7508 vmm_decode_cr4(r); 7509 } 7510 7511 DPRINTF(" --Guest Segment Info--\n"); 7512 7513 DPRINTF(" cs="); 7514 if (vmread(VMCS_GUEST_IA32_CS_SEL, &r)) 7515 DPRINTF("(error reading)"); 7516 else 7517 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7518 7519 DPRINTF(" base="); 7520 if (vmread(VMCS_GUEST_IA32_CS_BASE, &r)) 7521 DPRINTF("(error reading)"); 7522 else 7523 DPRINTF("0x%016llx", r); 7524 7525 DPRINTF(" limit="); 7526 if (vmread(VMCS_GUEST_IA32_CS_LIMIT, &r)) 7527 DPRINTF("(error reading)"); 7528 else 7529 DPRINTF("0x%016llx", r); 7530 7531 DPRINTF(" a/r="); 7532 if (vmread(VMCS_GUEST_IA32_CS_AR, &r)) 7533 DPRINTF("(error reading)\n"); 7534 else { 7535 DPRINTF("0x%04llx\n ", r); 7536 vmm_segment_desc_decode(r); 
7537 } 7538 7539 DPRINTF(" ds="); 7540 if (vmread(VMCS_GUEST_IA32_DS_SEL, &r)) 7541 DPRINTF("(error reading)"); 7542 else 7543 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7544 7545 DPRINTF(" base="); 7546 if (vmread(VMCS_GUEST_IA32_DS_BASE, &r)) 7547 DPRINTF("(error reading)"); 7548 else 7549 DPRINTF("0x%016llx", r); 7550 7551 DPRINTF(" limit="); 7552 if (vmread(VMCS_GUEST_IA32_DS_LIMIT, &r)) 7553 DPRINTF("(error reading)"); 7554 else 7555 DPRINTF("0x%016llx", r); 7556 7557 DPRINTF(" a/r="); 7558 if (vmread(VMCS_GUEST_IA32_DS_AR, &r)) 7559 DPRINTF("(error reading)\n"); 7560 else { 7561 DPRINTF("0x%04llx\n ", r); 7562 vmm_segment_desc_decode(r); 7563 } 7564 7565 DPRINTF(" es="); 7566 if (vmread(VMCS_GUEST_IA32_ES_SEL, &r)) 7567 DPRINTF("(error reading)"); 7568 else 7569 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7570 7571 DPRINTF(" base="); 7572 if (vmread(VMCS_GUEST_IA32_ES_BASE, &r)) 7573 DPRINTF("(error reading)"); 7574 else 7575 DPRINTF("0x%016llx", r); 7576 7577 DPRINTF(" limit="); 7578 if (vmread(VMCS_GUEST_IA32_ES_LIMIT, &r)) 7579 DPRINTF("(error reading)"); 7580 else 7581 DPRINTF("0x%016llx", r); 7582 7583 DPRINTF(" a/r="); 7584 if (vmread(VMCS_GUEST_IA32_ES_AR, &r)) 7585 DPRINTF("(error reading)\n"); 7586 else { 7587 DPRINTF("0x%04llx\n ", r); 7588 vmm_segment_desc_decode(r); 7589 } 7590 7591 DPRINTF(" fs="); 7592 if (vmread(VMCS_GUEST_IA32_FS_SEL, &r)) 7593 DPRINTF("(error reading)"); 7594 else 7595 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7596 7597 DPRINTF(" base="); 7598 if (vmread(VMCS_GUEST_IA32_FS_BASE, &r)) 7599 DPRINTF("(error reading)"); 7600 else 7601 DPRINTF("0x%016llx", r); 7602 7603 DPRINTF(" limit="); 7604 if (vmread(VMCS_GUEST_IA32_FS_LIMIT, &r)) 7605 DPRINTF("(error reading)"); 7606 else 7607 DPRINTF("0x%016llx", r); 7608 7609 DPRINTF(" a/r="); 7610 if (vmread(VMCS_GUEST_IA32_FS_AR, &r)) 7611 DPRINTF("(error reading)\n"); 7612 else { 7613 DPRINTF("0x%04llx\n ", r); 7614 vmm_segment_desc_decode(r); 7615 } 7616 7617 DPRINTF(" gs="); 7618 if (vmread(VMCS_GUEST_IA32_GS_SEL, &r)) 7619 DPRINTF("(error reading)"); 7620 else 7621 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7622 7623 DPRINTF(" base="); 7624 if (vmread(VMCS_GUEST_IA32_GS_BASE, &r)) 7625 DPRINTF("(error reading)"); 7626 else 7627 DPRINTF("0x%016llx", r); 7628 7629 DPRINTF(" limit="); 7630 if (vmread(VMCS_GUEST_IA32_GS_LIMIT, &r)) 7631 DPRINTF("(error reading)"); 7632 else 7633 DPRINTF("0x%016llx", r); 7634 7635 DPRINTF(" a/r="); 7636 if (vmread(VMCS_GUEST_IA32_GS_AR, &r)) 7637 DPRINTF("(error reading)\n"); 7638 else { 7639 DPRINTF("0x%04llx\n ", r); 7640 vmm_segment_desc_decode(r); 7641 } 7642 7643 DPRINTF(" ss="); 7644 if (vmread(VMCS_GUEST_IA32_SS_SEL, &r)) 7645 DPRINTF("(error reading)"); 7646 else 7647 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3); 7648 7649 DPRINTF(" base="); 7650 if (vmread(VMCS_GUEST_IA32_SS_BASE, &r)) 7651 DPRINTF("(error reading)"); 7652 else 7653 DPRINTF("0x%016llx", r); 7654 7655 DPRINTF(" limit="); 7656 if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r)) 7657 DPRINTF("(error reading)"); 7658 else 7659 DPRINTF("0x%016llx", r); 7660 7661 DPRINTF(" a/r="); 7662 if (vmread(VMCS_GUEST_IA32_SS_AR, &r)) 7663 DPRINTF("(error reading)\n"); 7664 else { 7665 DPRINTF("0x%04llx\n ", r); 7666 vmm_segment_desc_decode(r); 7667 } 7668 7669 DPRINTF(" tr="); 7670 if (vmread(VMCS_GUEST_IA32_TR_SEL, &r)) 7671 DPRINTF("(error reading)"); 7672 else 7673 DPRINTF("0x%04llx", r); 7674 7675 DPRINTF(" base="); 7676 if (vmread(VMCS_GUEST_IA32_TR_BASE, &r)) 7677 DPRINTF("(error reading)"); 7678 else 7679 DPRINTF("0x%016llx", r); 
	DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
	    (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
	    (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);

	msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;

	for (i = 0; i < VCPU_REGS_NMSRS; i++) {
		DPRINTF(" MSR %d @ %p : 0x%08llx (%s), "
		    "value=0x%016llx ",
		    i, &msr_store[i], msr_store[i].vms_index,
		    msr_name_decode(msr_store[i].vms_index),
		    msr_store[i].vms_data);
		vmm_decode_msr_value(msr_store[i].vms_index,
		    msr_store[i].vms_data);
	}
}

/*
 * msr_name_decode
 *
 * Returns a human-readable name for the MSR supplied in 'msr'.
 *
 * Parameters:
 *  msr - The MSR to decode
 *
 * Return value:
 *  NULL-terminated character string containing the name of the MSR requested
 */
const char *
msr_name_decode(uint32_t msr)
{
	/*
	 * Add as needed. Also consider adding a decode function when
	 * adding to this table.
	 */

	switch (msr) {
	case MSR_TSC: return "TSC";
	case MSR_APICBASE: return "APIC base";
	case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control";
	case MSR_PERFCTR0: return "perf counter 0";
	case MSR_PERFCTR1: return "perf counter 1";
	case MSR_TEMPERATURE_TARGET: return "temperature target";
	case MSR_MTRRcap: return "MTRR cap";
	case MSR_PERF_STATUS: return "perf status";
	case MSR_PERF_CTL: return "perf control";
	case MSR_MTRRvarBase: return "MTRR variable base";
	case MSR_MTRRfix64K_00000: return "MTRR fixed 64K";
	case MSR_MTRRfix16K_80000: return "MTRR fixed 16K";
	case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K";
	case MSR_CR_PAT: return "PAT";
	case MSR_MTRRdefType: return "MTRR default type";
	case MSR_EFER: return "EFER";
	case MSR_STAR: return "STAR";
	case MSR_LSTAR: return "LSTAR";
	case MSR_CSTAR: return "CSTAR";
	case MSR_SFMASK: return "SFMASK";
	case MSR_FSBASE: return "FSBASE";
	case MSR_GSBASE: return "GSBASE";
	case MSR_KERNELGSBASE: return "KGSBASE";
	case MSR_MISC_ENABLE: return "Misc Enable";
	default: return "Unknown MSR";
	}
}
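
/*
 * Example: msr_name_decode(MSR_EFER) returns "EFER"; any MSR missing from
 * the table above falls through to "Unknown MSR".
 */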
7779 */ 7780 7781 switch (msr) { 7782 case MSR_TSC: return "TSC"; 7783 case MSR_APICBASE: return "APIC base"; 7784 case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control"; 7785 case MSR_PERFCTR0: return "perf counter 0"; 7786 case MSR_PERFCTR1: return "perf counter 1"; 7787 case MSR_TEMPERATURE_TARGET: return "temperature target"; 7788 case MSR_MTRRcap: return "MTRR cap"; 7789 case MSR_PERF_STATUS: return "perf status"; 7790 case MSR_PERF_CTL: return "perf control"; 7791 case MSR_MTRRvarBase: return "MTRR variable base"; 7792 case MSR_MTRRfix64K_00000: return "MTRR fixed 64K"; 7793 case MSR_MTRRfix16K_80000: return "MTRR fixed 16K"; 7794 case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K"; 7795 case MSR_CR_PAT: return "PAT"; 7796 case MSR_MTRRdefType: return "MTRR default type"; 7797 case MSR_EFER: return "EFER"; 7798 case MSR_STAR: return "STAR"; 7799 case MSR_LSTAR: return "LSTAR"; 7800 case MSR_CSTAR: return "CSTAR"; 7801 case MSR_SFMASK: return "SFMASK"; 7802 case MSR_FSBASE: return "FSBASE"; 7803 case MSR_GSBASE: return "GSBASE"; 7804 case MSR_KERNELGSBASE: return "KGSBASE"; 7805 case MSR_MISC_ENABLE: return "Misc Enable"; 7806 default: return "Unknown MSR"; 7807 } 7808 } 7809 7810 /* 7811 * vmm_segment_desc_decode 7812 * 7813 * Debug function to print segment information for supplied descriptor 7814 * 7815 * Parameters: 7816 * val - The A/R bytes for the segment descriptor to decode 7817 */ 7818 void 7819 vmm_segment_desc_decode(uint64_t val) 7820 { 7821 uint16_t ar; 7822 uint8_t g, type, s, dpl, p, dib, l; 7823 uint32_t unusable; 7824 7825 /* Exit early on unusable descriptors */ 7826 unusable = val & 0x10000; 7827 if (unusable) { 7828 DPRINTF("(unusable)\n"); 7829 return; 7830 } 7831 7832 ar = (uint16_t)val; 7833 7834 g = (ar & 0x8000) >> 15; 7835 dib = (ar & 0x4000) >> 14; 7836 l = (ar & 0x2000) >> 13; 7837 p = (ar & 0x80) >> 7; 7838 dpl = (ar & 0x60) >> 5; 7839 s = (ar & 0x10) >> 4; 7840 type = (ar & 0xf); 7841 7842 DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ", 7843 g, dib, l, p, s); 7844 7845 DPRINTF("type="); 7846 if (!s) { 7847 switch (type) { 7848 case SDT_SYSLDT: DPRINTF("ldt\n"); break; 7849 case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break; 7850 case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break; 7851 case SDT_SYS386CGT: DPRINTF("call gate\n"); break; 7852 case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break; 7853 case SDT_SYS386TGT: DPRINTF("trap gate\n"); break; 7854 /* XXX handle 32 bit segment types by inspecting mode */ 7855 default: DPRINTF("unknown"); 7856 } 7857 } else { 7858 switch (type + 16) { 7859 case SDT_MEMRO: DPRINTF("data, r/o\n"); break; 7860 case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break; 7861 case SDT_MEMRW: DPRINTF("data, r/w\n"); break; 7862 case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break; 7863 case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break; 7864 case SDT_MEMRODA: DPRINTF("data, r/o, expand down, " 7865 "accessed\n"); 7866 break; 7867 case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break; 7868 case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, " 7869 "accessed\n"); 7870 break; 7871 case SDT_MEME: DPRINTF("code, x only\n"); break; 7872 case SDT_MEMEA: DPRINTF("code, x only, accessed\n"); 7873 case SDT_MEMER: DPRINTF("code, r/x\n"); break; 7874 case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break; 7875 case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break; 7876 case SDT_MEMEAC: DPRINTF("code, x only, conforming, " 7877 "accessed\n"); 7878 break; 7879 case SDT_MEMERC: 
DPRINTF("code, r/x, conforming\n"); break; 7880 case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n"); 7881 break; 7882 } 7883 } 7884 } 7885 7886 void 7887 vmm_decode_cr0(uint64_t cr0) 7888 { 7889 struct vmm_reg_debug_info cr0_info[11] = { 7890 { CR0_PG, "PG ", "pg " }, 7891 { CR0_CD, "CD ", "cd " }, 7892 { CR0_NW, "NW ", "nw " }, 7893 { CR0_AM, "AM ", "am " }, 7894 { CR0_WP, "WP ", "wp " }, 7895 { CR0_NE, "NE ", "ne " }, 7896 { CR0_ET, "ET ", "et " }, 7897 { CR0_TS, "TS ", "ts " }, 7898 { CR0_EM, "EM ", "em " }, 7899 { CR0_MP, "MP ", "mp " }, 7900 { CR0_PE, "PE", "pe" } 7901 }; 7902 7903 uint8_t i; 7904 7905 DPRINTF("("); 7906 for (i = 0; i < nitems(cr0_info); i++) 7907 if (cr0 & cr0_info[i].vrdi_bit) 7908 DPRINTF("%s", cr0_info[i].vrdi_present); 7909 else 7910 DPRINTF("%s", cr0_info[i].vrdi_absent); 7911 7912 DPRINTF(")\n"); 7913 } 7914 7915 void 7916 vmm_decode_cr3(uint64_t cr3) 7917 { 7918 struct vmm_reg_debug_info cr3_info[2] = { 7919 { CR3_PWT, "PWT ", "pwt "}, 7920 { CR3_PCD, "PCD", "pcd"} 7921 }; 7922 7923 uint64_t cr4; 7924 uint8_t i; 7925 7926 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) { 7927 DPRINTF("(error)\n"); 7928 return; 7929 } 7930 7931 /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */ 7932 if ((cr4 & CR4_PCIDE) == 0) { 7933 DPRINTF("("); 7934 for (i = 0 ; i < nitems(cr3_info) ; i++) 7935 if (cr3 & cr3_info[i].vrdi_bit) 7936 DPRINTF("%s", cr3_info[i].vrdi_present); 7937 else 7938 DPRINTF("%s", cr3_info[i].vrdi_absent); 7939 7940 DPRINTF(")\n"); 7941 } else { 7942 DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF); 7943 } 7944 } 7945 7946 void 7947 vmm_decode_cr4(uint64_t cr4) 7948 { 7949 struct vmm_reg_debug_info cr4_info[19] = { 7950 { CR4_PKE, "PKE ", "pke "}, 7951 { CR4_SMAP, "SMAP ", "smap "}, 7952 { CR4_SMEP, "SMEP ", "smep "}, 7953 { CR4_OSXSAVE, "OSXSAVE ", "osxsave "}, 7954 { CR4_PCIDE, "PCIDE ", "pcide "}, 7955 { CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "}, 7956 { CR4_SMXE, "SMXE ", "smxe "}, 7957 { CR4_VMXE, "VMXE ", "vmxe "}, 7958 { CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "}, 7959 { CR4_OSFXSR, "OSFXSR ", "osfxsr "}, 7960 { CR4_PCE, "PCE ", "pce "}, 7961 { CR4_PGE, "PGE ", "pge "}, 7962 { CR4_MCE, "MCE ", "mce "}, 7963 { CR4_PAE, "PAE ", "pae "}, 7964 { CR4_PSE, "PSE ", "pse "}, 7965 { CR4_DE, "DE ", "de "}, 7966 { CR4_TSD, "TSD ", "tsd "}, 7967 { CR4_PVI, "PVI ", "pvi "}, 7968 { CR4_VME, "VME", "vme"} 7969 }; 7970 7971 uint8_t i; 7972 7973 DPRINTF("("); 7974 for (i = 0; i < nitems(cr4_info); i++) 7975 if (cr4 & cr4_info[i].vrdi_bit) 7976 DPRINTF("%s", cr4_info[i].vrdi_present); 7977 else 7978 DPRINTF("%s", cr4_info[i].vrdi_absent); 7979 7980 DPRINTF(")\n"); 7981 } 7982 7983 void 7984 vmm_decode_apicbase_msr_value(uint64_t apicbase) 7985 { 7986 struct vmm_reg_debug_info apicbase_info[3] = { 7987 { APICBASE_BSP, "BSP ", "bsp "}, 7988 { APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "}, 7989 { APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"} 7990 }; 7991 7992 uint8_t i; 7993 7994 DPRINTF("("); 7995 for (i = 0; i < nitems(apicbase_info); i++) 7996 if (apicbase & apicbase_info[i].vrdi_bit) 7997 DPRINTF("%s", apicbase_info[i].vrdi_present); 7998 else 7999 DPRINTF("%s", apicbase_info[i].vrdi_absent); 8000 8001 DPRINTF(")\n"); 8002 } 8003 8004 void 8005 vmm_decode_ia32_fc_value(uint64_t fcr) 8006 { 8007 struct vmm_reg_debug_info fcr_info[4] = { 8008 { IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "}, 8009 { IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "}, 8010 { IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "}, 8011 { IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "} 8012 }; 8013 8014 

void
vmm_decode_cr3(uint64_t cr3)
{
	struct vmm_reg_debug_info cr3_info[2] = {
		{ CR3_PWT, "PWT ", "pwt "},
		{ CR3_PCD, "PCD", "pcd"}
	};

	uint64_t cr4;
	uint8_t i;

	if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
		DPRINTF("(error)\n");
		return;
	}

	/* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
	if ((cr4 & CR4_PCIDE) == 0) {
		DPRINTF("(");
		for (i = 0 ; i < nitems(cr3_info) ; i++)
			if (cr3 & cr3_info[i].vrdi_bit)
				DPRINTF("%s", cr3_info[i].vrdi_present);
			else
				DPRINTF("%s", cr3_info[i].vrdi_absent);

		DPRINTF(")\n");
	} else {
		DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
	}
}

void
vmm_decode_cr4(uint64_t cr4)
{
	struct vmm_reg_debug_info cr4_info[19] = {
		{ CR4_PKE, "PKE ", "pke "},
		{ CR4_SMAP, "SMAP ", "smap "},
		{ CR4_SMEP, "SMEP ", "smep "},
		{ CR4_OSXSAVE, "OSXSAVE ", "osxsave "},
		{ CR4_PCIDE, "PCIDE ", "pcide "},
		{ CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "},
		{ CR4_SMXE, "SMXE ", "smxe "},
		{ CR4_VMXE, "VMXE ", "vmxe "},
		{ CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "},
		{ CR4_OSFXSR, "OSFXSR ", "osfxsr "},
		{ CR4_PCE, "PCE ", "pce "},
		{ CR4_PGE, "PGE ", "pge "},
		{ CR4_MCE, "MCE ", "mce "},
		{ CR4_PAE, "PAE ", "pae "},
		{ CR4_PSE, "PSE ", "pse "},
		{ CR4_DE, "DE ", "de "},
		{ CR4_TSD, "TSD ", "tsd "},
		{ CR4_PVI, "PVI ", "pvi "},
		{ CR4_VME, "VME", "vme"}
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(cr4_info); i++)
		if (cr4 & cr4_info[i].vrdi_bit)
			DPRINTF("%s", cr4_info[i].vrdi_present);
		else
			DPRINTF("%s", cr4_info[i].vrdi_absent);

	DPRINTF(")\n");
}

void
vmm_decode_apicbase_msr_value(uint64_t apicbase)
{
	struct vmm_reg_debug_info apicbase_info[3] = {
		{ APICBASE_BSP, "BSP ", "bsp "},
		{ APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "},
		{ APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"}
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(apicbase_info); i++)
		if (apicbase & apicbase_info[i].vrdi_bit)
			DPRINTF("%s", apicbase_info[i].vrdi_present);
		else
			DPRINTF("%s", apicbase_info[i].vrdi_absent);

	DPRINTF(")\n");
}

void
vmm_decode_ia32_fc_value(uint64_t fcr)
{
	struct vmm_reg_debug_info fcr_info[4] = {
		{ IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "},
		{ IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "},
		{ IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "},
		{ IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "}
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(fcr_info); i++)
		if (fcr & fcr_info[i].vrdi_bit)
			DPRINTF("%s", fcr_info[i].vrdi_present);
		else
			DPRINTF("%s", fcr_info[i].vrdi_absent);

	if (fcr & IA32_FEATURE_CONTROL_SENTER_EN)
		DPRINTF(" [SENTER param = 0x%llx]",
		    (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);

	DPRINTF(")\n");
}

void
vmm_decode_mtrrcap_value(uint64_t val)
{
	struct vmm_reg_debug_info mtrrcap_info[3] = {
		{ MTRRcap_FIXED, "FIXED ", "fixed "},
		{ MTRRcap_WC, "WC ", "wc "},
		{ MTRRcap_SMRR, "SMRR ", "smrr "}
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(mtrrcap_info); i++)
		if (val & mtrrcap_info[i].vrdi_bit)
			DPRINTF("%s", mtrrcap_info[i].vrdi_present);
		else
			DPRINTF("%s", mtrrcap_info[i].vrdi_absent);

	if (val & MTRRcap_FIXED)
		DPRINTF(" [nr fixed ranges = 0x%llx]",
		    (val & 0xff));

	DPRINTF(")\n");
}

void
vmm_decode_perf_status_value(uint64_t val)
{
	DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
}

void
vmm_decode_perf_ctl_value(uint64_t val)
{
	DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
	DPRINTF("pstate req = 0x%llx)\n", (val & 0xffff));
}

void
vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
{
	struct vmm_reg_debug_info mtrrdeftype_info[2] = {
		{ MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "},
		{ MTRRdefType_ENABLE, "ENABLED ", "enabled "},
	};

	uint8_t i;
	int type;

	DPRINTF("(");
	for (i = 0; i < nitems(mtrrdeftype_info); i++)
		if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
			DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
		else
			DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);

	DPRINTF("type = ");
	type = mtrr2mrt(mtrrdeftype & 0xff);
	switch (type) {
	case MDF_UNCACHEABLE: DPRINTF("UC"); break;
	case MDF_WRITECOMBINE: DPRINTF("WC"); break;
	case MDF_WRITETHROUGH: DPRINTF("WT"); break;
	case MDF_WRITEPROTECT: DPRINTF("RO"); break;
	case MDF_WRITEBACK: DPRINTF("WB"); break;
	case MDF_UNKNOWN:
	default:
		DPRINTF("??");
		break;
	}

	DPRINTF(")\n");
}

void
vmm_decode_efer_value(uint64_t efer)
{
	struct vmm_reg_debug_info efer_info[4] = {
		{ EFER_SCE, "SCE ", "sce "},
		{ EFER_LME, "LME ", "lme "},
		{ EFER_LMA, "LMA ", "lma "},
		{ EFER_NXE, "NXE", "nxe"},
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(efer_info); i++)
		if (efer & efer_info[i].vrdi_bit)
			DPRINTF("%s", efer_info[i].vrdi_present);
		else
			DPRINTF("%s", efer_info[i].vrdi_absent);

	DPRINTF(")\n");
}

void
vmm_decode_msr_value(uint64_t msr, uint64_t val)
{
	switch (msr) {
	case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break;
	case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break;
	case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break;
	case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break;
	case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break;
	case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break;
	case MSR_EFER: vmm_decode_efer_value(val); break;
	case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
	default: DPRINTF("\n");
	}
}
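
/*
 * vmm_decode_rflags
 *
 * Debug function to print the individual flag bits and the IOPL field of
 * the RFLAGS value supplied in 'rflags'.
 *
 * Parameters:
 *  rflags - The RFLAGS value to decode
 */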
void
vmm_decode_rflags(uint64_t rflags)
{
	struct vmm_reg_debug_info rflags_info[16] = {
		{ PSL_C, "CF ", "cf "},
		{ PSL_PF, "PF ", "pf "},
		{ PSL_AF, "AF ", "af "},
		{ PSL_Z, "ZF ", "zf "},
		{ PSL_N, "SF ", "sf "},	/* sign flag */
		{ PSL_T, "TF ", "tf "},
		{ PSL_I, "IF ", "if "},
		{ PSL_D, "DF ", "df "},
		{ PSL_V, "OF ", "of "},	/* overflow flag */
		{ PSL_NT, "NT ", "nt "},
		{ PSL_RF, "RF ", "rf "},
		{ PSL_VM, "VM ", "vm "},
		{ PSL_AC, "AC ", "ac "},
		{ PSL_VIF, "VIF ", "vif "},
		{ PSL_VIP, "VIP ", "vip "},
		{ PSL_ID, "ID ", "id "},
	};

	uint8_t i, iopl;

	DPRINTF("(");
	for (i = 0; i < nitems(rflags_info); i++)
		if (rflags & rflags_info[i].vrdi_bit)
			DPRINTF("%s", rflags_info[i].vrdi_present);
		else
			DPRINTF("%s", rflags_info[i].vrdi_absent);

	iopl = (rflags & PSL_IOPL) >> 12;
	DPRINTF("IOPL=%d", iopl);

	DPRINTF(")\n");
}

void
vmm_decode_misc_enable_value(uint64_t misc)
{
	struct vmm_reg_debug_info misc_info[10] = {
		{ MISC_ENABLE_FAST_STRINGS, "FSE ", "fse "},
		{ MISC_ENABLE_TCC, "TCC ", "tcc "},
		{ MISC_ENABLE_PERF_MON_AVAILABLE, "PERF ", "perf "},
		{ MISC_ENABLE_BTS_UNAVAILABLE, "BTSU ", "btsu "},
		{ MISC_ENABLE_PEBS_UNAVAILABLE, "PEBSU ", "pebsu "},
		{ MISC_ENABLE_EIST_ENABLED, "EIST ", "eist "},
		{ MISC_ENABLE_ENABLE_MONITOR_FSM, "MFSM ", "mfsm "},
		{ MISC_ENABLE_LIMIT_CPUID_MAXVAL, "CMAX ", "cmax "},
		{ MISC_ENABLE_xTPR_MESSAGE_DISABLE, "xTPRD ", "xtprd "},
		{ MISC_ENABLE_XD_BIT_DISABLE, "NXD", "nxd"},
	};

	uint8_t i;

	DPRINTF("(");
	for (i = 0; i < nitems(misc_info); i++)
		if (misc & misc_info[i].vrdi_bit)
			DPRINTF("%s", misc_info[i].vrdi_present);
		else
			DPRINTF("%s", misc_info[i].vrdi_absent);

	DPRINTF(")\n");
}

const char *
vmm_decode_cpu_mode(struct vcpu *vcpu)
{
	int mode = vmm_get_guest_cpu_mode(vcpu);

	switch (mode) {
	case VMM_CPU_MODE_REAL: return "real";
	case VMM_CPU_MODE_PROT: return "16 bit protected";
	case VMM_CPU_MODE_PROT32: return "32 bit protected";
	case VMM_CPU_MODE_COMPAT: return "compatibility";
	case VMM_CPU_MODE_LONG: return "long";
	default: return "unknown";
	}
}
#endif /* VMM_DEBUG */