/*	$OpenBSD: vmm.c,v 1.44 2016/09/03 11:38:08 mlarkin Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* nitems */
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/mman.h>

#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>
#include <dev/pci/pcireg.h>

#include <machine/param.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <imsg.h>
#include <limits.h>
#include <poll.h>
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>

#include "vmd.h"
#include "vmm.h"
#include "loadfile.h"
#include "pci.h"
#include "virtio.h"
#include "proc.h"
#include "i8253.h"
#include "i8259.h"
#include "ns8250.h"
#include "mc146818.h"

io_fn_t ioports_map[MAX_PORTS];

void vmm_sighdlr(int, short, void *);
int start_client_vmd(void);
int opentap(void);
int start_vm(struct imsg *, uint32_t *);
int terminate_vm(struct vm_terminate_params *);
int get_info_vm(struct privsep *, struct imsg *, int);
int run_vm(int *, int *, struct vm_create_params *, struct vcpu_reg_state *);
void *event_thread(void *);
void *vcpu_run_loop(void *);
int vcpu_exit(struct vm_run_params *);
int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
void create_memory_map(struct vm_create_params *);
int alloc_guest_mem(struct vm_create_params *);
int vmm_create_vm(struct vm_create_params *);
void init_emulated_hw(struct vm_create_params *, int *, int *);
void vcpu_exit_inout(struct vm_run_params *);
uint8_t vcpu_exit_pci(struct vm_run_params *);
int vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
void vmm_run(struct privsep *, struct privsep_proc *, void *);
int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);

static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
    size_t);

int con_fd;
struct vmd_vm *current_vm;

extern struct vmd *env;

extern char *__progname;

pthread_mutex_t threadmutex;
pthread_cond_t threadcond;

pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM];
pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM];
uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM];
uint8_t vcpu_done[VMM_MAX_VCPUS_PER_VM];

static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	vmm_dispatch_parent },
};

/*
 * Represents a standard register set for an OS to be booted
 * as a flat 32 bit address space, before paging is enabled.
 *
 * NOT set here are:
 *  RIP
 *  RSP
 *  GDTR BASE
 *
 * Specific bootloaders should clone this structure and override
 * those fields as needed.
 *
 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
 * features of the CPU in use.
 */
static const struct vcpu_reg_state vcpu_init_flat32 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = CR0_CD | CR0_NW | CR0_ET | CR0_PE | CR0_PG,
	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
};

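/*
 * For reference, each vcpu_segment_info initializer above is
 * { selector, limit, access rights, base }, assuming the field order
 * declared in <machine/vmmvar.h>, with the access-rights word in the
 * VMX format (type in bits 3:0, S bit 4, DPL bits 6:5, P bit 7,
 * D/B bit 14, G bit 15). As a sketch, the CS entry decodes to:
 * selector 0x8 (GDT entry 1), 4 GB limit, access 0xC09F = present,
 * DPL 0, 32-bit, page-granular, execute/read code; 0xC093 on the data
 * segments is the same but for read/write data.
 */
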
pid_t
vmm(struct privsep *ps, struct privsep_proc *p)
{
	return (proc_run(ps, p, procs, nitems(procs), vmm_run, NULL));
}

void
vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	if (config_init(ps->ps_env) == -1)
		fatal("failed to initialize configuration");

	signal_del(&ps->ps_evsigchld);
	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
	signal_add(&ps->ps_evsigchld, NULL);

#if 0
	/*
	 * pledge in the vmm process:
	 * stdio - for malloc and basic I/O including events.
	 * vmm - for the vmm ioctls and operations.
	 * proc - for forking and maintaining vms.
	 * recvfd - for disks, interfaces and other fds.
	 */
	/* XXX'ed pledge to hide it from grep as long as it's disabled */
	if (XXX("stdio vmm recvfd proc", NULL) == -1)
		fatal("pledge");
#endif

	/* Get and terminate all running VMs */
	get_info_vm(ps, NULL, 1);
}

int
vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep *ps = p->p_ps;
	int res = 0, cmd = 0;
	struct vm_create_params vcp;
	struct vm_terminate_params vtp;
	struct vmop_result vmr;
	uint32_t id = 0;
	struct vmd_vm *vm;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vcp);
		memcpy(&vcp, imsg->data, sizeof(vcp));
		res = config_getvm(ps, &vcp, imsg->fd, imsg->hdr.peerid);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_DISK:
		res = config_getdisk(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_IF:
		res = config_getif(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_END:
		res = start_vm(imsg, &id);
		cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vtp);
		memcpy(&vtp, imsg->data, sizeof(vtp));
		id = vtp.vtp_vm_id;
		res = terminate_vm(&vtp);
		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
		if (res == 0) {
			/* Remove local reference */
			vm = vm_getbyid(id);
			vm_remove(vm);
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		res = get_info_vm(ps, imsg, 0);
		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
		break;
	case IMSG_CTL_RESET:
		config_getreset(env, imsg);
		break;
	default:
		return (-1);
	}

	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		if (res != 0) {
			vm = vm_getbyvmid(imsg->hdr.peerid);
			vm_remove(vm);
		}
		/* FALLTHROUGH */
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    imsg->hdr.peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    imsg->hdr.peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}

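/*
 * A sketch of the message flow handled above, as implied by the case
 * labels: the parent streams IMSG_VMDOP_START_VM_REQUEST (the
 * vm_create_params), then one IMSG_VMDOP_START_VM_DISK or _IF per
 * disk/interface fd, and finally IMSG_VMDOP_START_VM_END, which
 * triggers start_vm() and a single IMSG_VMDOP_START_VM_RESPONSE
 * carrying a vmop_result with an errno-style status and the VM id.
 */
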
void
vmm_sighdlr(int sig, short event, void *arg)
{
	struct privsep *ps = arg;
	int status;
	uint32_t vmid;
	pid_t pid;
	struct vmop_result vmr;
	struct vmd_vm *vm;
	struct vm_terminate_params vtp;

	switch (sig) {
	case SIGCHLD:
		do {
			pid = waitpid(-1, &status, WNOHANG);
			if (pid <= 0)
				continue;

			if (WIFEXITED(status) || WIFSIGNALED(status)) {
				vm = vm_getbypid(pid);
				if (vm == NULL) {
					/*
					 * If the VM is gone already, it
					 * got terminated via a
					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
					 */
					continue;
				}

				vmid = vm->vm_params.vcp_id;
				vtp.vtp_vm_id = vmid;
				if (terminate_vm(&vtp) == 0) {
					memset(&vmr, 0, sizeof(vmr));
					vmr.vmr_result = 0;
					vmr.vmr_id = vmid;
					vm_remove(vm);
					if (proc_compose_imsg(ps, PROC_PARENT,
					    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
					    0, -1, &vmr, sizeof(vmr)) == -1)
						log_warnx("could not signal "
						    "termination of VM %u to "
						    "parent", vmid);
				} else
					log_warnx("could not terminate VM %u",
					    vmid);
			} else
				fatalx("unexpected cause of SIGCHLD");
		} while (pid > 0 || (pid == -1 && errno == EINTR));
		break;
	default:
		fatalx("unexpected signal");
	}
}

/*
 * vcpu_reset
 *
 * Requests vmm(4) to reset the VCPUs in the indicated VM to
 * the register state provided.
 *
 * Parameters:
 *  vmid: VM ID to reset
 *  vcpu_id: VCPU ID to reset
 *  vrs: the register state to initialize
 *
 * Return values:
 *  0: success
 *  !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
 *      valid)
 */
int
vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
{
	struct vm_resetcpu_params vrp;

	memset(&vrp, 0, sizeof(vrp));
	vrp.vrp_vm_id = vmid;
	vrp.vrp_vcpu_id = vcpu_id;
	memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));

	log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);

	if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) < 0)
		return (errno);

	return (0);
}

/*
 * terminate_vm
 *
 * Requests vmm(4) to terminate the VM whose ID is provided in the
 * supplied vm_terminate_params structure (vtp->vtp_vm_id)
 *
 * Parameters:
 *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
 *
 * Return values:
 *  0: success
 *  !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not
 *      valid)
 */
int
terminate_vm(struct vm_terminate_params *vtp)
{
	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) < 0)
		return (errno);

	return (0);
}

/*
 * opentap
 *
 * Opens the next available tap device, up to MAX_TAP.
 *
 * Returns a file descriptor to the tap node opened, or -1 if no tap
 * devices were available.
 */
int
opentap(void)
{
	int i, fd;
	char path[PATH_MAX];

	for (i = 0; i < MAX_TAP; i++) {
		snprintf(path, PATH_MAX, "/dev/tap%d", i);
		fd = open(path, O_RDWR | O_NONBLOCK);
		if (fd != -1)
			return (fd);
	}

	return (-1);
}

/*
 * start_vm
 *
 * Starts a new VM with the creation parameters supplied (in the incoming
 * imsg->data field). This function performs a basic sanity check on the
 * incoming parameters and then performs the following steps to complete
 * the creation of the VM:
 *
 * 1. opens the VM disk image files specified in the VM creation parameters
 * 2. opens the specified VM kernel
 * 3. creates a VM console tty pair using openpty
 * 4. forks, passing the file descriptors opened in steps 1-3 to the child
 *    vmd responsible for dropping privilege and running the VM's VCPU
 *    loops.
 *
 * Parameters:
 *  imsg: The incoming imsg body whose 'data' field is a vm_create_params
 *      struct containing the VM creation parameters.
 *  id: Returns the VM id as reported by the kernel.
 *
 * Return values:
 *  0: success
 *  !0 : failure - typically an errno indicating the source of the failure
 */
int
start_vm(struct imsg *imsg, uint32_t *id)
{
	struct vm_create_params *vcp;
	struct vmd_vm *vm;
	size_t i;
	int ret = EINVAL;
	int fds[2];
	struct vcpu_reg_state vrs;

	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
		log_warnx("%s: can't find vm", __func__);
		ret = ENOENT;
		goto err;
	}
	vcp = &vm->vm_params;

	if ((vm->vm_tty = imsg->fd) == -1) {
		log_warnx("%s: can't get tty", __func__);
		goto err;
	}

	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
		fatal("socketpair");

	/* Start child vmd for this VM (fork, chroot, drop privs) */
	ret = start_client_vmd();

	/* Start child failed? - cleanup and leave */
	if (ret == -1) {
		log_warnx("%s: start child failed", __func__);
		ret = EIO;
		goto err;
	}

	if (ret > 0) {
		/* Parent */
		vm->vm_pid = ret;

		for (i = 0; i < vcp->vcp_ndisks; i++) {
			close(vm->vm_disks[i]);
			vm->vm_disks[i] = -1;
		}

		for (i = 0; i < vcp->vcp_nnics; i++) {
			close(vm->vm_ifs[i]);
			vm->vm_ifs[i] = -1;
		}

		close(vm->vm_kernel);
		vm->vm_kernel = -1;

		close(vm->vm_tty);
		vm->vm_tty = -1;

		/* read back the kernel-generated vm id from the child */
		close(fds[1]);
		if (read(fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
		    sizeof(vcp->vcp_id))
			fatal("read vcp id");
		close(fds[0]);

		if (vcp->vcp_id == 0) {
			ret = EIO;
			goto err;
		}

		*id = vcp->vcp_id;

		return (0);
	} else {
		/* Child */
		setproctitle("%s", vcp->vcp_name);
		log_procinit(vcp->vcp_name);

		create_memory_map(vcp);
		ret = alloc_guest_mem(vcp);
		if (ret) {
			errno = ret;
			fatal("could not allocate guest memory - exiting");
		}

		ret = vmm_create_vm(vcp);
		current_vm = vm;

		/* send back the kernel-generated vm id (0 on error) */
		close(fds[0]);
		if (write(fds[1], &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
		    sizeof(vcp->vcp_id))
			fatal("write vcp id");
		close(fds[1]);

		if (ret) {
			errno = ret;
			fatal("create vmm ioctl failed - exiting");
		}

#if 0
		/*
		 * pledge in the vm processes:
		 * stdio - for malloc and basic I/O including events.
		 * vmm - for the vmm ioctls and operations.
		 */
		if (XXX("stdio vmm", NULL) == -1)
			fatal("pledge");
#endif

		/*
		 * Set up default "flat 32 bit" register state - RIP,
		 * RSP, and GDT info will be set in bootloader
		 */
		memcpy(&vrs, &vcpu_init_flat32, sizeof(struct vcpu_reg_state));

		/* Load kernel image */
		ret = loadelf_main(vm->vm_kernel, vcp, &vrs);
		if (ret) {
			errno = ret;
			fatal("failed to load kernel - exiting");
		}

		close(vm->vm_kernel);

		con_fd = vm->vm_tty;
		if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1)
			fatal("failed to set nonblocking mode on console");

		/* Execute the vcpu run loop(s) for this VM */
		ret = run_vm(vm->vm_disks, vm->vm_ifs, vcp, &vrs);

		_exit(ret != 0);
	}

	return (0);

err:
	vm_remove(vm);

	return (ret);
}

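/*
 * The socketpair handshake above, sketched out: the child creates the
 * VM in the kernel and writes the kernel-assigned vcp_id (or 0 on
 * failure) over fds[1]; the parent blocks in read(2) on fds[0] until
 * that single uint32_t arrives. A successful start_vm() therefore
 * never returns to the parent before vcp->vcp_id is populated.
 */
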
/*
 * get_info_vm
 *
 * Returns a list of VMs known to vmm(4).
 *
 * Parameters:
 *  ps: the privsep context.
 *  imsg: the received imsg including the peer id.
 *  terminate: terminate the listed vm.
 *
 * Return values:
 *  0: success
 *  !0 : failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
 */
int
get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
{
	int ret;
	size_t ct, i;
	struct vm_info_params vip;
	struct vm_info_result *info;
	struct vm_terminate_params vtp;
	struct vmop_info_result vir;

	/*
	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
	 * buffer size of 0, which results in vmm(4) returning the
	 * number of bytes required back to us in vip.vip_size,
	 * and then we call it again after malloc'ing the required
	 * number of bytes.
	 *
	 * It is possible that we could fail a second time (eg, if
	 * another VM was created in the instant between the two
	 * ioctls), but in that case the caller can just try again, as
	 * vmm(4) will return a zero-sized list.
	 */
	vip.vip_size = 0;
	info = NULL;
	ret = 0;
	memset(&vir, 0, sizeof(vir));

	/* First ioctl to see how many bytes needed (vip.vip_size) */
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) < 0)
		return (errno);

	if (vip.vip_info_ct != 0)
		return (EIO);

	info = malloc(vip.vip_size);
	if (info == NULL)
		return (ENOMEM);

	/* Second ioctl to get the actual list */
	vip.vip_info = info;
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) < 0) {
		ret = errno;
		free(info);
		return (ret);
	}

	/* Return info */
	ct = vip.vip_size / sizeof(struct vm_info_result);
	for (i = 0; i < ct; i++) {
		if (terminate) {
			vtp.vtp_vm_id = info[i].vir_id;
			if ((ret = terminate_vm(&vtp)) != 0) {
				free(info);
				return (ret);
			}
			log_debug("%s: terminated VM %s (id %d)", __func__,
			    info[i].vir_name, info[i].vir_id);
			continue;
		}
		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
		if (proc_compose_imsg(ps, PROC_PARENT, -1,
		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
		    &vir, sizeof(vir)) == -1) {
			free(info);
			return (EIO);
		}
	}
	free(info);
	return (0);
}

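/*
 * The size-query pattern above in isolation (a sketch, error handling
 * elided):
 *
 *	vip.vip_size = 0;
 *	ioctl(fd, VMM_IOC_INFO, &vip);        (kernel sets vip.vip_size)
 *	vip.vip_info = malloc(vip.vip_size);
 *	ioctl(fd, VMM_IOC_INFO, &vip);        (kernel fills the array)
 *
 * The entry count is then vip.vip_size / sizeof(struct vm_info_result).
 */
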
/*
 * start_client_vmd
 *
 * forks a copy of the parent vmd; the child inherits the vmm process's
 * already-unprivileged state and simply returns, as does the parent.
 *
 * Return values (returns to both child and parent on success):
 *  -1 : failure
 *  0: return to child vmd returns 0
 *  !0 : return to parent vmd returns the child's pid
 */
int
start_client_vmd(void)
{
	int child_pid;

	child_pid = fork();
	if (child_pid < 0)
		return (-1);

	if (!child_pid) {
		/* child, already running without privileges */
		return (0);
	}

	/* Parent */
	return (child_pid);
}

/*
 * create_memory_map
 *
 * Sets up the guest physical memory ranges that the VM can access.
 *
 * Return values:
 *  nothing
 */
void
create_memory_map(struct vm_create_params *vcp)
{
	size_t len, mem_bytes, mem_mb;

	mem_mb = vcp->vcp_memranges[0].vmr_size;
	vcp->vcp_nmemranges = 0;
	if (mem_mb < 1 || mem_mb > VMM_MAX_VM_MEM_SIZE)
		return;

	mem_bytes = mem_mb * 1024 * 1024;

	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
	len = LOWMEM_KB * 1024;
	vcp->vcp_memranges[0].vmr_gpa = 0x0;
	vcp->vcp_memranges[0].vmr_size = len;
	mem_bytes -= len;

	/*
	 * Second memory region: LOWMEM_KB - 1MB.
	 *
	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
	 * We have to add this region, because some systems
	 * unconditionally write to 0xb8000 (VGA RAM), and
	 * we need to make sure that vmm(4) permits accesses
	 * to it. So allocate guest memory for it.
	 */
	len = 0x100000 - LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_size = len;
	mem_bytes -= len;

	/* Make sure that we do not place physical memory into MMIO ranges. */
	if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - 0x100000)
		len = VMM_PCI_MMIO_BAR_BASE - 0x100000;
	else
		len = mem_bytes;

	/* Third memory region: 1MB - (1MB + len) */
	vcp->vcp_memranges[2].vmr_gpa = 0x100000;
	vcp->vcp_memranges[2].vmr_size = len;
	mem_bytes -= len;

	if (mem_bytes > 0) {
		/* Fourth memory region for the remaining memory (if any) */
		vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
		vcp->vcp_memranges[3].vmr_size = mem_bytes;
		vcp->vcp_nmemranges = 4;
	} else
		vcp->vcp_nmemranges = 3;
}

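/*
 * Worked example for the code above, assuming a 512 MB guest and
 * LOWMEM_KB = 636: range 0 covers gpa 0 - 636 KB, range 1 covers
 * 636 KB - 1 MB (so the VGA window at 0xb8000 is backed by real guest
 * memory), and range 2 covers 1 MB - 512 MB, since 511 MB fits below
 * VMM_PCI_MMIO_BAR_BASE. Nothing is left over, so vcp_nmemranges is 3;
 * only guests big enough to collide with the PCI MMIO hole get the
 * fourth, high range at VMM_PCI_MMIO_BAR_END + 1.
 */
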
/*
 * alloc_guest_mem
 *
 * Allocates memory for the guest.
 * Instead of doing a single allocation with one mmap(), we allocate memory
 * separately for every range for the following reasons:
 * - ASLR for the individual ranges
 * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
 *   map the single mmap'd userspace memory to the individual guest physical
 *   memory ranges, the underlying amap of the single mmap'd range would have
 *   to allocate per-page reference counters. The reason is that the
 *   individual guest physical ranges would reference the single mmap'd region
 *   only partially. However, if every guest physical range has its own
 *   corresponding mmap'd userspace allocation, there are no partial
 *   references: every guest physical range fully references an mmap'd
 *   range => no per-page reference counters have to be allocated.
 *
 * Return values:
 *  0: success
 *  !0: failure - errno indicating the source of the failure
 */
int
alloc_guest_mem(struct vm_create_params *vcp)
{
	void *p;
	int ret;
	size_t i, j;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON, -1, 0);
		if (p == MAP_FAILED) {
			ret = errno;
			for (j = 0; j < i; j++) {
				vmr = &vcp->vcp_memranges[j];
				munmap((void *)vmr->vmr_va, vmr->vmr_size);
			}

			return (ret);
		}

		vmr->vmr_va = (vaddr_t)p;
	}

	return (0);
}

/*
 * vmm_create_vm
 *
 * Requests vmm(4) to create a new VM using the supplied creation
 * parameters. This operation results in the creation of the in-kernel
 * structures for the VM, but does not start the VM's vcpu(s).
 *
 * Parameters:
 *  vcp: vm_create_params struct containing the VM's desired creation
 *      configuration
 *
 * Return values:
 *  0: success
 *  !0 : ioctl to vmm(4) failed
 */
int
vmm_create_vm(struct vm_create_params *vcp)
{
	/* Sanity check arguments */
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
		return (EINVAL);

	if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) < 0)
		return (errno);

	return (0);
}

/*
 * init_emulated_hw
 *
 * Initializes the userspace hardware emulation
 */
void
init_emulated_hw(struct vm_create_params *vcp, int *child_disks,
    int *child_taps)
{
	int i;

	/* Reset the IO port map */
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_init(vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;

	/* Init mc146818 RTC */
	mc146818_init(vcp->vcp_id);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init master and slave PICs */
	i8259_init();
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;

	/* Init ns8250 UART */
	ns8250_init(con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Initialize PCI */
	for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	pci_init();

	/* Initialize virtio devices */
	virtio_init(vcp, child_disks, child_taps);
}

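/*
 * ioports_map is a flat dispatch table indexed by I/O port number;
 * each registration above routes a port to the device emulation behind
 * it. As a sketch of one lookup, assuming the usual COM1 base of
 * 0x3f8: a guest inb from port 0x3f8 exits to vcpu_exit_inout(), which
 * finds ioports_map[0x3f8] == vcpu_exit_com and lets the ns8250 code
 * synthesize the byte; unclaimed ports fall through to a read of
 * 0xFFFFFFFF, mimicking an empty ISA bus.
 */
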
/*
 * run_vm
 *
 * Runs the VM whose creation parameters are specified in vcp
 *
 * Parameters:
 *  child_disks: previously-opened child VM disk file descriptors
 *  child_taps: previously-opened child tap file descriptors
 *  vcp: vm_create_params struct containing the VM's desired creation
 *      configuration
 *  vrs: VCPU register state to initialize
 *
 * Return values:
 *  0: the VM exited normally
 *  !0 : the VM exited abnormally or failed to start
 */
int
run_vm(int *child_disks, int *child_taps, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs)
{
	uint8_t evdone = 0;
	size_t i;
	int ret;
	pthread_t *tid, evtid;
	struct vm_run_params **vrp;
	void *exit_status;

	if (vcp == NULL)
		return (EINVAL);

	if (child_disks == NULL && vcp->vcp_ndisks != 0)
		return (EINVAL);

	if (child_taps == NULL && vcp->vcp_nnics != 0)
		return (EINVAL);

	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	event_init();

	tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
	vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
	if (tid == NULL || vrp == NULL) {
		log_warn("%s: memory allocation error - exiting.",
		    __progname);
		return (ENOMEM);
	}

	log_debug("%s: initializing hardware for vm %s", __func__,
	    vcp->vcp_name);

	init_emulated_hw(vcp, child_disks, child_taps);

	ret = pthread_mutex_init(&threadmutex, NULL);
	if (ret) {
		errno = ret;
		log_warn("%s: could not initialize thread state mutex",
		    __func__);
		return (ret);
	}
	ret = pthread_cond_init(&threadcond, NULL);
	if (ret) {
		errno = ret;
		log_warn("%s: could not initialize thread state "
		    "condition variable", __func__);
		return (ret);
	}

	mutex_lock(&threadmutex);

	log_debug("%s: starting vcpu threads for vm %s", __func__,
	    vcp->vcp_name);

	/*
	 * Create and launch one thread for each VCPU. These threads may
	 * migrate between PCPUs over time; the need to reload CPU state
	 * in such situations is detected and performed by vmm(4) in the
	 * kernel.
	 */
	for (i = 0; i < vcp->vcp_ncpus; i++) {
		vrp[i] = malloc(sizeof(struct vm_run_params));
		if (vrp[i] == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip free'ing */
			return (ENOMEM);
		}
		vrp[i]->vrp_exit = malloc(sizeof(union vm_exit));
		if (vrp[i]->vrp_exit == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip free'ing */
			return (ENOMEM);
		}
		vrp[i]->vrp_vm_id = vcp->vcp_id;
		vrp[i]->vrp_vcpu_id = i;

		if (vcpu_reset(vcp->vcp_id, i, vrs)) {
			log_warnx("%s: cannot reset VCPU %zu - exiting.",
			    __progname, i);
			return (EIO);
		}

		ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize cond var (%d)",
			    __progname, ret);
			return (ret);
		}

		ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize mtx (%d)",
			    __progname, ret);
			return (ret);
		}

		vcpu_hlt[i] = 0;

		/* Start each VCPU run thread at vcpu_run_loop */
		ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]);
		if (ret) {
			/* caller will _exit after this return */
			errno = ret;
			log_warn("%s: could not create vcpu thread %zu",
			    __func__, i);
			return (ret);
		}
	}

	log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
	ret = pthread_create(&evtid, NULL, event_thread, &evdone);
	if (ret) {
		errno = ret;
		log_warn("%s: could not create event thread", __func__);
		return (ret);
	}

	for (;;) {
		ret = pthread_cond_wait(&threadcond, &threadmutex);
		if (ret) {
			errno = ret;
			log_warn("%s: waiting on thread state condition "
			    "variable failed", __func__);
			return (ret);
		}

		/*
		 * Did a VCPU thread exit with an error? => return the first one
		 */
		for (i = 0; i < vcp->vcp_ncpus; i++) {
			if (vcpu_done[i] == 0)
				continue;

			if (pthread_join(tid[i], &exit_status)) {
				log_warn("%s: failed to join thread %zd - "
				    "exiting", __progname, i);
				return (EIO);
			}

			if (exit_status != NULL) {
				log_warnx("%s: vm %d vcpu run thread %zd "
				    "exited abnormally", __progname,
				    vcp->vcp_id, i);
				return (EIO);
			}
		}

		/* Did the event thread exit? => return with an error */
		if (evdone) {
			if (pthread_join(evtid, &exit_status)) {
				log_warn("%s: failed to join event thread - "
				    "exiting", __progname);
				return (EIO);
			}

			log_warnx("%s: vm %d event thread exited "
			    "unexpectedly", __progname, vcp->vcp_id);
			return (EIO);
		}

		/* Did all VCPU threads exit successfully? => return 0 */
		for (i = 0; i < vcp->vcp_ncpus; i++) {
			if (vcpu_done[i] == 0)
				break;
		}
		if (i == vcp->vcp_ncpus)
			return (0);

		/* Some more threads to wait for, start over */
	}

	return (0);
}

void *
event_thread(void *arg)
{
	uint8_t *donep = arg;
	intptr_t ret;

	ret = event_dispatch();

	mutex_lock(&threadmutex);
	*donep = 1;
	pthread_cond_signal(&threadcond);
	mutex_unlock(&threadmutex);

	return (void *)ret;
}

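/*
 * A sketch of the completion protocol shared by event_thread() and the
 * vcpu threads: each exiting thread takes threadmutex, marks its slot
 * (evdone here, vcpu_done[n] in vcpu_run_loop()), signals threadcond
 * and unlocks. run_vm() holds threadmutex across its
 * pthread_cond_wait() loop, so no wakeup can be lost between a thread
 * marking itself done and the main loop re-checking evdone/vcpu_done.
 */
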
/*
 * vcpu_run_loop
 *
 * Runs a single VCPU until vmm(4) requires help handling an exit,
 * or the VM terminates.
 *
 * Parameters:
 *  arg: vcpu_run_params for the VCPU being run by this thread
 *
 * Return values:
 *  NULL: the VCPU shutdown properly
 *  !NULL: error processing VCPU run, or the VCPU shutdown abnormally
 */
void *
vcpu_run_loop(void *arg)
{
	struct vm_run_params *vrp = (struct vm_run_params *)arg;
	intptr_t ret = 0;
	int irq;
	uint32_t n;

	vrp->vrp_continue = 0;
	n = vrp->vrp_vcpu_id;

	for (;;) {
		ret = pthread_mutex_lock(&vcpu_run_mtx[n]);

		if (ret) {
			log_warnx("%s: can't lock vcpu run mtx (%d)",
			    __func__, (int)ret);
			return ((void *)ret);
		}

		/* If we are halted, wait */
		if (vcpu_hlt[n]) {
			ret = pthread_cond_wait(&vcpu_run_cond[n],
			    &vcpu_run_mtx[n]);

			if (ret) {
				log_warnx("%s: can't wait on cond (%d)",
				    __func__, (int)ret);
				(void)pthread_mutex_unlock(&vcpu_run_mtx[n]);
				break;
			}
		}

		ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);
		if (ret) {
			log_warnx("%s: can't unlock mutex on cond (%d)",
			    __func__, (int)ret);
			break;
		}

		if (vrp->vrp_irqready && i8259_is_pending()) {
			irq = i8259_ack();
			vrp->vrp_irq = irq;
		} else
			vrp->vrp_irq = 0xFFFF;

		/* Still more pending? */
		if (i8259_is_pending()) {
			/*
			 * XXX can probably avoid ioctls here by providing
			 * intr in vrp
			 */
			if (vcpu_pic_intr(vrp->vrp_vm_id,
			    vrp->vrp_vcpu_id, 1)) {
				fatal("can't set INTR");
			}
		} else {
			if (vcpu_pic_intr(vrp->vrp_vm_id,
			    vrp->vrp_vcpu_id, 0)) {
				fatal("can't clear INTR");
			}
		}

		if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) < 0) {
			/* If run ioctl failed, exit */
			ret = errno;
			log_warn("%s: vm %d / vcpu %d run ioctl failed",
			    __func__, vrp->vrp_vm_id, n);
			break;
		}

		/* If the VM is terminating, exit normally */
		if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) {
			ret = (intptr_t)NULL;
			break;
		}

		if (vrp->vrp_exit_reason != VM_EXIT_NONE) {
			/*
			 * vmm(4) needs help handling an exit, handle in
			 * vcpu_exit.
			 */
			if (vcpu_exit(vrp)) {
				ret = EIO;
				break;
			}
		}
	}

	mutex_lock(&threadmutex);
	vcpu_done[n] = 1;
	pthread_cond_signal(&threadcond);
	mutex_unlock(&threadmutex);

	return ((void *)ret);
}

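/*
 * The halt/wake handshake above, sketched end to end: a guest HLT
 * makes vcpu_exit() set vcpu_hlt[n] under vcpu_run_mtx[n], so the next
 * pass through the run loop parks in pthread_cond_wait(). When a
 * device later raises an interrupt, vcpu_assert_pic_irq() clears
 * vcpu_hlt[n] and signals vcpu_run_cond[n] under the same mutex, and
 * the thread resumes issuing VMM_IOC_RUN with the pending vector.
 */
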
int
vcpu_pic_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
{
	struct vm_intr_params vip;

	memset(&vip, 0, sizeof(vip));

	vip.vip_vm_id = vm_id;
	vip.vip_vcpu_id = vcpu_id; /* XXX always 0? */
	vip.vip_intr = intr;

	if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) < 0)
		return (errno);

	return (0);
}

/*
 * vcpu_exit_pci
 *
 * Handle all I/O to the emulated PCI subsystem.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return value:
 *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
 *      be injected.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
	union vm_exit *vei = vrp->vrp_exit;
	uint8_t intr;

	intr = 0xFF;

	switch (vei->vei.vei_port) {
	case PCI_MODE1_ADDRESS_REG:
		pci_handle_address_reg(vrp);
		break;
	case PCI_MODE1_DATA_REG:
		pci_handle_data_reg(vrp);
		break;
	case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
		intr = pci_handle_io(vrp);
		break;
	default:
		log_warnx("%s: unknown PCI register 0x%llx",
		    __progname, (uint64_t)vei->vei.vei_port);
		break;
	}

	return (intr);
}

/*
 * vcpu_exit_inout
 *
 * Handle all I/O exits that need to be emulated in vmd. This includes the
 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
	union vm_exit *vei = vrp->vrp_exit;
	uint8_t intr = 0xFF;

	if (ioports_map[vei->vei.vei_port] != NULL)
		intr = ioports_map[vei->vei.vei_port](vrp);
	else if (vei->vei.vei_dir == VEI_DIR_IN)
		vei->vei.vei_data = 0xFFFFFFFF;

	if (intr != 0xFF)
		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}

/*
 * vcpu_exit
 *
 * Handle a vcpu exit. This function is called when it is determined that
 * vmm(4) requires the assistance of vmd to support a particular guest
 * exit type (eg, accessing an I/O port or device). Guest state is contained
 * in 'vrp', and will be resent to vmm(4) on exit completion.
 *
 * Upon conclusion of handling the exit, the function determines if any
 * interrupts should be injected into the guest, and asserts the proper
 * IRQ line whose interrupt should be vectored.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: the exit was handled successfully
 *  1: an error occurred (eg, unknown exit reason passed in 'vrp')
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
	int ret;

	switch (vrp->vrp_exit_reason) {
	case VMX_EXIT_IO:
		vcpu_exit_inout(vrp);
		break;
	case VMX_EXIT_HLT:
		ret = pthread_mutex_lock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
		if (ret) {
			log_warnx("%s: can't lock vcpu mutex (%d)",
			    __func__, ret);
			return (1);
		}
		vcpu_hlt[vrp->vrp_vcpu_id] = 1;
		ret = pthread_mutex_unlock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
		if (ret) {
			log_warnx("%s: can't unlock vcpu mutex (%d)",
			    __func__, ret);
			return (1);
		}
		break;
	case VMX_EXIT_INT_WINDOW:
		break;
	case VMX_EXIT_TRIPLE_FAULT:
		log_warnx("%s: triple fault", __progname);
		return (1);
	default:
		log_debug("%s: unknown exit reason %d",
		    __progname, vrp->vrp_exit_reason);
	}

	/* XXX this may not be irq 9 all the time */
	if (vionet_process_rx())
		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, 9);

	vrp->vrp_continue = 1;

	return (0);
}

/*
 * find_gpa_range
 *
 * Search for a contiguous guest physical mem range.
 *
 * Parameters:
 *  vcp: VM create parameters that contain the memory map to search in
 *  gpa: the starting guest physical address
 *  len: the length of the memory range
 *
 * Return values:
 *  NULL: on failure, if there is no memory range as described by the
 *      parameters
 *  Pointer to the vm_mem_range that contains the start of the range
 *      otherwise.
 */
static struct vm_mem_range *
find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
{
	size_t i, n;
	struct vm_mem_range *vmr, *first;

	/* Find the first vm_mem_range that contains gpa */
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa < vmr->vmr_gpa + vmr->vmr_size)
			break;
	}

	/* No range found. */
	if (i == vcp->vcp_nmemranges)
		return (NULL);

	/*
	 * vmr may cover the range [gpa, gpa + len) only partly. Make
	 * sure that the following vm_mem_ranges are contiguous and
	 * cover the rest, but return the range containing the start,
	 * since the callers walk forward from it.
	 */
	first = vmr;
	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
	if (len < n)
		len = 0;
	else
		len -= n;
	gpa = vmr->vmr_gpa + vmr->vmr_size;
	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa != vmr->vmr_gpa)
			return (NULL);
		if (len <= vmr->vmr_size)
			len = 0;
		else
			len -= vmr->vmr_size;

		gpa = vmr->vmr_gpa + vmr->vmr_size;
	}

	if (len != 0)
		return (NULL);

	return (first);
}

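/*
 * A worked example of the walk above, assuming the three-range map
 * built by create_memory_map() and a request for len 0x3000 at
 * gpa = LOWMEM_KB * 1024 - 0x1000: the first loop stops at range 0
 * (gpa is below its end), 0x1000 bytes remain there, and the second
 * loop checks that range 1 starts exactly where range 0 ends before
 * the remaining 0x2000 bytes are accounted for. The caller gets back
 * range 0 and can walk forward with vmr++, as write_mem() and
 * read_mem() below do.
 */
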
int
write_mem(paddr_t dst, void *buf, size_t len)
{
	char *from = buf, *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		from += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * read_mem
 *
 * Reads memory at guest paddr 'src' into 'buf'.
 *
 * Parameters:
 *  src: the source paddr_t in the guest VM to read from.
 *  buf: destination (local) buffer
 *  len: number of bytes to read
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [src, src + len) does not
 *      exist in the guest.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		to += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

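/*
 * Typical usage of the pair above, as a sketch with a hypothetical
 * gpa:
 *
 *	uint32_t val;
 *	if (read_mem(gpa, &val, sizeof(val)) == 0) {
 *		val |= 0x1;
 *		write_mem(gpa, &val, sizeof(val));
 *	}
 *
 * i.e. they behave like copyin/copyout against the guest physical
 * address space, returning 0 or EINVAL instead of faulting.
 */
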
/*
 * vcpu_assert_pic_irq
 *
 * Injects the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to inject to
 *  vcpu_id: VCPU ID to inject to
 *  irq: IRQ to inject
 */
void
vcpu_assert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	int ret;

	i8259_assert_irq(irq);

	if (i8259_is_pending()) {
		if (vcpu_pic_intr(vm_id, vcpu_id, 1))
			fatalx("%s: can't assert INTR", __func__);

		ret = pthread_mutex_lock(&vcpu_run_mtx[vcpu_id]);
		if (ret)
			fatalx("%s: can't lock vcpu mtx (%d)", __func__, ret);

		vcpu_hlt[vcpu_id] = 0;
		ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
		if (ret)
			fatalx("%s: can't signal (%d)", __func__, ret);
		ret = pthread_mutex_unlock(&vcpu_run_mtx[vcpu_id]);
		if (ret)
			fatalx("%s: can't unlock vcpu mtx (%d)", __func__, ret);
	}
}

/*
 * fd_hasdata
 *
 * Determines if data can be read from a file descriptor.
 *
 * Parameters:
 *  fd: the fd to check
 *
 * Return values:
 *  1 if data can be read from an fd, or 0 otherwise.
 */
int
fd_hasdata(int fd)
{
	struct pollfd pfd[1];
	int nready, hasdata = 0;

	pfd[0].fd = fd;
	pfd[0].events = POLLIN;
	nready = poll(pfd, 1, 0);
	if (nready == -1)
		log_warn("checking file descriptor for data failed");
	else if (nready == 1 && pfd[0].revents & POLLIN)
		hasdata = 1;
	return (hasdata);
}

/*
 * mutex_lock
 *
 * Wrapper function for pthread_mutex_lock that does error checking and that
 * exits on failure
 */
void
mutex_lock(pthread_mutex_t *m)
{
	int ret;

	ret = pthread_mutex_lock(m);
	if (ret) {
		errno = ret;
		fatal("could not acquire mutex");
	}
}

/*
 * mutex_unlock
 *
 * Wrapper function for pthread_mutex_unlock that does error checking and that
 * exits on failure
 */
void
mutex_unlock(pthread_mutex_t *m)
{
	int ret;

	ret = pthread_mutex_unlock(m);
	if (ret) {
		errno = ret;
		fatal("could not release mutex");
	}
}