/*	$OpenBSD: vm.c,v 1.110 2024/11/21 13:25:30 claudio Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE, MAXCOMLEN */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include <dev/vmm/vmm.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <imsg.h>
#include <poll.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>

#include "atomicio.h"
#include "pci.h"
#include "virtio.h"
#include "vmd.h"

#define MMIO_NOTYET 0

static int run_vm(struct vmop_create_params *, struct vcpu_reg_state *);
static void vm_dispatch_vmm(int, short, void *);
static void *event_thread(void *);
static void *vcpu_run_loop(void *);
static int vmm_create_vm(struct vmd_vm *);
static int alloc_guest_mem(struct vmd_vm *);
static int send_vm(int, struct vmd_vm *);
static int dump_vmr(int, struct vm_mem_range *);
static int dump_mem(int, struct vmd_vm *);
static void restore_vmr(int, struct vm_mem_range *);
static void restore_mem(int, struct vm_create_params *);
static int restore_vm_params(int, struct vm_create_params *);
static void pause_vm(struct vmd_vm *);
static void unpause_vm(struct vmd_vm *);
static int start_vm(struct vmd_vm *, int);

int con_fd;
struct vmd_vm *current_vm;

extern struct vmd *env;

extern char *__progname;

pthread_mutex_t threadmutex;
pthread_cond_t threadcond;

pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM];
pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM];
pthread_barrier_t vm_pause_barrier;
pthread_cond_t vcpu_unpause_cond[VMM_MAX_VCPUS_PER_VM];
pthread_mutex_t vcpu_unpause_mtx[VMM_MAX_VCPUS_PER_VM];

pthread_mutex_t vm_mtx;
uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM];
uint8_t vcpu_done[VMM_MAX_VCPUS_PER_VM];

/*
 * vm_main
 *
 * Primary entrypoint for launching a vm. Does not return.
 *
 * fd: file descriptor for communicating with vmm process.
 * fd_vmm: file descriptor for communicating with vmm(4) device
 */
void
vm_main(int fd, int fd_vmm)
{
	struct vm_create_params	*vcp = NULL;
	struct vmd_vm		 vm;
	size_t			 sz = 0;
	int			 ret = 0;

	/*
	 * The vm process relies on global state. Set the fd for /dev/vmm.
	 */
	env->vmd_fd = fd_vmm;

	/*
	 * We aren't root, so we can't chroot(2). Use unveil(2) instead.
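	 * Only the vmd executable (argv0) is unveiled with "x", since the vm
	 * process execs it again when launching device processes.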
	 */
	if (unveil(env->argv0, "x") == -1)
		fatal("unveil %s", env->argv0);
	if (unveil(NULL, NULL) == -1)
		fatal("unveil lock");

	/*
	 * pledge in the vm processes:
	 * stdio - for malloc and basic I/O including events.
	 * vmm - for the vmm ioctls and operations.
	 * proc exec - fork/exec for launching devices.
	 * recvfd - for vm send/recv and sending fd to devices.
	 */
	if (pledge("stdio vmm proc exec recvfd", NULL) == -1)
		fatal("pledge");

	/* Receive our vm configuration. */
	memset(&vm, 0, sizeof(vm));
	sz = atomicio(read, fd, &vm, sizeof(vm));
	if (sz != sizeof(vm)) {
		log_warnx("failed to receive start message");
		_exit(EIO);
	}

	/* Update process with the vm name. */
	vcp = &vm.vm_params.vmc_params;
	setproctitle("%s", vcp->vcp_name);
	log_procinit("vm/%s", vcp->vcp_name);

	/* Receive the local prefix settings. */
	sz = atomicio(read, fd, &env->vmd_cfg.cfg_localprefix,
	    sizeof(env->vmd_cfg.cfg_localprefix));
	if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) {
		log_warnx("failed to receive local prefix");
		_exit(EIO);
	}

	/*
	 * We need, at minimum, a vm_kernel fd to boot a vm. This is either a
	 * kernel or a BIOS image.
	 */
	if (!(vm.vm_state & VM_STATE_RECEIVED)) {
		if (vm.vm_kernel == -1) {
			log_warnx("%s: failed to receive boot fd",
			    vcp->vcp_name);
			_exit(EINVAL);
		}
	}

	if (vcp->vcp_sev && env->vmd_psp_fd < 0) {
		log_warnx("%s not available", PSP_NODE);
		_exit(EINVAL);
	}

	ret = start_vm(&vm, fd);
	_exit(ret);
}

/*
 * start_vm
 *
 * After forking a new VM process, starts the new VM with the creation
 * parameters supplied (in the incoming vm->vm_params field). This
 * function performs a basic sanity check on the incoming parameters
 * and then performs the following steps to complete the creation of the VM:
 *
 * 1. validates and creates the new VM
 * 2. opens the imsg control channel to the parent and drops more privilege
 * 3. drops additional privileges by calling pledge(2)
 * 4. loads the kernel from the disk image or file descriptor
 * 5. runs the VM's VCPU loops.
 *
 * Parameters:
 *  vm: The VM data structure containing the VM create parameters.
 *  fd: The imsg socket that is connected to the parent process.
 *
 * Return values:
 *  0: success
 *  !0 : failure - typically an errno indicating the source of the failure
 */
int
start_vm(struct vmd_vm *vm, int fd)
{
	struct vmop_create_params *vmc = &vm->vm_params;
	struct vm_create_params	*vcp = &vmc->vmc_params;
	struct vcpu_reg_state	 vrs;
	int			 nicfds[VM_MAX_NICS_PER_VM];
	int			 ret;
	size_t			 i;
	struct vm_rwregs_params	 vrp;

	/*
	 * We first try to initialize and allocate memory before bothering
	 * vmm(4) with a request to create a new vm.
	 */
	if (!(vm->vm_state & VM_STATE_RECEIVED))
		create_memory_map(vcp);

	ret = alloc_guest_mem(vm);
	if (ret) {
		struct rlimit lim;
		char buf[FMT_SCALED_STRSIZE];
		if (ret == ENOMEM && getrlimit(RLIMIT_DATA, &lim) == 0) {
			if (fmt_scaled(lim.rlim_cur, buf) == 0)
				fatalx("could not allocate guest memory (data "
				    "limit is %s)", buf);
		}
		errno = ret;
		log_warn("could not allocate guest memory");
		return (ret);
	}

	/* We've allocated guest memory, so now create the vm in vmm(4). */
	ret = vmm_create_vm(vm);
	if (ret) {
		/* Let the vmm process know we failed by sending a 0 vm id. */
		vcp->vcp_id = 0;
		atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id));
		return (ret);
	}

	/* Setup SEV. */
	ret = sev_init(vm);
	if (ret) {
		log_warnx("could not initialize SEV");
		return (ret);
	}

	/*
	 * Some of vmd currently relies on global state (current_vm, con_fd).
	 */
	current_vm = vm;
	con_fd = vm->vm_tty;
	if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1) {
		log_warn("failed to set nonblocking mode on console");
		return (1);
	}

	/*
	 * We now let the vmm process know we were successful by sending it our
	 * vmm(4) assigned vm id.
	 */
	if (atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
	    sizeof(vcp->vcp_id)) {
		log_warn("failed to send created vm id to vmm process");
		return (1);
	}

	/* Prepare either our boot image or receive an existing vm to launch. */
	if (vm->vm_state & VM_STATE_RECEIVED) {
		ret = atomicio(read, vm->vm_receive_fd, &vrp, sizeof(vrp));
		if (ret != sizeof(vrp))
			fatal("received incomplete vrp - exiting");
		vrs = vrp.vrwp_regs;
	} else if (load_firmware(vm, &vrs))
		fatalx("failed to load kernel or firmware image");

	if (vm->vm_kernel != -1)
		close_fd(vm->vm_kernel);

	/* Initialize our mutexes. */
	ret = pthread_mutex_init(&threadmutex, NULL);
	if (ret) {
		log_warn("%s: could not initialize thread state mutex",
		    __func__);
		return (ret);
	}
	ret = pthread_cond_init(&threadcond, NULL);
	if (ret) {
		log_warn("%s: could not initialize thread state "
		    "condition variable", __func__);
		return (ret);
	}
	ret = pthread_mutex_init(&vm_mtx, NULL);
	if (ret) {
		log_warn("%s: could not initialize vm state mutex",
		    __func__);
		return (ret);
	}

	/* Lock thread mutex now. It's unlocked when waiting on threadcond. */
	mutex_lock(&threadmutex);

	/*
	 * Finalize our communication socket with the vmm process. From here
	 * onwards, communication with the vmm process is event-based.
	 */
	event_init();
	if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
		fatal("setup vm pipe");

	/*
	 * Initialize or restore our emulated hardware.
	 */
	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
		nicfds[i] = vm->vm_ifs[i].vif_fd;

	if (vm->vm_state & VM_STATE_RECEIVED) {
		restore_mem(vm->vm_receive_fd, vcp);
		restore_emulated_hw(vcp, vm->vm_receive_fd, nicfds,
		    vm->vm_disks, vm->vm_cdrom);
		if (restore_vm_params(vm->vm_receive_fd, vcp))
			fatal("restore vm params failed");
		unpause_vm(vm);
	} else
		init_emulated_hw(vmc, vm->vm_cdrom, vm->vm_disks, nicfds);

	/* Drop privileges further before starting the vcpu run loop(s). */
	if (pledge("stdio vmm recvfd", NULL) == -1)
		fatal("pledge");

	/*
	 * Execute the vcpu run loop(s) for this VM.
	 */
	ret = run_vm(&vm->vm_params, &vrs);

	/* Shutdown SEV. */
	if (sev_shutdown(vm))
		log_warnx("%s: could not shutdown SEV", __func__);

	/* Ensure that any in-flight data is written back */
	virtio_shutdown(vm);

	return (ret);
}

/*
 * vm_dispatch_vmm
 *
 * imsg callback for messages that are received from the vmm parent process.
 */
void
vm_dispatch_vmm(int fd, short event, void *arg)
{
	struct vmd_vm		*vm = arg;
	struct vmop_result	 vmr;
	struct vmop_addr_result	 var;
	struct imsgev		*iev = &vm->vm_iev;
	struct imsgbuf		*ibuf = &iev->ibuf;
	struct imsg		 imsg;
	ssize_t			 n;
	int			 verbose;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0)
			_exit(0);
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE)
				_exit(0);
			fatal("%s: imsgbuf_write fd %d", __func__, ibuf->fd);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

#if DEBUG > 1
		log_debug("%s: got imsg %d from %s",
		    __func__, imsg.hdr.type,
		    vm->vm_params.vmc_params.vcp_name);
#endif

		switch (imsg.hdr.type) {
		case IMSG_CTL_VERBOSE:
			IMSG_SIZE_CHECK(&imsg, &verbose);
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			virtio_broadcast_imsg(vm, IMSG_CTL_VERBOSE, &verbose,
			    sizeof(verbose));
			break;
		case IMSG_VMDOP_VM_SHUTDOWN:
			if (vmmci_ctl(VMMCI_SHUTDOWN) == -1)
				_exit(0);
			break;
		case IMSG_VMDOP_VM_REBOOT:
			if (vmmci_ctl(VMMCI_REBOOT) == -1)
				_exit(0);
			break;
		case IMSG_VMDOP_PAUSE_VM:
			vmr.vmr_result = 0;
			vmr.vmr_id = vm->vm_vmid;
			pause_vm(vm);
			imsg_compose_event(&vm->vm_iev,
			    IMSG_VMDOP_PAUSE_VM_RESPONSE,
			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
			    sizeof(vmr));
			break;
		case IMSG_VMDOP_UNPAUSE_VM:
			vmr.vmr_result = 0;
			vmr.vmr_id = vm->vm_vmid;
			unpause_vm(vm);
			imsg_compose_event(&vm->vm_iev,
			    IMSG_VMDOP_UNPAUSE_VM_RESPONSE,
			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
			    sizeof(vmr));
			break;
		case IMSG_VMDOP_SEND_VM_REQUEST:
			vmr.vmr_id = vm->vm_vmid;
			vmr.vmr_result = send_vm(imsg_get_fd(&imsg), vm);
			imsg_compose_event(&vm->vm_iev,
			    IMSG_VMDOP_SEND_VM_RESPONSE,
			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
			    sizeof(vmr));
			if (!vmr.vmr_result) {
				imsgbuf_flush(&current_vm->vm_iev.ibuf);
				_exit(0);
			}
			break;
		case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
			IMSG_SIZE_CHECK(&imsg, &var);
			memcpy(&var, imsg.data, sizeof(var));

			log_debug("%s: received tap addr %s for nic %d",
			    vm->vm_params.vmc_params.vcp_name,
			    ether_ntoa((void *)var.var_addr), var.var_nic_idx);

			vionet_set_hostmac(vm, var.var_nic_idx, var.var_addr);
			break;
		default:
			fatalx("%s: got invalid imsg %d from %s",
			    __func__, imsg.hdr.type,
			    vm->vm_params.vmc_params.vcp_name);
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}

/*
 * vm_shutdown
 *
 * Tell the vmm parent process to shutdown or reboot the VM and exit.
 */
__dead void
vm_shutdown(unsigned int cmd)
{
	switch (cmd) {
	case VMMCI_NONE:
	case VMMCI_SHUTDOWN:
		(void)imsg_compose_event(&current_vm->vm_iev,
		    IMSG_VMDOP_VM_SHUTDOWN, 0, 0, -1, NULL, 0);
		break;
	case VMMCI_REBOOT:
		(void)imsg_compose_event(&current_vm->vm_iev,
		    IMSG_VMDOP_VM_REBOOT, 0, 0, -1, NULL, 0);
		break;
	default:
		fatalx("invalid vm ctl command: %d", cmd);
	}
	imsgbuf_flush(&current_vm->vm_iev.ibuf);

	if (sev_shutdown(current_vm))
		log_warnx("%s: could not shutdown SEV", __func__);

	_exit(0);
}

int
send_vm(int fd, struct vmd_vm *vm)
{
	struct vm_rwregs_params	 vrp;
	struct vm_rwvmparams_params vpp;
	struct vmop_create_params *vmc;
	struct vm_terminate_params vtp;
	unsigned int		 flags = 0;
	unsigned int		 i;
	int			 ret = 0;
	size_t			 sz;

	if (dump_send_header(fd)) {
		log_warnx("%s: failed to send vm dump header", __func__);
		goto err;
	}

	pause_vm(vm);

	vmc = calloc(1, sizeof(struct vmop_create_params));
	if (vmc == NULL) {
		log_warn("%s: calloc error getting vmc", __func__);
		ret = -1;
		goto err;
	}

	flags |= VMOP_CREATE_MEMORY;
	memcpy(&vmc->vmc_params, &current_vm->vm_params, sizeof(struct
	    vmop_create_params));
	vmc->vmc_flags = flags;
	vrp.vrwp_vm_id = vm->vm_params.vmc_params.vcp_id;
	vrp.vrwp_mask = VM_RWREGS_ALL;
	vpp.vpp_mask = VM_RWVMPARAMS_ALL;
	vpp.vpp_vm_id = vm->vm_params.vmc_params.vcp_id;

	sz = atomicio(vwrite, fd, vmc, sizeof(struct vmop_create_params));
	if (sz != sizeof(struct vmop_create_params)) {
		ret = -1;
		goto err;
	}

	for (i = 0; i < vm->vm_params.vmc_params.vcp_ncpus; i++) {
		vrp.vrwp_vcpu_id = i;
		if ((ret = ioctl(env->vmd_fd, VMM_IOC_READREGS, &vrp))) {
			log_warn("%s: readregs failed", __func__);
			goto err;
		}

		sz = atomicio(vwrite, fd, &vrp,
		    sizeof(struct vm_rwregs_params));
		if (sz != sizeof(struct vm_rwregs_params)) {
			log_warn("%s: dumping registers failed", __func__);
			ret = -1;
			goto err;
		}
	}

	/* Dump memory before devices to aid in restoration. */
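	/*
	 * The dump order mirrors the restore path in start_vm(): memory,
	 * then emulated devices, pci and virtio state, then vm params.
	 */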
	if ((ret = dump_mem(fd, vm)))
		goto err;
	if ((ret = dump_devs(fd)))
		goto err;
	if ((ret = pci_dump(fd)))
		goto err;
	if ((ret = virtio_dump(fd)))
		goto err;

	for (i = 0; i < vm->vm_params.vmc_params.vcp_ncpus; i++) {
		vpp.vpp_vcpu_id = i;
		if ((ret = ioctl(env->vmd_fd, VMM_IOC_READVMPARAMS, &vpp))) {
			log_warn("%s: readvmparams failed", __func__);
			goto err;
		}

		sz = atomicio(vwrite, fd, &vpp,
		    sizeof(struct vm_rwvmparams_params));
		if (sz != sizeof(struct vm_rwvmparams_params)) {
			log_warn("%s: dumping vm params failed", __func__);
			ret = -1;
			goto err;
		}
	}

	vtp.vtp_vm_id = vm->vm_params.vmc_params.vcp_id;
	if (ioctl(env->vmd_fd, VMM_IOC_TERM, &vtp) == -1) {
		log_warnx("%s: term IOC error: %d, %d", __func__,
		    errno, ENOENT);
	}
err:
	close(fd);
	if (ret)
		unpause_vm(vm);
	return ret;
}

int
dump_mem(int fd, struct vmd_vm *vm)
{
	unsigned int		 i;
	int			 ret;
	struct vm_mem_range	*vmr;

	for (i = 0; i < vm->vm_params.vmc_params.vcp_nmemranges; i++) {
		vmr = &vm->vm_params.vmc_params.vcp_memranges[i];
		ret = dump_vmr(fd, vmr);
		if (ret)
			return ret;
	}
	return (0);
}

int
restore_vm_params(int fd, struct vm_create_params *vcp)
{
	unsigned int			 i;
	struct vm_rwvmparams_params	 vpp;

	for (i = 0; i < vcp->vcp_ncpus; i++) {
		if (atomicio(read, fd, &vpp, sizeof(vpp)) != sizeof(vpp)) {
			log_warn("%s: error restoring vm params", __func__);
			return (-1);
		}
		vpp.vpp_vm_id = vcp->vcp_id;
		vpp.vpp_vcpu_id = i;
		if (ioctl(env->vmd_fd, VMM_IOC_WRITEVMPARAMS, &vpp) < 0) {
			log_debug("%s: writing vm params failed", __func__);
			return (-1);
		}
	}
	return (0);
}

void
restore_mem(int fd, struct vm_create_params *vcp)
{
	unsigned int		 i;
	struct vm_mem_range	*vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		restore_vmr(fd, vmr);
	}
}

int
dump_vmr(int fd, struct vm_mem_range *vmr)
{
	size_t	rem = vmr->vmr_size, read = 0;
	char	buf[PAGE_SIZE];

	while (rem > 0) {
		if (read_mem(vmr->vmr_gpa + read, buf, PAGE_SIZE)) {
			log_warn("failed to read vmr");
			return (-1);
		}
		if (atomicio(vwrite, fd, buf, sizeof(buf)) != sizeof(buf)) {
			log_warn("failed to dump vmr");
			return (-1);
		}
		rem = rem - PAGE_SIZE;
		read = read + PAGE_SIZE;
	}
	return (0);
}

void
restore_vmr(int fd, struct vm_mem_range *vmr)
{
	size_t	rem = vmr->vmr_size, wrote = 0;
	char	buf[PAGE_SIZE];

	while (rem > 0) {
		if (atomicio(read, fd, buf, sizeof(buf)) != sizeof(buf))
			fatal("failed to restore vmr");
		if (write_mem(vmr->vmr_gpa + wrote, buf, PAGE_SIZE))
			fatal("failed to write vmr");
		rem = rem - PAGE_SIZE;
		wrote = wrote + PAGE_SIZE;
	}
}

static void
pause_vm(struct vmd_vm *vm)
{
	unsigned int n;
	int ret;

	mutex_lock(&vm_mtx);
	if (vm->vm_state & VM_STATE_PAUSED) {
		mutex_unlock(&vm_mtx);
		return;
	}
	current_vm->vm_state |= VM_STATE_PAUSED;
	mutex_unlock(&vm_mtx);

	ret = pthread_barrier_init(&vm_pause_barrier, NULL,
	    vm->vm_params.vmc_params.vcp_ncpus + 1);
	if (ret) {
		log_warnx("%s: cannot initialize pause barrier (%d)",
		    __progname, ret);
		return;
	}

	for (n = 0; n < vm->vm_params.vmc_params.vcp_ncpus; n++) {
		ret = pthread_cond_broadcast(&vcpu_run_cond[n]);
		if (ret) {
			log_warnx("%s: can't broadcast vcpu run cond (%d)",
			    __func__, (int)ret);
			return;
		}
	}
	ret = pthread_barrier_wait(&vm_pause_barrier);
	if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) {
		log_warnx("%s: could not wait on pause barrier (%d)",
		    __func__, (int)ret);
		return;
	}

	ret = pthread_barrier_destroy(&vm_pause_barrier);
	if (ret) {
		log_warnx("%s: could not destroy pause barrier (%d)",
		    __progname, ret);
		return;
	}

	pause_vm_md(vm);
}

static void
unpause_vm(struct vmd_vm *vm)
{
	unsigned int n;
	int ret;

	mutex_lock(&vm_mtx);
	if (!(vm->vm_state & VM_STATE_PAUSED)) {
		mutex_unlock(&vm_mtx);
		return;
	}
	current_vm->vm_state &= ~VM_STATE_PAUSED;
	mutex_unlock(&vm_mtx);

	for (n = 0; n < vm->vm_params.vmc_params.vcp_ncpus; n++) {
		ret = pthread_cond_broadcast(&vcpu_unpause_cond[n]);
		if (ret) {
			log_warnx("%s: can't broadcast vcpu unpause cond (%d)",
			    __func__, (int)ret);
			return;
		}
	}

	unpause_vm_md(vm);
}

/*
 * vcpu_reset
 *
 * Requests vmm(4) to reset the VCPUs in the indicated VM to
 * the register state provided
 *
 * Parameters
 *  vmid: VM ID to reset
 *  vcpu_id: VCPU ID to reset
 *  vrs: the register state to initialize
 *
 * Return values:
 *  0: success
 *  !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
 *      valid)
 */
int
vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
{
	struct vm_resetcpu_params vrp;

	memset(&vrp, 0, sizeof(vrp));
	vrp.vrp_vm_id = vmid;
	vrp.vrp_vcpu_id = vcpu_id;
	memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));

	log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);

	if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) == -1)
		return (errno);

	return (0);
}

/*
 * alloc_guest_mem
 *
 * Allocates memory for the guest.
 * Instead of doing a single allocation with one mmap(), we allocate memory
 * separately for every range for the following reasons:
 * - ASLR for the individual ranges
 * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
 *   map the single mmap'd userspace memory to the individual guest physical
 *   memory ranges, the underlying amap of the single mmap'd range would have
 *   to allocate per-page reference counters. The reason is that the
 *   individual guest physical ranges would reference the single mmap'd region
 *   only partially. However, if every guest physical range has its own
 *   corresponding mmap'd userspace allocation, there are no partial
 *   references: every guest physical range fully references an mmap'd
 *   range => no per-page reference counters have to be allocated.
 *
 * Return values:
 *  0: success
 *  !0: failure - errno indicating the source of the failure
 */
int
alloc_guest_mem(struct vmd_vm *vm)
{
	void *p;
	int ret = 0;
	size_t i, j;
	struct vm_create_params *vcp = &vm->vm_params.vmc_params;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];

		/*
		 * We only need R/W as userland. vmm(4) will use R/W/X in its
		 * mapping.
		 *
		 * We must use MAP_SHARED so emulated devices will be able
		 * to generate shared mappings.
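		 *
		 * MAP_CONCEAL keeps guest memory out of core dumps.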
		 */
		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_CONCEAL | MAP_SHARED, -1, 0);
		if (p == MAP_FAILED) {
			ret = errno;
			for (j = 0; j < i; j++) {
				vmr = &vcp->vcp_memranges[j];
				munmap((void *)vmr->vmr_va, vmr->vmr_size);
			}
			return (ret);
		}
		vmr->vmr_va = (vaddr_t)p;
	}

	return (ret);
}

/*
 * vmm_create_vm
 *
 * Requests vmm(4) to create a new VM using the supplied creation
 * parameters. This operation results in the creation of the in-kernel
 * structures for the VM, but does not start the VM's vcpu(s).
 *
 * Parameters:
 *  vm: pointer to the vm object
 *
 * Return values:
 *  0: success
 *  !0 : ioctl to vmm(4) failed
 */
static int
vmm_create_vm(struct vmd_vm *vm)
{
	struct vm_create_params *vcp = &vm->vm_params.vmc_params;
	size_t i;

	/* Sanity check arguments */
	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	if (vm->vm_params.vmc_ndisks > VM_MAX_DISKS_PER_VM)
		return (EINVAL);

	if (vm->vm_params.vmc_nnics > VM_MAX_NICS_PER_VM)
		return (EINVAL);

	if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) == -1)
		return (errno);

	for (i = 0; i < vcp->vcp_ncpus; i++)
		vm->vm_sev_asid[i] = vcp->vcp_asid[i];

	return (0);
}


/*
 * run_vm
 *
 * Runs the VM whose creation parameters are specified in vcp
 *
 * Parameters:
 *  vmc: vmop_create_params struct containing the VM's desired creation
 *      configuration
 *  vrs: VCPU register state to initialize
 *
 * Return values:
 *  0: the VM exited normally
 *  !0 : the VM exited abnormally or failed to start
 */
static int
run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	struct vm_rwregs_params vregsp;
	uint8_t evdone = 0;
	size_t i;
	int ret;
	pthread_t *tid, evtid;
	char tname[MAXCOMLEN + 1];
	struct vm_run_params **vrp;
	void *exit_status;

	if (vcp == NULL)
		return (EINVAL);

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
		return (EINVAL);

	tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
	vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
	if (tid == NULL || vrp == NULL) {
		log_warn("%s: memory allocation error - exiting.",
		    __progname);
		return (ENOMEM);
	}

	log_debug("%s: starting %zu vcpu thread(s) for vm %s", __func__,
	    vcp->vcp_ncpus, vcp->vcp_name);

	/*
	 * Create and launch one thread for each VCPU. These threads may
	 * migrate between PCPUs over time; the need to reload CPU state
	 * in such situations is detected and performed by vmm(4) in the
	 * kernel.
	 */
	for (i = 0 ; i < vcp->vcp_ncpus; i++) {
		vrp[i] = malloc(sizeof(struct vm_run_params));
		if (vrp[i] == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip freeing */
			return (ENOMEM);
		}
		vrp[i]->vrp_exit = malloc(sizeof(struct vm_exit));
		if (vrp[i]->vrp_exit == NULL) {
			log_warn("%s: memory allocation error - "
			    "exiting.", __progname);
			/* caller will exit, so skip freeing */
			return (ENOMEM);
		}
		vrp[i]->vrp_vm_id = vcp->vcp_id;
		vrp[i]->vrp_vcpu_id = i;

		if (vcpu_reset(vcp->vcp_id, i, vrs)) {
			log_warnx("%s: cannot reset VCPU %zu - exiting.",
			    __progname, i);
			return (EIO);
		}

		if (sev_activate(current_vm, i)) {
			log_warnx("%s: SEV activation failed for VCPU "
			    "%zu - exiting.", __progname, i);
			return (EIO);
		}

		if (sev_encrypt_memory(current_vm)) {
			log_warnx("%s: memory encryption failed for VCPU "
			    "%zu - exiting.", __progname, i);
			return (EIO);
		}

		/* once more because reset_cpu changes regs */
		if (current_vm->vm_state & VM_STATE_RECEIVED) {
			vregsp.vrwp_vm_id = vcp->vcp_id;
			vregsp.vrwp_vcpu_id = i;
			vregsp.vrwp_regs = *vrs;
			vregsp.vrwp_mask = VM_RWREGS_ALL;
			if ((ret = ioctl(env->vmd_fd, VMM_IOC_WRITEREGS,
			    &vregsp)) == -1) {
				log_warn("%s: writeregs failed", __func__);
				return (ret);
			}
		}

		ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize cond var (%d)",
			    __progname, ret);
			return (ret);
		}

		ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize mtx (%d)",
			    __progname, ret);
			return (ret);
		}

		ret = pthread_cond_init(&vcpu_unpause_cond[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize unpause var (%d)",
			    __progname, ret);
			return (ret);
		}

		ret = pthread_mutex_init(&vcpu_unpause_mtx[i], NULL);
		if (ret) {
			log_warnx("%s: cannot initialize unpause mtx (%d)",
			    __progname, ret);
			return (ret);
		}

		vcpu_hlt[i] = 0;

		/* Start each VCPU run thread at vcpu_run_loop */
		ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]);
		if (ret) {
			/* caller will _exit after this return */
			ret = errno;
			log_warn("%s: could not create vcpu thread %zu",
			    __func__, i);
			return (ret);
		}

		snprintf(tname, sizeof(tname), "vcpu-%zu", i);
		pthread_set_name_np(tid[i], tname);
	}

	log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
	ret = pthread_create(&evtid, NULL, event_thread, &evdone);
	if (ret) {
		errno = ret;
		log_warn("%s: could not create event thread", __func__);
		return (ret);
	}
	pthread_set_name_np(evtid, "event");

	for (;;) {
		ret = pthread_cond_wait(&threadcond, &threadmutex);
		if (ret) {
			log_warn("%s: waiting on thread state condition "
			    "variable failed", __func__);
			return (ret);
		}

		/*
		 * Did a VCPU thread exit with an error?
		 * => return the first one
		 */
		mutex_lock(&vm_mtx);
		for (i = 0; i < vcp->vcp_ncpus; i++) {
			if (vcpu_done[i] == 0)
				continue;

			if (pthread_join(tid[i], &exit_status)) {
				log_warn("%s: failed to join thread %zd - "
				    "exiting", __progname, i);
				mutex_unlock(&vm_mtx);
				return (EIO);
			}

			ret = (intptr_t)exit_status;
		}
		mutex_unlock(&vm_mtx);

		/* Did the event thread exit? => return with an error */
		if (evdone) {
			if (pthread_join(evtid, &exit_status)) {
				log_warn("%s: failed to join event thread - "
				    "exiting", __progname);
				return (EIO);
			}

			log_warnx("%s: vm %d event thread exited "
			    "unexpectedly", __progname, vcp->vcp_id);
			return (EIO);
		}

		/* Did all VCPU threads exit successfully? => return */
		mutex_lock(&vm_mtx);
		for (i = 0; i < vcp->vcp_ncpus; i++) {
			if (vcpu_done[i] == 0)
				break;
		}
		mutex_unlock(&vm_mtx);
		if (i == vcp->vcp_ncpus)
			return (ret);

		/* Some more threads to wait for, start over */
	}

	return (ret);
}

static void *
event_thread(void *arg)
{
	uint8_t *donep = arg;
	intptr_t ret;

	ret = event_dispatch();

	*donep = 1;

	mutex_lock(&threadmutex);
	pthread_cond_signal(&threadcond);
	mutex_unlock(&threadmutex);

	return (void *)ret;
}

/*
 * vcpu_run_loop
 *
 * Runs a single VCPU until vmm(4) requires help handling an exit,
 * or the VM terminates.
 *
 * Parameters:
 *  arg: vcpu_run_params for the VCPU being run by this thread
 *
 * Return values:
 *  NULL: the VCPU shutdown properly
 *  !NULL: error processing VCPU run, or the VCPU shutdown abnormally
 */
static void *
vcpu_run_loop(void *arg)
{
	struct vm_run_params *vrp = (struct vm_run_params *)arg;
	intptr_t ret = 0;
	uint32_t n = vrp->vrp_vcpu_id;
	int paused = 0, halted = 0;

	for (;;) {
		ret = pthread_mutex_lock(&vcpu_run_mtx[n]);

		if (ret) {
			log_warnx("%s: can't lock vcpu run mtx (%d)",
			    __func__, (int)ret);
			return ((void *)ret);
		}

		mutex_lock(&vm_mtx);
		paused = (current_vm->vm_state & VM_STATE_PAUSED) != 0;
		halted = vcpu_hlt[n];
		mutex_unlock(&vm_mtx);

		/* If we are halted and need to pause, pause */
		if (halted && paused) {
			ret = pthread_barrier_wait(&vm_pause_barrier);
			if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) {
				log_warnx("%s: could not wait on pause barrier (%d)",
				    __func__, (int)ret);
				return ((void *)ret);
			}

			ret = pthread_mutex_lock(&vcpu_unpause_mtx[n]);
			if (ret) {
				log_warnx("%s: can't lock vcpu unpause mtx (%d)",
				    __func__, (int)ret);
				return ((void *)ret);
			}

			/* Interrupt may be firing, release run mtx. */
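			/*
			 * vcpu_signal_run() locks vcpu_run_mtx[n] before
			 * signalling, so it must not be held while this
			 * thread sleeps on the unpause cond below.
			 */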
			mutex_unlock(&vcpu_run_mtx[n]);
			ret = pthread_cond_wait(&vcpu_unpause_cond[n],
			    &vcpu_unpause_mtx[n]);
			if (ret) {
				log_warnx(
				    "%s: can't wait on unpause cond (%d)",
				    __func__, (int)ret);
				break;
			}
			mutex_lock(&vcpu_run_mtx[n]);

			ret = pthread_mutex_unlock(&vcpu_unpause_mtx[n]);
			if (ret) {
				log_warnx("%s: can't unlock unpause mtx (%d)",
				    __func__, (int)ret);
				break;
			}
		}

		/* If we are halted and not paused, wait */
		if (halted) {
			ret = pthread_cond_wait(&vcpu_run_cond[n],
			    &vcpu_run_mtx[n]);

			if (ret) {
				log_warnx(
				    "%s: can't wait on cond (%d)",
				    __func__, (int)ret);
				(void)pthread_mutex_unlock(
				    &vcpu_run_mtx[n]);
				break;
			}
		}

		ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);

		if (ret) {
			log_warnx("%s: can't unlock mutex on cond (%d)",
			    __func__, (int)ret);
			break;
		}

		if (vrp->vrp_irqready && intr_pending(current_vm)) {
			vrp->vrp_inject.vie_vector = intr_ack(current_vm);
			vrp->vrp_inject.vie_type = VCPU_INJECT_INTR;
		} else
			vrp->vrp_inject.vie_type = VCPU_INJECT_NONE;

		/* Still more interrupts pending? */
		vrp->vrp_intr_pending = intr_pending(current_vm);

		if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) == -1) {
			/* If run ioctl failed, exit */
			ret = errno;
			log_warn("%s: vm %d / vcpu %d run ioctl failed",
			    __func__, current_vm->vm_vmid, n);
			break;
		}

		/* If the VM is terminating, exit normally */
		if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) {
			ret = (intptr_t)NULL;
			break;
		}

		if (vrp->vrp_exit_reason != VM_EXIT_NONE) {
			/*
			 * vmm(4) needs help handling an exit, handle in
			 * vcpu_exit.
			 */
			ret = vcpu_exit(vrp);
			if (ret)
				break;
		}
	}

	mutex_lock(&vm_mtx);
	vcpu_done[n] = 1;
	mutex_unlock(&vm_mtx);

	mutex_lock(&threadmutex);
	pthread_cond_signal(&threadcond);
	mutex_unlock(&threadmutex);

	return ((void *)ret);
}

int
vcpu_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
{
	struct vm_intr_params vip;

	memset(&vip, 0, sizeof(vip));

	vip.vip_vm_id = vm_id;
	vip.vip_vcpu_id = vcpu_id;	/* XXX always 0? */
	vip.vip_intr = intr;

	if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) == -1)
		return (errno);

	return (0);
}

/*
 * fd_hasdata
 *
 * Determines if data can be read from a file descriptor.
 *
 * Parameters:
 *  fd: the fd to check
 *
 * Return values:
 *  1 if data can be read from an fd, or 0 otherwise.
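 *
 * The check uses poll(2) with a zero timeout, so it never blocks.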
 */
int
fd_hasdata(int fd)
{
	struct pollfd pfd[1];
	int nready, hasdata = 0;

	pfd[0].fd = fd;
	pfd[0].events = POLLIN;
	nready = poll(pfd, 1, 0);
	if (nready == -1)
		log_warn("checking file descriptor for data failed");
	else if (nready == 1 && pfd[0].revents & POLLIN)
		hasdata = 1;
	return (hasdata);
}

/*
 * mutex_lock
 *
 * Wrapper function for pthread_mutex_lock that does error checking and that
 * exits on failure
 */
void
mutex_lock(pthread_mutex_t *m)
{
	int ret;

	ret = pthread_mutex_lock(m);
	if (ret) {
		errno = ret;
		fatal("could not acquire mutex");
	}
}

/*
 * mutex_unlock
 *
 * Wrapper function for pthread_mutex_unlock that does error checking and that
 * exits on failure
 */
void
mutex_unlock(pthread_mutex_t *m)
{
	int ret;

	ret = pthread_mutex_unlock(m);
	if (ret) {
		errno = ret;
		fatal("could not release mutex");
	}
}

void
vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *))
{
	vm_pipe_init2(p, cb, NULL);
}

/*
 * vm_pipe_init2
 *
 * Initialize a vm_dev_pipe, setting up its file descriptors and its
 * event structure with the given callback and argument.
 *
 * Parameters:
 *  p: pointer to vm_dev_pipe struct to initialize
 *  cb: callback to use for READ events on the read end of the pipe
 *  arg: pointer to pass to the callback on event trigger
 */
void
vm_pipe_init2(struct vm_dev_pipe *p, void (*cb)(int, short, void *), void *arg)
{
	int ret;
	int fds[2];

	memset(p, 0, sizeof(struct vm_dev_pipe));

	ret = pipe2(fds, O_CLOEXEC);
	if (ret)
		fatal("failed to create vm_dev_pipe pipe");

	p->read = fds[0];
	p->write = fds[1];

	event_set(&p->read_ev, p->read, EV_READ | EV_PERSIST, cb, arg);
}

/*
 * vm_pipe_send
 *
 * Send a message to an emulated device via the provided vm_dev_pipe. This
 * relies on the fact sizeof(msg) < PIPE_BUF to ensure atomic writes.
 *
 * Parameters:
 *  p: pointer to initialized vm_dev_pipe
 *  msg: message to send in the channel
 */
void
vm_pipe_send(struct vm_dev_pipe *p, enum pipe_msg_type msg)
{
	size_t n;
	n = write(p->write, &msg, sizeof(msg));
	if (n != sizeof(msg))
		fatal("failed to write to device pipe");
}

/*
 * vm_pipe_recv
 *
 * Receive a message for an emulated device via the provided vm_dev_pipe.
 * Returns the message value, otherwise will exit on failure. This relies on
 * the fact sizeof(enum pipe_msg_type) < PIPE_BUF for atomic reads.
 *
 * Parameters:
 *  p: pointer to initialized vm_dev_pipe
 *
 * Return values:
 *  a value of enum pipe_msg_type or fatal exit on read(2) error
 */
enum pipe_msg_type
vm_pipe_recv(struct vm_dev_pipe *p)
{
	size_t n;
	enum pipe_msg_type msg;
	n = read(p->read, &msg, sizeof(msg));
	if (n != sizeof(msg))
		fatal("failed to read from device pipe");

	return msg;
}

/*
 * Re-map the guest address space using vmm(4)'s VMM_IOC_SHAREMEM
 *
 * Returns 0 on success, non-zero in event of failure.
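 *
 * Parameters:
 *  vm: the vm whose memory ranges are to be re-mapped
 *  vmm_fd: an open fd to the vmm(4) device used for the ioctl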
 */
int
remap_guest_mem(struct vmd_vm *vm, int vmm_fd)
{
	struct vm_create_params	*vcp;
	struct vm_mem_range	*vmr;
	struct vm_sharemem_params vsp;
	size_t			 i, j;
	void			*p = NULL;
	int			 ret;

	if (vm == NULL)
		return (1);

	vcp = &vm->vm_params.vmc_params;

	/*
	 * Initialize our VM shared memory request using our original
	 * creation parameters. We'll overwrite the va's after mmap(2).
	 */
	memset(&vsp, 0, sizeof(vsp));
	vsp.vsp_nmemranges = vcp->vcp_nmemranges;
	vsp.vsp_vm_id = vcp->vcp_id;
	memcpy(&vsp.vsp_memranges, &vcp->vcp_memranges,
	    sizeof(vsp.vsp_memranges));

	/*
	 * Use mmap(2) to identify virtual address space for our mappings.
	 */
	for (i = 0; i < VMM_MAX_MEM_RANGES; i++) {
		if (i < vsp.vsp_nmemranges) {
			vmr = &vsp.vsp_memranges[i];

			/* Ignore any MMIO ranges. */
			if (vmr->vmr_type == VM_MEM_MMIO) {
				vmr->vmr_va = 0;
				vcp->vcp_memranges[i].vmr_va = 0;
				continue;
			}

			/* Make initial mappings for the memrange. */
			p = mmap(NULL, vmr->vmr_size, PROT_READ, MAP_ANON, -1,
			    0);
			if (p == MAP_FAILED) {
				ret = errno;
				log_warn("%s: mmap", __func__);
				for (j = 0; j < i; j++) {
					vmr = &vcp->vcp_memranges[j];
					munmap((void *)vmr->vmr_va,
					    vmr->vmr_size);
				}
				return (ret);
			}
			vmr->vmr_va = (vaddr_t)p;
			vcp->vcp_memranges[i].vmr_va = vmr->vmr_va;
		}
	}

	/*
	 * munmap(2) now that we have va's and ranges that don't overlap. vmm
	 * will use the va's and sizes to recreate the mappings for us.
	 */
	for (i = 0; i < vsp.vsp_nmemranges; i++) {
		vmr = &vsp.vsp_memranges[i];
		if (vmr->vmr_type == VM_MEM_MMIO)
			continue;
		if (munmap((void*)vmr->vmr_va, vmr->vmr_size) == -1)
			fatal("%s: munmap", __func__);
	}

	/*
	 * Ask vmm to enter the shared mappings for us. They'll point
	 * to the same host physical memory, but will have a randomized
	 * virtual address for the calling process.
	 */
	if (ioctl(vmm_fd, VMM_IOC_SHAREMEM, &vsp) == -1)
		return (errno);

	return (0);
}

void
vcpu_halt(uint32_t vcpu_id)
{
	mutex_lock(&vm_mtx);
	vcpu_hlt[vcpu_id] = 1;
	mutex_unlock(&vm_mtx);
}

void
vcpu_unhalt(uint32_t vcpu_id)
{
	mutex_lock(&vm_mtx);
	vcpu_hlt[vcpu_id] = 0;
	mutex_unlock(&vm_mtx);
}

void
vcpu_signal_run(uint32_t vcpu_id)
{
	int ret;

	mutex_lock(&vcpu_run_mtx[vcpu_id]);
	ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
	if (ret)
		fatalx("%s: can't signal (%d)", __func__, ret);
	mutex_unlock(&vcpu_run_mtx[vcpu_id]);
}