1 /* $OpenBSD: vmm.c,v 1.112 2023/05/13 23:15:28 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/ioctl.h> 21 #include <sys/queue.h> 22 #include <sys/wait.h> 23 #include <sys/uio.h> 24 #include <sys/socket.h> 25 #include <sys/time.h> 26 #include <sys/mman.h> 27 28 #include <dev/ic/i8253reg.h> 29 #include <dev/isa/isareg.h> 30 #include <dev/pci/pcireg.h> 31 32 #include <machine/psl.h> 33 #include <machine/specialreg.h> 34 #include <machine/vmmvar.h> 35 36 #include <net/if.h> 37 38 #include <errno.h> 39 #include <event.h> 40 #include <fcntl.h> 41 #include <imsg.h> 42 #include <limits.h> 43 #include <poll.h> 44 #include <pthread.h> 45 #include <stddef.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <util.h> 51 52 #include "vmd.h" 53 #include "vmm.h" 54 #include "atomicio.h" 55 56 void vmm_sighdlr(int, short, void *); 57 int vmm_start_vm(struct imsg *, uint32_t *, pid_t *); 58 int vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *); 59 void vmm_run(struct privsep *, struct privsep_proc *, void *); 60 void vmm_dispatch_vm(int, short, void *); 61 int terminate_vm(struct vm_terminate_params *); 62 int get_info_vm(struct privsep *, struct imsg *, int); 63 int opentap(char *); 64 65 extern struct vmd *env; 66 67 static struct privsep_proc procs[] = { 68 { "parent", PROC_PARENT, vmm_dispatch_parent }, 69 }; 70 71 void 72 vmm(struct privsep *ps, struct privsep_proc *p) 73 { 74 proc_run(ps, p, procs, nitems(procs), vmm_run, NULL); 75 } 76 77 void 78 vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg) 79 { 80 if (config_init(ps->ps_env) == -1) 81 fatal("failed to initialize configuration"); 82 83 /* 84 * We aren't root, so we can't chroot(2). Use unveil(2) instead. 85 */ 86 if (unveil(env->argv0, "x") == -1) 87 fatal("unveil %s", env->argv0); 88 if (unveil(NULL, NULL) == -1) 89 fatal("unveil lock"); 90 91 /* 92 * pledge in the vmm process: 93 * stdio - for malloc and basic I/O including events. 94 * vmm - for the vmm ioctls and operations. 95 * proc, exec - for forking and execing new vm's. 96 * sendfd - for sending send/recv fds to vm proc. 97 * recvfd - for disks, interfaces and other fds. 98 */ 99 if (pledge("stdio vmm sendfd recvfd proc exec", NULL) == -1) 100 fatal("pledge"); 101 102 signal_del(&ps->ps_evsigchld); 103 signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps); 104 signal_add(&ps->ps_evsigchld, NULL); 105 } 106 107 int 108 vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) 109 { 110 struct privsep *ps = p->p_ps; 111 int res = 0, cmd = 0, verbose; 112 struct vmd_vm *vm = NULL; 113 struct vm_terminate_params vtp; 114 struct vmop_id vid; 115 struct vmop_result vmr; 116 struct vmop_create_params vmc; 117 struct vmop_addr_result var; 118 uint32_t id = 0, peerid = imsg->hdr.peerid; 119 pid_t pid = 0; 120 unsigned int mode, flags; 121 122 switch (imsg->hdr.type) { 123 case IMSG_VMDOP_START_VM_REQUEST: 124 res = config_getvm(ps, imsg); 125 if (res == -1) { 126 res = errno; 127 cmd = IMSG_VMDOP_START_VM_RESPONSE; 128 } 129 break; 130 case IMSG_VMDOP_START_VM_CDROM: 131 res = config_getcdrom(ps, imsg); 132 if (res == -1) { 133 res = errno; 134 cmd = IMSG_VMDOP_START_VM_RESPONSE; 135 } 136 break; 137 case IMSG_VMDOP_START_VM_DISK: 138 res = config_getdisk(ps, imsg); 139 if (res == -1) { 140 res = errno; 141 cmd = IMSG_VMDOP_START_VM_RESPONSE; 142 } 143 break; 144 case IMSG_VMDOP_START_VM_IF: 145 res = config_getif(ps, imsg); 146 if (res == -1) { 147 res = errno; 148 cmd = IMSG_VMDOP_START_VM_RESPONSE; 149 } 150 break; 151 case IMSG_VMDOP_START_VM_END: 152 res = vmm_start_vm(imsg, &id, &pid); 153 /* Check if the ID can be mapped correctly */ 154 if (res == 0 && (id = vm_id2vmid(id, NULL)) == 0) 155 res = ENOENT; 156 cmd = IMSG_VMDOP_START_VM_RESPONSE; 157 break; 158 case IMSG_VMDOP_TERMINATE_VM_REQUEST: 159 IMSG_SIZE_CHECK(imsg, &vid); 160 memcpy(&vid, imsg->data, sizeof(vid)); 161 id = vid.vid_id; 162 flags = vid.vid_flags; 163 164 DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id); 165 166 cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 167 168 if (id == 0) { 169 res = ENOENT; 170 } else if ((vm = vm_getbyvmid(id)) != NULL) { 171 if (flags & VMOP_FORCE) { 172 vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); 173 vm->vm_state |= VM_STATE_SHUTDOWN; 174 (void)terminate_vm(&vtp); 175 res = 0; 176 } else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) { 177 log_debug("%s: sending shutdown request" 178 " to vm %d", __func__, id); 179 180 /* 181 * Request reboot but mark the VM as shutting 182 * down. This way we can terminate the VM after 183 * the triple fault instead of reboot and 184 * avoid being stuck in the ACPI-less powerdown 185 * ("press any key to reboot") of the VM. 186 */ 187 vm->vm_state |= VM_STATE_SHUTDOWN; 188 if (imsg_compose_event(&vm->vm_iev, 189 IMSG_VMDOP_VM_REBOOT, 190 0, 0, -1, NULL, 0) == -1) 191 res = errno; 192 else 193 res = 0; 194 } else { 195 /* 196 * VM is currently being shutdown. 197 * Check to see if the VM process is still 198 * active. If not, return VMD_VM_STOP_INVALID. 199 */ 200 if (vm_vmid2id(vm->vm_vmid, vm) == 0) { 201 log_debug("%s: no vm running anymore", 202 __func__); 203 res = VMD_VM_STOP_INVALID; 204 } 205 } 206 } else { 207 /* VM doesn't exist, cannot stop vm */ 208 log_debug("%s: cannot stop vm that is not running", 209 __func__); 210 res = VMD_VM_STOP_INVALID; 211 } 212 break; 213 case IMSG_VMDOP_GET_INFO_VM_REQUEST: 214 res = get_info_vm(ps, imsg, 0); 215 cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA; 216 break; 217 case IMSG_VMDOP_CONFIG: 218 config_getconfig(env, imsg); 219 break; 220 case IMSG_CTL_RESET: 221 IMSG_SIZE_CHECK(imsg, &mode); 222 memcpy(&mode, imsg->data, sizeof(mode)); 223 224 if (mode & CONFIG_VMS) { 225 /* Terminate and remove all VMs */ 226 vmm_shutdown(); 227 mode &= ~CONFIG_VMS; 228 } 229 230 config_getreset(env, imsg); 231 break; 232 case IMSG_CTL_VERBOSE: 233 IMSG_SIZE_CHECK(imsg, &verbose); 234 memcpy(&verbose, imsg->data, sizeof(verbose)); 235 log_setverbose(verbose); 236 237 /* Forward message to each VM process */ 238 TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 239 imsg_compose_event(&vm->vm_iev, 240 imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 241 -1, &verbose, sizeof(verbose)); 242 } 243 break; 244 case IMSG_VMDOP_PAUSE_VM: 245 IMSG_SIZE_CHECK(imsg, &vid); 246 memcpy(&vid, imsg->data, sizeof(vid)); 247 id = vid.vid_id; 248 if ((vm = vm_getbyvmid(id)) == NULL) { 249 res = ENOENT; 250 cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 251 break; 252 } 253 imsg_compose_event(&vm->vm_iev, 254 imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 255 imsg->fd, &vid, sizeof(vid)); 256 break; 257 case IMSG_VMDOP_UNPAUSE_VM: 258 IMSG_SIZE_CHECK(imsg, &vid); 259 memcpy(&vid, imsg->data, sizeof(vid)); 260 id = vid.vid_id; 261 if ((vm = vm_getbyvmid(id)) == NULL) { 262 res = ENOENT; 263 cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 264 break; 265 } 266 imsg_compose_event(&vm->vm_iev, 267 imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 268 imsg->fd, &vid, sizeof(vid)); 269 break; 270 case IMSG_VMDOP_SEND_VM_REQUEST: 271 IMSG_SIZE_CHECK(imsg, &vid); 272 memcpy(&vid, imsg->data, sizeof(vid)); 273 id = vid.vid_id; 274 if ((vm = vm_getbyvmid(id)) == NULL) { 275 res = ENOENT; 276 close(imsg->fd); 277 cmd = IMSG_VMDOP_START_VM_RESPONSE; 278 break; 279 } 280 imsg_compose_event(&vm->vm_iev, 281 imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 282 imsg->fd, &vid, sizeof(vid)); 283 break; 284 case IMSG_VMDOP_RECEIVE_VM_REQUEST: 285 IMSG_SIZE_CHECK(imsg, &vmc); 286 memcpy(&vmc, imsg->data, sizeof(vmc)); 287 if (vm_register(ps, &vmc, &vm, 288 imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) { 289 res = errno; 290 cmd = IMSG_VMDOP_START_VM_RESPONSE; 291 break; 292 } 293 vm->vm_tty = imsg->fd; 294 vm->vm_state |= VM_STATE_RECEIVED; 295 vm->vm_state |= VM_STATE_PAUSED; 296 break; 297 case IMSG_VMDOP_RECEIVE_VM_END: 298 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 299 res = ENOENT; 300 close(imsg->fd); 301 cmd = IMSG_VMDOP_START_VM_RESPONSE; 302 break; 303 } 304 vm->vm_receive_fd = imsg->fd; 305 res = vmm_start_vm(imsg, &id, &pid); 306 /* Check if the ID can be mapped correctly */ 307 if ((id = vm_id2vmid(id, NULL)) == 0) 308 res = ENOENT; 309 cmd = IMSG_VMDOP_START_VM_RESPONSE; 310 break; 311 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 312 IMSG_SIZE_CHECK(imsg, &var); 313 memcpy(&var, imsg->data, sizeof(var)); 314 if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) { 315 res = ENOENT; 316 break; 317 } 318 /* Forward hardware address details to the guest vm */ 319 imsg_compose_event(&vm->vm_iev, 320 imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 321 imsg->fd, &var, sizeof(var)); 322 break; 323 case IMSG_VMDOP_RECEIVE_VMM_FD: 324 if (env->vmd_fd > -1) 325 fatalx("already received vmm fd"); 326 env->vmd_fd = imsg->fd; 327 328 /* Get and terminate all running VMs */ 329 get_info_vm(ps, NULL, 1); 330 break; 331 default: 332 return (-1); 333 } 334 335 switch (cmd) { 336 case 0: 337 break; 338 case IMSG_VMDOP_START_VM_RESPONSE: 339 if (res != 0) { 340 /* Remove local reference if it exists */ 341 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) { 342 log_debug("%s: removing vm, START_VM_RESPONSE", 343 __func__); 344 vm_remove(vm, __func__); 345 } 346 } 347 if (id == 0) 348 id = imsg->hdr.peerid; 349 /* FALLTHROUGH */ 350 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 351 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 352 case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 353 memset(&vmr, 0, sizeof(vmr)); 354 vmr.vmr_result = res; 355 vmr.vmr_id = id; 356 vmr.vmr_pid = pid; 357 if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, 358 peerid, -1, &vmr, sizeof(vmr)) == -1) 359 return (-1); 360 break; 361 default: 362 if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, 363 peerid, -1, &res, sizeof(res)) == -1) 364 return (-1); 365 break; 366 } 367 368 return (0); 369 } 370 371 void 372 vmm_sighdlr(int sig, short event, void *arg) 373 { 374 struct privsep *ps = arg; 375 int status, ret = 0; 376 uint32_t vmid; 377 pid_t pid; 378 struct vmop_result vmr; 379 struct vmd_vm *vm; 380 struct vm_terminate_params vtp; 381 382 log_debug("%s: handling signal %d", __func__, sig); 383 switch (sig) { 384 case SIGCHLD: 385 do { 386 pid = waitpid(-1, &status, WNOHANG); 387 if (pid <= 0) 388 continue; 389 390 if (WIFEXITED(status) || WIFSIGNALED(status)) { 391 vm = vm_getbypid(pid); 392 if (vm == NULL) { 393 /* 394 * If the VM is gone already, it 395 * got terminated via a 396 * IMSG_VMDOP_TERMINATE_VM_REQUEST. 397 */ 398 continue; 399 } 400 401 if (WIFEXITED(status)) 402 ret = WEXITSTATUS(status); 403 404 /* Don't reboot on pending shutdown */ 405 if (ret == EAGAIN && 406 (vm->vm_state & VM_STATE_SHUTDOWN)) 407 ret = 0; 408 409 vmid = vm->vm_params.vmc_params.vcp_id; 410 vtp.vtp_vm_id = vmid; 411 412 if (terminate_vm(&vtp) == 0) 413 log_debug("%s: terminated vm %s" 414 " (id %d)", __func__, 415 vm->vm_params.vmc_params.vcp_name, 416 vm->vm_vmid); 417 418 memset(&vmr, 0, sizeof(vmr)); 419 vmr.vmr_result = ret; 420 vmr.vmr_id = vm_id2vmid(vmid, vm); 421 if (proc_compose_imsg(ps, PROC_PARENT, 422 -1, IMSG_VMDOP_TERMINATE_VM_EVENT, 423 vm->vm_peerid, -1, 424 &vmr, sizeof(vmr)) == -1) 425 log_warnx("could not signal " 426 "termination of VM %u to " 427 "parent", vm->vm_vmid); 428 429 vm_remove(vm, __func__); 430 } else 431 fatalx("unexpected cause of SIGCHLD"); 432 } while (pid > 0 || (pid == -1 && errno == EINTR)); 433 break; 434 default: 435 fatalx("unexpected signal"); 436 } 437 } 438 439 /* 440 * vmm_shutdown 441 * 442 * Terminate VMs on shutdown to avoid "zombie VM" processes. 443 */ 444 void 445 vmm_shutdown(void) 446 { 447 struct vm_terminate_params vtp; 448 struct vmd_vm *vm, *vm_next; 449 450 TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 451 vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); 452 453 /* XXX suspend or request graceful shutdown */ 454 (void)terminate_vm(&vtp); 455 vm_remove(vm, __func__); 456 } 457 } 458 459 /* 460 * vmm_pipe 461 * 462 * Create a new imsg control channel between vmm parent and a VM 463 * (can be called on both sides). 464 */ 465 int 466 vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *)) 467 { 468 struct imsgev *iev = &vm->vm_iev; 469 470 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { 471 log_warn("failed to set nonblocking mode on vm pipe"); 472 return (-1); 473 } 474 475 imsg_init(&iev->ibuf, fd); 476 iev->handler = cb; 477 iev->data = vm; 478 imsg_event_add(iev); 479 480 return (0); 481 } 482 483 /* 484 * vmm_dispatch_vm 485 * 486 * imsg callback for messages that are received from a VM child process. 487 */ 488 void 489 vmm_dispatch_vm(int fd, short event, void *arg) 490 { 491 struct vmd_vm *vm = arg; 492 struct vmop_result vmr; 493 struct imsgev *iev = &vm->vm_iev; 494 struct imsgbuf *ibuf = &iev->ibuf; 495 struct imsg imsg; 496 ssize_t n; 497 unsigned int i; 498 499 if (event & EV_READ) { 500 if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) 501 fatal("%s: imsg_read", __func__); 502 if (n == 0) { 503 /* This pipe is dead, so remove the event handler */ 504 event_del(&iev->ev); 505 return; 506 } 507 } 508 509 if (event & EV_WRITE) { 510 if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN) 511 fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd); 512 if (n == 0) { 513 /* This pipe is dead, so remove the event handler */ 514 event_del(&iev->ev); 515 return; 516 } 517 } 518 519 for (;;) { 520 if ((n = imsg_get(ibuf, &imsg)) == -1) 521 fatal("%s: imsg_get", __func__); 522 if (n == 0) 523 break; 524 525 DPRINTF("%s: got imsg %d from %s", 526 __func__, imsg.hdr.type, 527 vm->vm_params.vmc_params.vcp_name); 528 529 switch (imsg.hdr.type) { 530 case IMSG_VMDOP_VM_SHUTDOWN: 531 vm->vm_state |= VM_STATE_SHUTDOWN; 532 break; 533 case IMSG_VMDOP_VM_REBOOT: 534 vm->vm_state &= ~VM_STATE_SHUTDOWN; 535 break; 536 case IMSG_VMDOP_SEND_VM_RESPONSE: 537 IMSG_SIZE_CHECK(&imsg, &vmr); 538 case IMSG_VMDOP_PAUSE_VM_RESPONSE: 539 case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 540 for (i = 0; i < nitems(procs); i++) { 541 if (procs[i].p_id == PROC_PARENT) { 542 proc_forward_imsg(procs[i].p_ps, 543 &imsg, PROC_PARENT, -1); 544 break; 545 } 546 } 547 break; 548 549 default: 550 fatalx("%s: got invalid imsg %d from %s", 551 __func__, imsg.hdr.type, 552 vm->vm_params.vmc_params.vcp_name); 553 } 554 imsg_free(&imsg); 555 } 556 imsg_event_add(iev); 557 } 558 559 /* 560 * terminate_vm 561 * 562 * Requests vmm(4) to terminate the VM whose ID is provided in the 563 * supplied vm_terminate_params structure (vtp->vtp_vm_id) 564 * 565 * Parameters 566 * vtp: vm_terminate_params struct containing the ID of the VM to terminate 567 * 568 * Return values: 569 * 0: success 570 * !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid) 571 */ 572 int 573 terminate_vm(struct vm_terminate_params *vtp) 574 { 575 if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1) 576 return (errno); 577 578 return (0); 579 } 580 581 /* 582 * opentap 583 * 584 * Opens the next available tap device, up to MAX_TAP. 585 * 586 * Parameters 587 * ifname: a buffer of at least IF_NAMESIZE bytes. 588 * 589 * Returns a file descriptor to the tap node opened, or -1 if no tap 590 * devices were available. 591 */ 592 int 593 opentap(char *ifname) 594 { 595 int i, fd; 596 char path[PATH_MAX]; 597 598 for (i = 0; i < MAX_TAP; i++) { 599 snprintf(path, PATH_MAX, "/dev/tap%d", i); 600 fd = open(path, O_RDWR | O_NONBLOCK); 601 if (fd != -1) { 602 snprintf(ifname, IF_NAMESIZE, "tap%d", i); 603 return (fd); 604 } 605 } 606 strlcpy(ifname, "tap", IF_NAMESIZE); 607 608 return (-1); 609 } 610 611 /* 612 * vmm_start_vm 613 * 614 * Prepares and fork+execs a new VM process. 615 * 616 * Parameters: 617 * imsg: The VM data structure that is including the VM create parameters. 618 * id: Returns the VM id as reported by the kernel and obtained from the VM. 619 * pid: Returns the VM pid to the parent. 620 * 621 * Return values: 622 * 0: success 623 * !0: failure - typically an errno indicating the source of the failure 624 */ 625 int 626 vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) 627 { 628 struct vm_create_params *vcp; 629 struct vmd_vm *vm; 630 char *nargv[8], num[32], vmm_fd[32]; 631 int fd, ret = EINVAL; 632 int fds[2]; 633 pid_t vm_pid; 634 size_t i, j, sz; 635 636 if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 637 log_warnx("%s: can't find vm", __func__); 638 ret = ENOENT; 639 goto err; 640 } 641 vcp = &vm->vm_params.vmc_params; 642 643 if (!(vm->vm_state & VM_STATE_RECEIVED)) { 644 if ((vm->vm_tty = imsg->fd) == -1) { 645 log_warnx("%s: can't get tty", __func__); 646 goto err; 647 } 648 } 649 650 if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1) 651 fatal("socketpair"); 652 653 /* Keep our channel open after exec. */ 654 if (fcntl(fds[1], F_SETFD, 0)) { 655 ret = errno; 656 log_warn("%s: fcntl", __func__); 657 goto err; 658 } 659 660 /* Start child vmd for this VM (fork, chroot, drop privs) */ 661 vm_pid = fork(); 662 if (vm_pid == -1) { 663 log_warn("%s: start child failed", __func__); 664 ret = EIO; 665 goto err; 666 } 667 668 if (vm_pid > 0) { 669 /* Parent */ 670 vm->vm_pid = vm_pid; 671 close_fd(fds[1]); 672 673 /* Send the details over the pipe to the child. */ 674 sz = atomicio(vwrite, fds[0], vm, sizeof(*vm)); 675 if (sz != sizeof(*vm)) { 676 log_warnx("%s: failed to send config for vm '%s'", 677 __func__, vcp->vcp_name); 678 ret = EIO; 679 /* Defer error handling until after fd closing. */ 680 } 681 682 /* As the parent/vmm process, we no longer need these fds. */ 683 for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) { 684 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 685 if (close_fd(vm->vm_disks[i][j]) == 0) 686 vm->vm_disks[i][j] = -1; 687 } 688 } 689 for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) { 690 if (close_fd(vm->vm_ifs[i].vif_fd) == 0) 691 vm->vm_ifs[i].vif_fd = -1; 692 } 693 if (close_fd(vm->vm_kernel) == 0) 694 vm->vm_kernel = -1; 695 if (close_fd(vm->vm_cdrom) == 0) 696 vm->vm_cdrom = -1; 697 if (close_fd(vm->vm_tty) == 0) 698 vm->vm_tty = -1; 699 700 /* Deferred error handling from sending the vm struct. */ 701 if (ret == EIO) 702 goto err; 703 704 /* Read back the kernel-generated vm id from the child */ 705 sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)); 706 if (sz != sizeof(vcp->vcp_id)) { 707 log_debug("%s: failed to receive vm id from vm %s", 708 __func__, vcp->vcp_name); 709 /* vmd could not allocate memory for the vm. */ 710 ret = ENOMEM; 711 goto err; 712 } 713 714 /* Check for an invalid id. This indicates child failure. */ 715 if (vcp->vcp_id == 0) 716 goto err; 717 718 *id = vcp->vcp_id; 719 *pid = vm->vm_pid; 720 721 /* Wire up our pipe into the event handling. */ 722 if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1) 723 fatal("setup vm pipe"); 724 725 return (0); 726 } else { 727 /* Child. Create a new session. */ 728 if (setsid() == -1) 729 fatal("setsid"); 730 731 close_fd(fds[0]); 732 close_fd(PROC_PARENT_SOCK_FILENO); 733 734 /* Detach from terminal. */ 735 if (!env->vmd_debug && (fd = 736 open("/dev/null", O_RDWR, 0)) != -1) { 737 dup2(fd, STDIN_FILENO); 738 dup2(fd, STDOUT_FILENO); 739 dup2(fd, STDERR_FILENO); 740 if (fd > 2) 741 close(fd); 742 } 743 744 /* Toggle all fds to not close on exec. */ 745 for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) 746 for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) 747 if (vm->vm_disks[i][j] != -1) 748 fcntl(vm->vm_disks[i][j], F_SETFD, 0); 749 for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) 750 fcntl(vm->vm_ifs[i].vif_fd, F_SETFD, 0); 751 if (vm->vm_kernel != -1) 752 fcntl(vm->vm_kernel, F_SETFD, 0); 753 if (vm->vm_cdrom != -1) 754 fcntl(vm->vm_cdrom, F_SETFD, 0); 755 if (vm->vm_tty != -1) 756 fcntl(vm->vm_tty, F_SETFD, 0); 757 fcntl(env->vmd_fd, F_SETFD, 0); /* vmm device fd */ 758 759 /* 760 * Prepare our new argv for execvp(2) with the fd of our open 761 * pipe to the parent/vmm process as an argument. 762 */ 763 memset(&nargv, 0, sizeof(nargv)); 764 memset(num, 0, sizeof(num)); 765 snprintf(num, sizeof(num), "%d", fds[1]); 766 memset(vmm_fd, 0, sizeof(vmm_fd)); 767 snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd); 768 769 nargv[0] = env->argv0; 770 nargv[1] = "-V"; 771 nargv[2] = num; 772 nargv[3] = "-n"; 773 nargv[4] = "-i"; 774 nargv[5] = vmm_fd; 775 776 if (env->vmd_verbose) { 777 nargv[6] = "-v"; 778 nargv[7] = NULL; 779 } else 780 nargv[6] = NULL; 781 782 /* Control resumes in vmd main(). */ 783 execvp(nargv[0], nargv); 784 785 ret = errno; 786 log_warn("execvp %s", nargv[0]); 787 _exit(ret); 788 /* NOTREACHED */ 789 } 790 791 return (0); 792 793 err: 794 if (!vm->vm_from_config) 795 vm_remove(vm, __func__); 796 797 return (ret); 798 } 799 800 /* 801 * get_info_vm 802 * 803 * Returns a list of VMs known to vmm(4). 804 * 805 * Parameters: 806 * ps: the privsep context. 807 * imsg: the received imsg including the peer id. 808 * terminate: terminate the listed vm. 809 * 810 * Return values: 811 * 0: success 812 * !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl) 813 */ 814 int 815 get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate) 816 { 817 int ret; 818 size_t ct, i; 819 struct vm_info_params vip; 820 struct vm_info_result *info; 821 struct vm_terminate_params vtp; 822 struct vmop_info_result vir; 823 824 /* 825 * We issue the VMM_IOC_INFO ioctl twice, once with an input 826 * buffer size of 0, which results in vmm(4) returning the 827 * number of bytes required back to us in vip.vip_size, 828 * and then we call it again after malloc'ing the required 829 * number of bytes. 830 * 831 * It is possible that we could fail a second time (e.g. if 832 * another VM was created in the instant between the two 833 * ioctls, but in that case the caller can just try again 834 * as vmm(4) will return a zero-sized list in that case. 835 */ 836 vip.vip_size = 0; 837 info = NULL; 838 ret = 0; 839 memset(&vir, 0, sizeof(vir)); 840 841 /* First ioctl to see how many bytes needed (vip.vip_size) */ 842 if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) 843 return (errno); 844 845 if (vip.vip_info_ct != 0) 846 return (EIO); 847 848 info = malloc(vip.vip_size); 849 if (info == NULL) 850 return (ENOMEM); 851 852 /* Second ioctl to get the actual list */ 853 vip.vip_info = info; 854 if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) { 855 ret = errno; 856 free(info); 857 return (ret); 858 } 859 860 /* Return info */ 861 ct = vip.vip_size / sizeof(struct vm_info_result); 862 for (i = 0; i < ct; i++) { 863 if (terminate) { 864 vtp.vtp_vm_id = info[i].vir_id; 865 if ((ret = terminate_vm(&vtp)) != 0) 866 break; 867 log_debug("%s: terminated vm %s (id %d)", __func__, 868 info[i].vir_name, info[i].vir_id); 869 continue; 870 } 871 memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info)); 872 vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL); 873 if (proc_compose_imsg(ps, PROC_PARENT, -1, 874 IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1, 875 &vir, sizeof(vir)) == -1) { 876 ret = EIO; 877 break; 878 } 879 } 880 free(info); 881 882 return (ret); 883 } 884