/*	$OpenBSD: vmm.c,v 1.130 2024/11/21 13:39:34 claudio Exp $	*/

/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/wait.h>
#include <sys/socket.h>

#include <dev/vmm/vmm.h>

#include <net/if.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <imsg.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "vmd.h"
#include "atomicio.h"
#include "proc.h"

void	 vmm_sighdlr(int, short, void *);
int	 vmm_start_vm(struct imsg *, uint32_t *, pid_t *);
int	 vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
void	 vmm_run(struct privsep *, struct privsep_proc *, void *);
void	 vmm_dispatch_vm(int, short, void *);
int	 terminate_vm(struct vm_terminate_params *);
int	 get_info_vm(struct privsep *, struct imsg *, int);
int	 opentap(char *);

extern struct vmd *env;

/* imsg peers of the vmm process; only the parent process talks to us. */
static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	vmm_dispatch_parent },
};

/*
 * vmm
 *
 * Entry point of the vmm privsep process: hand control to proc_run(),
 * which sets up the imsg channels in procs[] and then calls vmm_run().
 */
void
vmm(struct privsep *ps, struct privsep_proc *p)
{
	proc_run(ps, p, procs, nitems(procs), vmm_run, NULL);
}

/*
 * vmm_run
 *
 * Post-fork initialization of the vmm process: initialize the local
 * configuration state, restrict the filesystem view and syscalls, and
 * install our own SIGCHLD handler to reap exiting VM child processes.
 */
void
vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	if (config_init(ps->ps_env) == -1)
		fatal("failed to initialize configuration");

	/*
	 * We aren't root, so we can't chroot(2). Use unveil(2) instead.
	 */
	if (unveil(env->argv0, "x") == -1)
		fatal("unveil %s", env->argv0);
	if (unveil(NULL, NULL) == -1)
		fatal("unveil lock");

	/*
	 * pledge in the vmm process:
	 * stdio - for malloc and basic I/O including events.
	 * vmm - for the vmm ioctls and operations.
	 * proc, exec - for forking and execing new vm's.
	 * sendfd - for sending send/recv fds to vm proc.
	 * recvfd - for disks, interfaces and other fds.
	 */
	if (pledge("stdio vmm sendfd recvfd proc exec", NULL) == -1)
		fatal("pledge");

	/* Replace the generic privsep SIGCHLD handler with vmm_sighdlr. */
	signal_del(&ps->ps_evsigchld);
	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
	signal_add(&ps->ps_evsigchld, NULL);
}

/*
 * vmm_dispatch_parent
 *
 * imsg callback for messages received from the parent process. Handles
 * VM lifecycle requests (start, terminate, pause/unpause, send/receive),
 * info queries, config updates and fd handoffs. For request types that
 * expect an answer, `cmd` is set to the response imsg type and a
 * vmop_result (or plain result code) is composed back to the parent at
 * the bottom of the function.
 *
 * Returns 0 on success, -1 if the imsg type is unknown or the response
 * could not be composed.
 */
int
vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct privsep		*ps = p->p_ps;
	int			 res = 0, cmd = 0, verbose;
	struct vmd_vm		*vm = NULL;
	struct vm_terminate_params vtp;
	struct vmop_id		 vid;
	struct vmop_result	 vmr;
	struct vmop_create_params vmc;
	struct vmop_addr_result	 var;
	uint32_t		 id = 0, peerid = imsg->hdr.peerid;
	pid_t			 pid = 0;
	unsigned int		 mode, flags;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_REQUEST:
		res = config_getvm(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_CDROM:
		res = config_getcdrom(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_DISK:
		res = config_getdisk(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_IF:
		res = config_getif(ps, imsg);
		if (res == -1) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
		}
		break;
	case IMSG_VMDOP_START_VM_END:
		res = vmm_start_vm(imsg, &id, &pid);
		/* Check if the ID can be mapped correctly */
		if (res == 0 && (id = vm_id2vmid(id, NULL)) == 0)
			res = ENOENT;
		cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		flags = vid.vid_flags;

		DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id);

		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;

		if (id == 0) {
			res = ENOENT;
		} else if ((vm = vm_getbyvmid(id)) != NULL) {
			if (flags & VMOP_FORCE) {
				/* Forced stop: terminate via vmm(4) now. */
				vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
				vm->vm_state |= VM_STATE_SHUTDOWN;
				(void)terminate_vm(&vtp);
				res = 0;
			} else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) {
				log_debug("%s: sending shutdown request"
				    " to vm %d", __func__, id);

				/*
				 * Request reboot but mark the VM as shutting
				 * down. This way we can terminate the VM after
				 * the triple fault instead of reboot and
				 * avoid being stuck in the ACPI-less powerdown
				 * ("press any key to reboot") of the VM.
				 */
				vm->vm_state |= VM_STATE_SHUTDOWN;
				if (imsg_compose_event(&vm->vm_iev,
				    IMSG_VMDOP_VM_REBOOT,
				    0, 0, -1, NULL, 0) == -1)
					res = errno;
				else
					res = 0;
			} else {
				/*
				 * VM is currently being shutdown.
				 * Check to see if the VM process is still
				 * active.  If not, return VMD_VM_STOP_INVALID.
				 */
				if (vm_vmid2id(vm->vm_vmid, vm) == 0) {
					log_debug("%s: no vm running anymore",
					    __func__);
					res = VMD_VM_STOP_INVALID;
				}
			}
		} else {
			/* VM doesn't exist, cannot stop vm */
			log_debug("%s: cannot stop vm that is not running",
			    __func__);
			res = VMD_VM_STOP_INVALID;
		}
		break;
	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		res = get_info_vm(ps, imsg, 0);
		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
		break;
	case IMSG_VMDOP_CONFIG:
		config_getconfig(env, imsg);
		break;
	case IMSG_CTL_RESET:
		IMSG_SIZE_CHECK(imsg, &mode);
		memcpy(&mode, imsg->data, sizeof(mode));

		if (mode & CONFIG_VMS) {
			/* Terminate and remove all VMs */
			vmm_shutdown();
			mode &= ~CONFIG_VMS;
		}

		config_getreset(env, imsg);
		break;
	case IMSG_CTL_VERBOSE:
		IMSG_SIZE_CHECK(imsg, &verbose);
		memcpy(&verbose, imsg->data, sizeof(verbose));
		log_setverbose(verbose);
		env->vmd_verbose = verbose;
		/* Forward message to each VM process */
		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
			imsg_compose_event(&vm->vm_iev,
			    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
			    -1, &verbose, sizeof(verbose));
		}
		break;
	case IMSG_VMDOP_PAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
			break;
		}
		/* Forward to the VM process; it replies via vmm_dispatch_vm. */
		imsg_compose_event(&vm->vm_iev,
		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
		    imsg_get_fd(imsg), &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_UNPAUSE_VM:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
			break;
		}
		imsg_compose_event(&vm->vm_iev,
		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
		    imsg_get_fd(imsg), &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_SEND_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vid);
		memcpy(&vid, imsg->data, sizeof(vid));
		id = vid.vid_id;
		if ((vm = vm_getbyvmid(id)) == NULL) {
			res = ENOENT;
			close(imsg_get_fd(imsg));	/* XXX */
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		imsg_compose_event(&vm->vm_iev,
		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
		    imsg_get_fd(imsg), &vid, sizeof(vid));
		break;
	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		IMSG_SIZE_CHECK(imsg, &vmc);
		memcpy(&vmc, imsg->data, sizeof(vmc));
		if (vm_register(ps, &vmc, &vm,
		    imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) {
			res = errno;
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		/* The passed fd is the tty of the VM being received. */
		vm->vm_tty = imsg_get_fd(imsg);
		vm->vm_state |= VM_STATE_RECEIVED;
		vm->vm_state |= VM_STATE_PAUSED;
		break;
	case IMSG_VMDOP_RECEIVE_VM_END:
		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
			res = ENOENT;
			close(imsg_get_fd(imsg));	/* XXX */
			cmd = IMSG_VMDOP_START_VM_RESPONSE;
			break;
		}
		vm->vm_receive_fd = imsg_get_fd(imsg);
		res = vmm_start_vm(imsg, &id, &pid);
		/* Check if the ID can be mapped correctly */
		if ((id = vm_id2vmid(id, NULL)) == 0)
			res = ENOENT;
		cmd = IMSG_VMDOP_START_VM_RESPONSE;
		break;
	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &var);
		memcpy(&var, imsg->data, sizeof(var));
		if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) {
			res = ENOENT;
			break;
		}
		/* Forward hardware address details to the guest vm */
		imsg_compose_event(&vm->vm_iev,
		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
		    imsg_get_fd(imsg), &var, sizeof(var));
		break;
	case IMSG_VMDOP_RECEIVE_VMM_FD:
		if (env->vmd_fd > -1)
			fatalx("already received vmm fd");
		env->vmd_fd = imsg_get_fd(imsg);

		/* Get and terminate all running VMs */
		get_info_vm(ps, NULL, 1);
		break;
	case IMSG_VMDOP_RECEIVE_PSP_FD:
		if (env->vmd_psp_fd > -1)
			fatalx("already received psp fd");
		env->vmd_psp_fd = imsg_get_fd(imsg);
		break;
	default:
		return (-1);
	}

	/* Compose the response (if any) back to the parent. */
	switch (cmd) {
	case 0:
		break;
	case IMSG_VMDOP_START_VM_RESPONSE:
		if (res != 0) {
			/* Remove local reference if it exists */
			if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) {
				log_debug("%s: removing vm, START_VM_RESPONSE",
				    __func__);
				vm_remove(vm, __func__);
			}
		}
		if (id == 0)
			id = imsg->hdr.peerid;
		/* FALLTHROUGH */
	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		memset(&vmr, 0, sizeof(vmr));
		vmr.vmr_result = res;
		vmr.vmr_id = id;
		vmr.vmr_pid = pid;
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    peerid, -1, &vmr, sizeof(vmr)) == -1)
			return (-1);
		break;
	default:
		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
		    peerid, -1, &res, sizeof(res)) == -1)
			return (-1);
		break;
	}

	return (0);
}

/*
 * vmm_sighdlr
 *
 * SIGCHLD handler: reap exited or signaled VM child processes, make sure
 * the kernel side of each reaped VM is terminated via vmm(4), notify the
 * parent process with IMSG_VMDOP_TERMINATE_VM_EVENT and drop the local
 * vmd_vm reference.
 */
void
vmm_sighdlr(int sig, short event, void *arg)
{
	struct privsep		*ps = arg;
	int			 status, ret = 0;
	uint32_t		 vmid;
	pid_t			 pid;
	struct vmop_result	 vmr;
	struct vmd_vm		*vm;
	struct vm_terminate_params vtp;

	log_debug("%s: handling signal %d", __func__, sig);
	switch (sig) {
	case SIGCHLD:
		do {
			pid = waitpid(-1, &status, WNOHANG);
			if (pid <= 0)
				continue;

			if (WIFEXITED(status) || WIFSIGNALED(status)) {
				vm = vm_getbypid(pid);
				if (vm == NULL) {
					/*
					 * If the VM is gone already, it
					 * got terminated via a
					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
					 */
					continue;
				}

				if (WIFEXITED(status))
					ret = WEXITSTATUS(status);

				/* Don't reboot on pending shutdown */
				if (ret == EAGAIN &&
				    (vm->vm_state & VM_STATE_SHUTDOWN))
					ret = 0;

				vmid = vm->vm_params.vmc_params.vcp_id;
				vtp.vtp_vm_id = vmid;

				if (terminate_vm(&vtp) == 0)
					log_debug("%s: terminated vm %s"
					    " (id %d)", __func__,
					    vm->vm_params.vmc_params.vcp_name,
					    vm->vm_vmid);

				memset(&vmr, 0, sizeof(vmr));
				vmr.vmr_result = ret;
				vmr.vmr_id = vm_id2vmid(vmid, vm);
				if (proc_compose_imsg(ps, PROC_PARENT,
				    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
				    vm->vm_peerid, -1,
				    &vmr, sizeof(vmr)) == -1)
					log_warnx("could not signal "
					    "termination of VM %u to "
					    "parent", vm->vm_vmid);

				vm_remove(vm, __func__);
			} else
				fatalx("unexpected cause of SIGCHLD");
		} while (pid > 0 || (pid == -1 && errno == EINTR));
		break;
	default:
		fatalx("unexpected signal");
	}
}

/*
 * vmm_shutdown
 *
 * Terminate VMs on shutdown to avoid "zombie VM" processes.
 */
void
vmm_shutdown(void)
{
	struct vm_terminate_params vtp;
	struct vmd_vm *vm, *vm_next;

	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
		vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);

		/* XXX suspend or request graceful shutdown */
		(void)terminate_vm(&vtp);
		vm_remove(vm, __func__);
	}
}

/*
 * vmm_pipe
 *
 * Create a new imsg control channel between vmm parent and a VM
 * (can be called on both sides).
 *
 * Parameters:
 *  vm: the VM whose vm_iev is wired to the channel.
 *  fd: the socket/pipe fd backing the channel.
 *  cb: event callback invoked for channel activity.
 *
 * Returns 0 on success, -1 if the fd or imsgbuf setup failed.
 */
int
vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *))
{
	struct imsgev	*iev = &vm->vm_iev;

	/*
	 * Set to close-on-exec as vmm_pipe is used after fork+exec to
	 * establish async ipc between vm and vmd's vmm process. This
	 * prevents future vm processes or virtio subprocesses from
	 * inheriting this control channel.
	 */
	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
		log_warn("failed to set close-on-exec for vmm ipc channel");
		return (-1);
	}

	if (imsgbuf_init(&iev->ibuf, fd) == -1) {
		log_warn("failed to init imsgbuf");
		return (-1);
	}
	/* fd passing is needed for pause/unpause/send fds. */
	imsgbuf_allow_fdpass(&iev->ibuf);
	iev->handler = cb;
	iev->data = vm;
	imsg_event_add(iev);

	return (0);
}

/*
 * vmm_dispatch_vm
 *
 * imsg callback for messages that are received from a VM child process.
 */
void
vmm_dispatch_vm(int fd, short event, void *arg)
{
	struct vmd_vm		*vm = arg;
	struct vmop_result	 vmr;
	struct imsgev		*iev = &vm->vm_iev;
	struct imsgbuf		*ibuf = &iev->ibuf;
	struct imsg		 imsg;
	ssize_t			 n;
	unsigned int		 i;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* This pipe is dead, so remove the event handler */
			event_del(&iev->ev);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* This pipe is dead, remove the handler */
				event_del(&iev->ev);
				return;
			}
			fatal("%s: imsgbuf_write fd %d", __func__, ibuf->fd);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		DPRINTF("%s: got imsg %d from %s",
		    __func__, imsg.hdr.type,
		    vm->vm_params.vmc_params.vcp_name);

		switch (imsg.hdr.type) {
		case IMSG_VMDOP_VM_SHUTDOWN:
			vm->vm_state |= VM_STATE_SHUTDOWN;
			break;
		case IMSG_VMDOP_VM_REBOOT:
			vm->vm_state &= ~VM_STATE_SHUTDOWN;
			break;
		case IMSG_VMDOP_SEND_VM_RESPONSE:
			IMSG_SIZE_CHECK(&imsg, &vmr);
			/* FALLTHROUGH */
		case IMSG_VMDOP_PAUSE_VM_RESPONSE:
		case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
			/* Relay the VM's response up to the parent. */
			for (i = 0; i < nitems(procs); i++) {
				if (procs[i].p_id == PROC_PARENT) {
					proc_forward_imsg(procs[i].p_ps,
					    &imsg, PROC_PARENT, -1);
					break;
				}
			}
			break;

		default:
			fatalx("%s: got invalid imsg %d from %s",
			    __func__, imsg.hdr.type,
			    vm->vm_params.vmc_params.vcp_name);
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}

/*
 * terminate_vm
 *
 * Requests vmm(4) to terminate the VM whose ID is provided in the
 * supplied vm_terminate_params structure (vtp->vtp_vm_id)
 *
 * Parameters
 *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
 *
 * Return values:
 *  0: success
 *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid)
 */
int
terminate_vm(struct vm_terminate_params *vtp)
{
	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1)
		return (errno);

	return (0);
}

/*
 * opentap
 *
 * Opens the next available tap device, up to MAX_TAP.
 *
 * Parameters
 *  ifname: a buffer of at least IF_NAMESIZE bytes.
 *
 * Returns a file descriptor to the tap node opened or -1 if no tap devices were
 * available, setting errno to the open(2) error.
 */
int
opentap(char *ifname)
{
	int err = 0, i, fd;
	char path[PATH_MAX];

	for (i = 0; i < MAX_TAP; i++) {
		snprintf(path, PATH_MAX, "/dev/tap%d", i);

		errno = 0;
		fd = open(path, O_RDWR | O_NONBLOCK);
		if (fd != -1)
			break;
		err = errno;
		if (err == EBUSY) {
			/* Busy...try next tap. */
			continue;
		} else if (err == ENOENT) {
			/* Ran out of /dev/tap* special files. */
			break;
		} else {
			log_warn("%s: unexpected error", __func__);
			break;
		}
	}

	/* Record the last opened tap device. */
	snprintf(ifname, IF_NAMESIZE, "tap%d", i);

	if (err)
		errno = err;
	return (fd);
}

/*
 * vmm_start_vm
 *
 * Prepares and fork+execs a new VM process.
 *
 * Parameters:
 *  imsg: The VM data structure that is including the VM create parameters.
 *  id: Returns the VM id as reported by the kernel and obtained from the VM.
 *  pid: Returns the VM pid to the parent.
 *
 * Return values:
 *  0: success
 *  !0: failure - typically an errno indicating the source of the failure
 */
int
vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
{
	struct vm_create_params	*vcp;
	struct vmd_vm		*vm;
	char			*nargv[10], num[32], vmm_fd[32], psp_fd[32];
	int			 fd, ret = EINVAL;
	int			 fds[2];
	pid_t			 vm_pid;
	size_t			 i, j, sz;

	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
		log_warnx("%s: can't find vm", __func__);
		return (ENOENT);
	}
	vcp = &vm->vm_params.vmc_params;

	/* A received (migrated) VM already carries its tty fd. */
	if (!(vm->vm_state & VM_STATE_RECEIVED)) {
		if ((vm->vm_tty = imsg_get_fd(imsg)) == -1) {
			log_warnx("%s: can't get tty", __func__);
			goto err;
		}
	}

	/* Control channel between this (vmm) process and the VM child. */
	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds)
	    == -1)
		fatal("socketpair");

	/* Start child vmd for this VM (fork, chroot, drop privs) */
	vm_pid = fork();
	if (vm_pid == -1) {
		log_warn("%s: start child failed", __func__);
		ret = EIO;
		goto err;
	}

	if (vm_pid > 0) {
		/* Parent */
		vm->vm_pid = vm_pid;
		close_fd(fds[1]);

		/* Send the details over the pipe to the child. */
		sz = atomicio(vwrite, fds[0], vm, sizeof(*vm));
		if (sz != sizeof(*vm)) {
			log_warnx("%s: failed to send config for vm '%s'",
			    __func__, vcp->vcp_name);
			ret = EIO;
			/* Defer error handling until after fd closing. */
		}

		/* As the parent/vmm process, we no longer need these fds. */
		for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) {
			for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
				if (close_fd(vm->vm_disks[i][j]) == 0)
					vm->vm_disks[i][j] = -1;
			}
		}
		for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) {
			if (close_fd(vm->vm_ifs[i].vif_fd) == 0)
				vm->vm_ifs[i].vif_fd = -1;
		}
		if (close_fd(vm->vm_kernel) == 0)
			vm->vm_kernel = -1;
		if (close_fd(vm->vm_cdrom) == 0)
			vm->vm_cdrom = -1;
		if (close_fd(vm->vm_tty) == 0)
			vm->vm_tty = -1;

		/* Deferred error handling from sending the vm struct. */
		if (ret == EIO)
			goto err;

		/* Send the current local prefix configuration. */
		sz = atomicio(vwrite, fds[0], &env->vmd_cfg.cfg_localprefix,
		    sizeof(env->vmd_cfg.cfg_localprefix));
		if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) {
			log_warnx("%s: failed to send local prefix for vm '%s'",
			    __func__, vcp->vcp_name);
			ret = EIO;
			goto err;
		}

		/* Read back the kernel-generated vm id from the child */
		sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id));
		if (sz != sizeof(vcp->vcp_id)) {
			log_debug("%s: failed to receive vm id from vm %s",
			    __func__, vcp->vcp_name);
			/* vmd could not allocate memory for the vm. */
			ret = ENOMEM;
			goto err;
		}

		/* Check for an invalid id. This indicates child failure. */
		if (vcp->vcp_id == 0)
			goto err;

		*id = vcp->vcp_id;
		*pid = vm->vm_pid;

		/* Wire up our pipe into the event handling. */
		if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
			fatal("setup vm pipe");
	} else {
		/* Child. Create a new session. */
		if (setsid() == -1)
			fatal("setsid");

		close_fd(fds[0]);
		close_fd(PROC_PARENT_SOCK_FILENO);

		/* Detach from terminal. */
		if (!env->vmd_debug && (fd =
		    open("/dev/null", O_RDWR, 0)) != -1) {
			dup2(fd, STDIN_FILENO);
			dup2(fd, STDOUT_FILENO);
			dup2(fd, STDERR_FILENO);
			if (fd > 2)
				close(fd);
		}

		/* Clear close-on-exec so the exec'd vm keeps the psp fd. */
		if (env->vmd_psp_fd > 0)
			fcntl(env->vmd_psp_fd, F_SETFD, 0);	/* psp device fd */

		/*
		 * Prepare our new argv for execvp(2) with the fd of our open
		 * pipe to the parent/vmm process as an argument.
		 */
		memset(num, 0, sizeof(num));
		snprintf(num, sizeof(num), "%d", fds[1]);
		memset(vmm_fd, 0, sizeof(vmm_fd));
		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
		memset(psp_fd, 0, sizeof(psp_fd));
		snprintf(psp_fd, sizeof(psp_fd), "%d", env->vmd_psp_fd);

		i = 0;
		nargv[i++] = env->argv0;
		nargv[i++] = "-V";
		nargv[i++] = num;
		nargv[i++] = "-i";
		nargv[i++] = vmm_fd;
		nargv[i++] = "-j";
		nargv[i++] = psp_fd;
		if (env->vmd_debug)
			nargv[i++] = "-d";
		if (env->vmd_verbose == 1)
			nargv[i++] = "-v";
		else if (env->vmd_verbose > 1)
			nargv[i++] = "-vv";
		nargv[i++] = NULL;
		if (i > sizeof(nargv) / sizeof(nargv[0]))
			fatalx("%s: nargv overflow", __func__);

		/* Control resumes in vmd main(). */
		execvp(nargv[0], nargv);

		ret = errno;
		log_warn("execvp %s", nargv[0]);
		_exit(ret);
		/* NOTREACHED */
	}

	return (0);

 err:
	if (!vm->vm_from_config)
		vm_remove(vm, __func__);

	return (ret);
}

/*
 * get_info_vm
 *
 * Returns a list of VMs known to vmm(4).
 *
 * Parameters:
 *  ps: the privsep context.
 *  imsg: the received imsg including the peer id.  May be NULL when
 *      terminate is set, as no reply is composed in that mode.
 *  terminate: terminate the listed vm.
 *
 * Return values:
 *  0: success
 *  !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
 */
int
get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
{
	int ret;
	size_t ct, i;
	struct vm_info_params vip;
	struct vm_info_result *info;
	struct vm_terminate_params vtp;
	struct vmop_info_result vir;

	/*
	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
	 * buffer size of 0, which results in vmm(4) returning the
	 * number of bytes required back to us in vip.vip_size,
	 * and then we call it again after malloc'ing the required
	 * number of bytes.
	 *
	 * It is possible that we could fail a second time (e.g. if
	 * another VM was created in the instant between the two
	 * ioctls, but in that case the caller can just try again
	 * as vmm(4) will return a zero-sized list in that case.
	 */
	vip.vip_size = 0;
	info = NULL;
	ret = 0;
	memset(&vir, 0, sizeof(vir));

	/* First ioctl to see how many bytes needed (vip.vip_size) */
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1)
		return (errno);

	/* The sizing probe must not have returned any entries. */
	if (vip.vip_info_ct != 0)
		return (EIO);

	info = malloc(vip.vip_size);
	if (info == NULL)
		return (ENOMEM);

	/* Second ioctl to get the actual list */
	vip.vip_info = info;
	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) {
		ret = errno;
		free(info);
		return (ret);
	}

	/* Return info */
	ct = vip.vip_size / sizeof(struct vm_info_result);
	for (i = 0; i < ct; i++) {
		if (terminate) {
			/* Terminate mode: kill each listed VM, no reply. */
			vtp.vtp_vm_id = info[i].vir_id;
			if ((ret = terminate_vm(&vtp)) != 0)
				break;
			log_debug("%s: terminated vm %s (id %d)", __func__,
			    info[i].vir_name, info[i].vir_id);
			continue;
		}
		/* Report mode: send one GET_INFO_VM_DATA imsg per VM. */
		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
		vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL);
		if (proc_compose_imsg(ps, PROC_PARENT, -1,
		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
		    &vir, sizeof(vir)) == -1) {
			ret = EIO;
			break;
		}
	}
	free(info);

	return (ret);
}