1*0a9d031fSclaudio /* $OpenBSD: vmm.c,v 1.130 2024/11/21 13:39:34 claudio Exp $ */ 2af96af6cSreyk 3af96af6cSreyk /* 4af96af6cSreyk * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 5af96af6cSreyk * 6af96af6cSreyk * Permission to use, copy, modify, and distribute this software for any 7af96af6cSreyk * purpose with or without fee is hereby granted, provided that the above 8af96af6cSreyk * copyright notice and this permission notice appear in all copies. 9af96af6cSreyk * 10af96af6cSreyk * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11af96af6cSreyk * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12af96af6cSreyk * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13af96af6cSreyk * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14af96af6cSreyk * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15af96af6cSreyk * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16af96af6cSreyk * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17af96af6cSreyk */ 18af96af6cSreyk 1924fb43d0Sderaadt #include <sys/types.h> 20af96af6cSreyk #include <sys/ioctl.h> 21af96af6cSreyk #include <sys/queue.h> 22299ee841Sstefan #include <sys/wait.h> 23af96af6cSreyk #include <sys/socket.h> 24af96af6cSreyk 25ba66f564Sdv #include <dev/vmm/vmm.h> 26af96af6cSreyk 275921535cSreyk #include <net/if.h> 285921535cSreyk 29af96af6cSreyk #include <errno.h> 30ecc93de1Smlarkin #include <event.h> 31af96af6cSreyk #include <fcntl.h> 32af96af6cSreyk #include <imsg.h> 33af96af6cSreyk #include <limits.h> 34af96af6cSreyk #include <stdio.h> 35af96af6cSreyk #include <stdlib.h> 36af96af6cSreyk #include <string.h> 37af96af6cSreyk #include <unistd.h> 38af96af6cSreyk 39af96af6cSreyk #include "vmd.h" 40fbbcf6cdSdv #include "atomicio.h" 41b3bc6112Sdv #include "proc.h" 42af96af6cSreyk 43299ee841Sstefan void vmm_sighdlr(int, short, void *); 44a014dd99Sreyk int vmm_start_vm(struct imsg *, uint32_t *, pid_t *); 45f0bbd60cSreyk int vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *); 46f0bbd60cSreyk void vmm_run(struct privsep *, struct privsep_proc *, void *); 473afb90b0Sreyk void vmm_dispatch_vm(int, short, void *); 481e1977eeSreyk int terminate_vm(struct vm_terminate_params *); 491e1977eeSreyk int get_info_vm(struct privsep *, struct imsg *, int); 501e1977eeSreyk int opentap(char *); 51af96af6cSreyk 52af96af6cSreyk extern struct vmd *env; 53af96af6cSreyk 54f0bbd60cSreyk static struct privsep_proc procs[] = { 55f0bbd60cSreyk { "parent", PROC_PARENT, vmm_dispatch_parent }, 56f0bbd60cSreyk }; 57f0bbd60cSreyk 58bcc679a1Sreyk void 59f0bbd60cSreyk vmm(struct privsep *ps, struct privsep_proc *p) 60f0bbd60cSreyk { 61bcc679a1Sreyk proc_run(ps, p, procs, nitems(procs), vmm_run, NULL); 62f0bbd60cSreyk } 63f0bbd60cSreyk 64f0bbd60cSreyk void 65f0bbd60cSreyk vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg) 66f0bbd60cSreyk { 67f0bbd60cSreyk if (config_init(ps->ps_env) == -1) 68f0bbd60cSreyk fatal("failed to initialize configuration"); 69f0bbd60cSreyk 7024386e31Sdv /* 7124386e31Sdv * We aren't root, so we can't chroot(2). Use unveil(2) instead. 7224386e31Sdv */ 7324386e31Sdv if (unveil(env->argv0, "x") == -1) 7424386e31Sdv fatal("unveil %s", env->argv0); 7524386e31Sdv if (unveil(NULL, NULL) == -1) 7624386e31Sdv fatal("unveil lock"); 77299ee841Sstefan 78f0bbd60cSreyk /* 79f0bbd60cSreyk * pledge in the vmm process: 80f0bbd60cSreyk * stdio - for malloc and basic I/O including events. 816bde4a58Sreyk * vmm - for the vmm ioctls and operations. 8224386e31Sdv * proc, exec - for forking and execing new vm's. 8394c51922Sdv * sendfd - for sending send/recv fds to vm proc. 846bde4a58Sreyk * recvfd - for disks, interfaces and other fds. 85f0bbd60cSreyk */ 8624386e31Sdv if (pledge("stdio vmm sendfd recvfd proc exec", NULL) == -1) 87f0bbd60cSreyk fatal("pledge"); 8824386e31Sdv 8924386e31Sdv signal_del(&ps->ps_evsigchld); 9024386e31Sdv signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps); 9124386e31Sdv signal_add(&ps->ps_evsigchld, NULL); 92f0bbd60cSreyk } 93f0bbd60cSreyk 94af96af6cSreyk int 95f0bbd60cSreyk vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) 96af96af6cSreyk { 97af96af6cSreyk struct privsep *ps = p->p_ps; 98ec4b63a0Sclaudio int res = 0, cmd = 0, verbose; 99eb1cd41dSreyk struct vmd_vm *vm = NULL; 1006fcc05d2Sreyk struct vm_terminate_params vtp; 10152e954a3Spd struct vmop_id vid; 10248665f9bSreyk struct vmop_result vmr; 103eed20f3bSpd struct vmop_create_params vmc; 10497f33f1dSdv struct vmop_addr_result var; 10595176482Sclaudio uint32_t id = 0, peerid = imsg->hdr.peerid; 106a014dd99Sreyk pid_t pid = 0; 1073be9785fSreyk unsigned int mode, flags; 108af96af6cSreyk 109af96af6cSreyk switch (imsg->hdr.type) { 110af96af6cSreyk case IMSG_VMDOP_START_VM_REQUEST: 1114f76ab55Sreyk res = config_getvm(ps, imsg); 11248665f9bSreyk if (res == -1) { 11348665f9bSreyk res = errno; 114f0bbd60cSreyk cmd = IMSG_VMDOP_START_VM_RESPONSE; 11548665f9bSreyk } 116f0bbd60cSreyk break; 11795ab188fSccardenas case IMSG_VMDOP_START_VM_CDROM: 11895ab188fSccardenas res = config_getcdrom(ps, imsg); 11995ab188fSccardenas if (res == -1) { 12095ab188fSccardenas res = errno; 12195ab188fSccardenas cmd = IMSG_VMDOP_START_VM_RESPONSE; 12295ab188fSccardenas } 12395ab188fSccardenas break; 124f0bbd60cSreyk case IMSG_VMDOP_START_VM_DISK: 125f0bbd60cSreyk res = config_getdisk(ps, imsg); 1264a0e5604Sreyk if (res == -1) { 1274a0e5604Sreyk res = errno; 128f0bbd60cSreyk cmd = IMSG_VMDOP_START_VM_RESPONSE; 1294a0e5604Sreyk } 130f0bbd60cSreyk break; 131f0bbd60cSreyk case IMSG_VMDOP_START_VM_IF: 132f0bbd60cSreyk res = config_getif(ps, imsg); 1334a0e5604Sreyk if (res == -1) { 1344a0e5604Sreyk res = errno; 135f0bbd60cSreyk cmd = IMSG_VMDOP_START_VM_RESPONSE; 1364a0e5604Sreyk } 137f0bbd60cSreyk break; 138f0bbd60cSreyk case IMSG_VMDOP_START_VM_END: 139a014dd99Sreyk res = vmm_start_vm(imsg, &id, &pid); 140eb1cd41dSreyk /* Check if the ID can be mapped correctly */ 141fbbcf6cdSdv if (res == 0 && (id = vm_id2vmid(id, NULL)) == 0) 142eb1cd41dSreyk res = ENOENT; 143af96af6cSreyk cmd = IMSG_VMDOP_START_VM_RESPONSE; 144af96af6cSreyk break; 145af96af6cSreyk case IMSG_VMDOP_TERMINATE_VM_REQUEST: 1463be9785fSreyk IMSG_SIZE_CHECK(imsg, &vid); 1473be9785fSreyk memcpy(&vid, imsg->data, sizeof(vid)); 1483be9785fSreyk id = vid.vid_id; 1493be9785fSreyk flags = vid.vid_flags; 150ddadf993Sreyk 151ddadf993Sreyk DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id); 152f84d5d33Sreyk 1533be9785fSreyk cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE; 1543be9785fSreyk 155eb1cd41dSreyk if (id == 0) { 156eb1cd41dSreyk res = ENOENT; 1572da5c9dbSmlarkin } else if ((vm = vm_getbyvmid(id)) != NULL) { 1583be9785fSreyk if (flags & VMOP_FORCE) { 159f6e5c9ebSreyk vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); 16019700f36Sjasper vm->vm_state |= VM_STATE_SHUTDOWN; 161f6e5c9ebSreyk (void)terminate_vm(&vtp); 162f6e5c9ebSreyk res = 0; 16319700f36Sjasper } else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) { 164ddadf993Sreyk log_debug("%s: sending shutdown request" 165ddadf993Sreyk " to vm %d", __func__, id); 166f84d5d33Sreyk 167f84d5d33Sreyk /* 1682da5c9dbSmlarkin * Request reboot but mark the VM as shutting 1692da5c9dbSmlarkin * down. This way we can terminate the VM after 1702da5c9dbSmlarkin * the triple fault instead of reboot and 1712da5c9dbSmlarkin * avoid being stuck in the ACPI-less powerdown 1722da5c9dbSmlarkin * ("press any key to reboot") of the VM. 173f84d5d33Sreyk */ 17419700f36Sjasper vm->vm_state |= VM_STATE_SHUTDOWN; 175f84d5d33Sreyk if (imsg_compose_event(&vm->vm_iev, 1762da5c9dbSmlarkin IMSG_VMDOP_VM_REBOOT, 1772da5c9dbSmlarkin 0, 0, -1, NULL, 0) == -1) 178f84d5d33Sreyk res = errno; 179f84d5d33Sreyk else 180f84d5d33Sreyk res = 0; 181f84d5d33Sreyk } else { 182bb4a0381Smlarkin /* 183bb4a0381Smlarkin * VM is currently being shutdown. 184bb4a0381Smlarkin * Check to see if the VM process is still 185bb4a0381Smlarkin * active. If not, return VMD_VM_STOP_INVALID. 186bb4a0381Smlarkin */ 1873be9785fSreyk if (vm_vmid2id(vm->vm_vmid, vm) == 0) { 1882da5c9dbSmlarkin log_debug("%s: no vm running anymore", 1892da5c9dbSmlarkin __func__); 1902da5c9dbSmlarkin res = VMD_VM_STOP_INVALID; 1912da5c9dbSmlarkin } 1922da5c9dbSmlarkin } 1932da5c9dbSmlarkin } else { 194549bbfbbSdv /* VM doesn't exist, cannot stop vm */ 1952da5c9dbSmlarkin log_debug("%s: cannot stop vm that is not running", 1962da5c9dbSmlarkin __func__); 1972da5c9dbSmlarkin res = VMD_VM_STOP_INVALID; 198f84d5d33Sreyk } 199af96af6cSreyk break; 200af96af6cSreyk case IMSG_VMDOP_GET_INFO_VM_REQUEST: 2016fcc05d2Sreyk res = get_info_vm(ps, imsg, 0); 202af96af6cSreyk cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA; 203af96af6cSreyk break; 204c48cfcf4Sreyk case IMSG_VMDOP_CONFIG: 205c48cfcf4Sreyk config_getconfig(env, imsg); 206c48cfcf4Sreyk break; 207008065a5Sreyk case IMSG_CTL_RESET: 2081f7fe034Sreyk IMSG_SIZE_CHECK(imsg, &mode); 2091f7fe034Sreyk memcpy(&mode, imsg->data, sizeof(mode)); 2101f7fe034Sreyk 2111f7fe034Sreyk if (mode & CONFIG_VMS) { 2121f7fe034Sreyk /* Terminate and remove all VMs */ 2131f7fe034Sreyk vmm_shutdown(); 2141f7fe034Sreyk mode &= ~CONFIG_VMS; 2151f7fe034Sreyk } 2161f7fe034Sreyk 217008065a5Sreyk config_getreset(env, imsg); 218008065a5Sreyk break; 2193afb90b0Sreyk case IMSG_CTL_VERBOSE: 2203afb90b0Sreyk IMSG_SIZE_CHECK(imsg, &verbose); 2213afb90b0Sreyk memcpy(&verbose, imsg->data, sizeof(verbose)); 2223afb90b0Sreyk log_setverbose(verbose); 22308d0da61Sdv env->vmd_verbose = verbose; 2243afb90b0Sreyk /* Forward message to each VM process */ 2253afb90b0Sreyk TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) { 2263afb90b0Sreyk imsg_compose_event(&vm->vm_iev, 2273afb90b0Sreyk imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 2283afb90b0Sreyk -1, &verbose, sizeof(verbose)); 2293afb90b0Sreyk } 2303afb90b0Sreyk break; 23152e954a3Spd case IMSG_VMDOP_PAUSE_VM: 23252e954a3Spd IMSG_SIZE_CHECK(imsg, &vid); 23352e954a3Spd memcpy(&vid, imsg->data, sizeof(vid)); 23452e954a3Spd id = vid.vid_id; 23552e954a3Spd if ((vm = vm_getbyvmid(id)) == NULL) { 23652e954a3Spd res = ENOENT; 23752e954a3Spd cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE; 23852e954a3Spd break; 23952e954a3Spd } 24052e954a3Spd imsg_compose_event(&vm->vm_iev, 24152e954a3Spd imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 24253027660Sclaudio imsg_get_fd(imsg), &vid, sizeof(vid)); 24352e954a3Spd break; 24452e954a3Spd case IMSG_VMDOP_UNPAUSE_VM: 24552e954a3Spd IMSG_SIZE_CHECK(imsg, &vid); 24652e954a3Spd memcpy(&vid, imsg->data, sizeof(vid)); 24752e954a3Spd id = vid.vid_id; 24852e954a3Spd if ((vm = vm_getbyvmid(id)) == NULL) { 24952e954a3Spd res = ENOENT; 25052e954a3Spd cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE; 25152e954a3Spd break; 25252e954a3Spd } 25352e954a3Spd imsg_compose_event(&vm->vm_iev, 25452e954a3Spd imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 25553027660Sclaudio imsg_get_fd(imsg), &vid, sizeof(vid)); 25652e954a3Spd break; 257eed20f3bSpd case IMSG_VMDOP_SEND_VM_REQUEST: 258eed20f3bSpd IMSG_SIZE_CHECK(imsg, &vid); 259eed20f3bSpd memcpy(&vid, imsg->data, sizeof(vid)); 260eed20f3bSpd id = vid.vid_id; 261eed20f3bSpd if ((vm = vm_getbyvmid(id)) == NULL) { 262eed20f3bSpd res = ENOENT; 26353027660Sclaudio close(imsg_get_fd(imsg)); /* XXX */ 264eed20f3bSpd cmd = IMSG_VMDOP_START_VM_RESPONSE; 265eed20f3bSpd break; 266eed20f3bSpd } 267eed20f3bSpd imsg_compose_event(&vm->vm_iev, 268eed20f3bSpd imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 26953027660Sclaudio imsg_get_fd(imsg), &vid, sizeof(vid)); 270eed20f3bSpd break; 271eed20f3bSpd case IMSG_VMDOP_RECEIVE_VM_REQUEST: 272eed20f3bSpd IMSG_SIZE_CHECK(imsg, &vmc); 273eed20f3bSpd memcpy(&vmc, imsg->data, sizeof(vmc)); 274ec4b63a0Sclaudio if (vm_register(ps, &vmc, &vm, 275ec4b63a0Sclaudio imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) { 276ec4b63a0Sclaudio res = errno; 277ec4b63a0Sclaudio cmd = IMSG_VMDOP_START_VM_RESPONSE; 278ec4b63a0Sclaudio break; 279ec4b63a0Sclaudio } 28053027660Sclaudio vm->vm_tty = imsg_get_fd(imsg); 28119700f36Sjasper vm->vm_state |= VM_STATE_RECEIVED; 282548054a9Spd vm->vm_state |= VM_STATE_PAUSED; 283eed20f3bSpd break; 284eed20f3bSpd case IMSG_VMDOP_RECEIVE_VM_END: 285eed20f3bSpd if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 286eed20f3bSpd res = ENOENT; 28753027660Sclaudio close(imsg_get_fd(imsg)); /* XXX */ 288eed20f3bSpd cmd = IMSG_VMDOP_START_VM_RESPONSE; 289eed20f3bSpd break; 290eed20f3bSpd } 29153027660Sclaudio vm->vm_receive_fd = imsg_get_fd(imsg); 292a014dd99Sreyk res = vmm_start_vm(imsg, &id, &pid); 293661d0050Spd /* Check if the ID can be mapped correctly */ 294661d0050Spd if ((id = vm_id2vmid(id, NULL)) == 0) 295661d0050Spd res = ENOENT; 296eed20f3bSpd cmd = IMSG_VMDOP_START_VM_RESPONSE; 297eed20f3bSpd break; 29897f33f1dSdv case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE: 29997f33f1dSdv IMSG_SIZE_CHECK(imsg, &var); 30097f33f1dSdv memcpy(&var, imsg->data, sizeof(var)); 30197f33f1dSdv if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) { 30297f33f1dSdv res = ENOENT; 30397f33f1dSdv break; 30497f33f1dSdv } 30597f33f1dSdv /* Forward hardware address details to the guest vm */ 30697f33f1dSdv imsg_compose_event(&vm->vm_iev, 30797f33f1dSdv imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid, 30853027660Sclaudio imsg_get_fd(imsg), &var, sizeof(var)); 30997f33f1dSdv break; 3102748f5e2Sdv case IMSG_VMDOP_RECEIVE_VMM_FD: 3112748f5e2Sdv if (env->vmd_fd > -1) 3122748f5e2Sdv fatalx("already received vmm fd"); 31353027660Sclaudio env->vmd_fd = imsg_get_fd(imsg); 3142748f5e2Sdv 3152748f5e2Sdv /* Get and terminate all running VMs */ 3162748f5e2Sdv get_info_vm(ps, NULL, 1); 3172748f5e2Sdv break; 318f4b47ae8Sbluhm case IMSG_VMDOP_RECEIVE_PSP_FD: 319f4b47ae8Sbluhm if (env->vmd_psp_fd > -1) 320f4b47ae8Sbluhm fatalx("already received psp fd"); 3210a6e69e4Sclaudio env->vmd_psp_fd = imsg_get_fd(imsg); 322f4b47ae8Sbluhm break; 323af96af6cSreyk default: 324af96af6cSreyk return (-1); 325af96af6cSreyk } 326af96af6cSreyk 32748665f9bSreyk switch (cmd) { 32848665f9bSreyk case 0: 32948665f9bSreyk break; 33048665f9bSreyk case IMSG_VMDOP_START_VM_RESPONSE: 331d8cb3559Sreyk if (res != 0) { 332d3b18ed2Sreyk /* Remove local reference if it exists */ 333e0994759Smlarkin if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) { 334e0994759Smlarkin log_debug("%s: removing vm, START_VM_RESPONSE", 335e0994759Smlarkin __func__); 336ddadf993Sreyk vm_remove(vm, __func__); 337d8cb3559Sreyk } 338e0994759Smlarkin } 339eb1cd41dSreyk if (id == 0) 340eb1cd41dSreyk id = imsg->hdr.peerid; 341ec4b63a0Sclaudio /* FALLTHROUGH */ 34252e954a3Spd case IMSG_VMDOP_PAUSE_VM_RESPONSE: 34352e954a3Spd case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 34448665f9bSreyk case IMSG_VMDOP_TERMINATE_VM_RESPONSE: 34548665f9bSreyk memset(&vmr, 0, sizeof(vmr)); 34648665f9bSreyk vmr.vmr_result = res; 34748665f9bSreyk vmr.vmr_id = id; 348a014dd99Sreyk vmr.vmr_pid = pid; 34948665f9bSreyk if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, 35095176482Sclaudio peerid, -1, &vmr, sizeof(vmr)) == -1) 351af96af6cSreyk return (-1); 35248665f9bSreyk break; 35348665f9bSreyk default: 35448665f9bSreyk if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd, 35595176482Sclaudio peerid, -1, &res, sizeof(res)) == -1) 35648665f9bSreyk return (-1); 35748665f9bSreyk break; 35848665f9bSreyk } 359af96af6cSreyk 360af96af6cSreyk return (0); 361af96af6cSreyk } 362af96af6cSreyk 363299ee841Sstefan void 364299ee841Sstefan vmm_sighdlr(int sig, short event, void *arg) 365299ee841Sstefan { 366299ee841Sstefan struct privsep *ps = arg; 367c8f0d715Sreyk int status, ret = 0; 368299ee841Sstefan uint32_t vmid; 369299ee841Sstefan pid_t pid; 370299ee841Sstefan struct vmop_result vmr; 371299ee841Sstefan struct vmd_vm *vm; 372299ee841Sstefan struct vm_terminate_params vtp; 373299ee841Sstefan 374bd6f7673Smlarkin log_debug("%s: handling signal %d", __func__, sig); 375299ee841Sstefan switch (sig) { 376299ee841Sstefan case SIGCHLD: 377299ee841Sstefan do { 378299ee841Sstefan pid = waitpid(-1, &status, WNOHANG); 379299ee841Sstefan if (pid <= 0) 380299ee841Sstefan continue; 381299ee841Sstefan 382299ee841Sstefan if (WIFEXITED(status) || WIFSIGNALED(status)) { 383299ee841Sstefan vm = vm_getbypid(pid); 384299ee841Sstefan if (vm == NULL) { 385299ee841Sstefan /* 386299ee841Sstefan * If the VM is gone already, it 387299ee841Sstefan * got terminated via a 388299ee841Sstefan * IMSG_VMDOP_TERMINATE_VM_REQUEST. 389299ee841Sstefan */ 390299ee841Sstefan continue; 391299ee841Sstefan } 392299ee841Sstefan 393c8f0d715Sreyk if (WIFEXITED(status)) 394c8f0d715Sreyk ret = WEXITSTATUS(status); 395c8f0d715Sreyk 396549bbfbbSdv /* Don't reboot on pending shutdown */ 397549bbfbbSdv if (ret == EAGAIN && 398549bbfbbSdv (vm->vm_state & VM_STATE_SHUTDOWN)) 399f84d5d33Sreyk ret = 0; 400f84d5d33Sreyk 401ea9c30d9Sedd vmid = vm->vm_params.vmc_params.vcp_id; 402299ee841Sstefan vtp.vtp_vm_id = vmid; 403f6e5c9ebSreyk 404f6e5c9ebSreyk if (terminate_vm(&vtp) == 0) 405f6e5c9ebSreyk log_debug("%s: terminated vm %s" 406f6e5c9ebSreyk " (id %d)", __func__, 407f6e5c9ebSreyk vm->vm_params.vmc_params.vcp_name, 408f6e5c9ebSreyk vm->vm_vmid); 409f6e5c9ebSreyk 410299ee841Sstefan memset(&vmr, 0, sizeof(vmr)); 411c8f0d715Sreyk vmr.vmr_result = ret; 412eb1cd41dSreyk vmr.vmr_id = vm_id2vmid(vmid, vm); 413299ee841Sstefan if (proc_compose_imsg(ps, PROC_PARENT, 414299ee841Sstefan -1, IMSG_VMDOP_TERMINATE_VM_EVENT, 4153be9785fSreyk vm->vm_peerid, -1, 4163be9785fSreyk &vmr, sizeof(vmr)) == -1) 417299ee841Sstefan log_warnx("could not signal " 418299ee841Sstefan "termination of VM %u to " 419eb1cd41dSreyk "parent", vm->vm_vmid); 42013dd90b2Sreyk 421ddadf993Sreyk vm_remove(vm, __func__); 422299ee841Sstefan } else 423299ee841Sstefan fatalx("unexpected cause of SIGCHLD"); 424299ee841Sstefan } while (pid > 0 || (pid == -1 && errno == EINTR)); 425299ee841Sstefan break; 426299ee841Sstefan default: 427299ee841Sstefan fatalx("unexpected signal"); 428299ee841Sstefan } 429299ee841Sstefan } 430299ee841Sstefan 431af96af6cSreyk /* 432ae6ee9deSreyk * vmm_shutdown 433ae6ee9deSreyk * 434ae6ee9deSreyk * Terminate VMs on shutdown to avoid "zombie VM" processes. 435ae6ee9deSreyk */ 436ae6ee9deSreyk void 437ae6ee9deSreyk vmm_shutdown(void) 438ae6ee9deSreyk { 439ae6ee9deSreyk struct vm_terminate_params vtp; 440ae6ee9deSreyk struct vmd_vm *vm, *vm_next; 441ae6ee9deSreyk 442ae6ee9deSreyk TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) { 443eb1cd41dSreyk vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm); 444ae6ee9deSreyk 445ae6ee9deSreyk /* XXX suspend or request graceful shutdown */ 44613dd90b2Sreyk (void)terminate_vm(&vtp); 447ddadf993Sreyk vm_remove(vm, __func__); 448ae6ee9deSreyk } 449ae6ee9deSreyk } 450ae6ee9deSreyk 4511e1977eeSreyk /* 4521e1977eeSreyk * vmm_pipe 4531e1977eeSreyk * 4541e1977eeSreyk * Create a new imsg control channel between vmm parent and a VM 4551e1977eeSreyk * (can be called on both sides). 4561e1977eeSreyk */ 4573afb90b0Sreyk int 4583afb90b0Sreyk vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *)) 4593afb90b0Sreyk { 4603afb90b0Sreyk struct imsgev *iev = &vm->vm_iev; 4613afb90b0Sreyk 462b3bc6112Sdv /* 463b3bc6112Sdv * Set to close-on-exec as vmm_pipe is used after fork+exec to 464b3bc6112Sdv * establish async ipc between vm and vmd's vmm process. This 465b3bc6112Sdv * prevents future vm processes or virtio subprocesses from 466b3bc6112Sdv * inheriting this control channel. 467b3bc6112Sdv */ 468b3bc6112Sdv if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { 469b3bc6112Sdv log_warn("failed to set close-on-exec for vmm ipc channel"); 4703afb90b0Sreyk return (-1); 4713afb90b0Sreyk } 4723afb90b0Sreyk 473*0a9d031fSclaudio if (imsgbuf_init(&iev->ibuf, fd) == -1) { 474*0a9d031fSclaudio log_warn("failed to init imsgbuf"); 475*0a9d031fSclaudio return (-1); 476*0a9d031fSclaudio } 477*0a9d031fSclaudio imsgbuf_allow_fdpass(&iev->ibuf); 4783afb90b0Sreyk iev->handler = cb; 4793afb90b0Sreyk iev->data = vm; 4803afb90b0Sreyk imsg_event_add(iev); 4813afb90b0Sreyk 4823afb90b0Sreyk return (0); 4833afb90b0Sreyk } 4843afb90b0Sreyk 4851e1977eeSreyk /* 4861e1977eeSreyk * vmm_dispatch_vm 4871e1977eeSreyk * 4881e1977eeSreyk * imsg callback for messages that are received from a VM child process. 4891e1977eeSreyk */ 4903afb90b0Sreyk void 4913afb90b0Sreyk vmm_dispatch_vm(int fd, short event, void *arg) 4923afb90b0Sreyk { 4933afb90b0Sreyk struct vmd_vm *vm = arg; 494eed20f3bSpd struct vmop_result vmr; 4953afb90b0Sreyk struct imsgev *iev = &vm->vm_iev; 4963afb90b0Sreyk struct imsgbuf *ibuf = &iev->ibuf; 4973afb90b0Sreyk struct imsg imsg; 4983afb90b0Sreyk ssize_t n; 49952e954a3Spd unsigned int i; 5003afb90b0Sreyk 5013afb90b0Sreyk if (event & EV_READ) { 502d12ef5f3Sclaudio if ((n = imsgbuf_read(ibuf)) == -1) 503dd7efffeSclaudio fatal("%s: imsgbuf_read", __func__); 5043afb90b0Sreyk if (n == 0) { 505549bbfbbSdv /* This pipe is dead, so remove the event handler */ 5063afb90b0Sreyk event_del(&iev->ev); 5073afb90b0Sreyk return; 5083afb90b0Sreyk } 5093afb90b0Sreyk } 5103afb90b0Sreyk 5113afb90b0Sreyk if (event & EV_WRITE) { 512dd7efffeSclaudio if (imsgbuf_write(ibuf) == -1) { 513c1aa9554Sclaudio if (errno == EPIPE) { 514c1aa9554Sclaudio /* This pipe is dead, remove the handler */ 5153afb90b0Sreyk event_del(&iev->ev); 5163afb90b0Sreyk return; 5173afb90b0Sreyk } 518dd7efffeSclaudio fatal("%s: imsgbuf_write fd %d", __func__, ibuf->fd); 519c1aa9554Sclaudio } 5203afb90b0Sreyk } 5213afb90b0Sreyk 5223afb90b0Sreyk for (;;) { 5233afb90b0Sreyk if ((n = imsg_get(ibuf, &imsg)) == -1) 5243afb90b0Sreyk fatal("%s: imsg_get", __func__); 5253afb90b0Sreyk if (n == 0) 5263afb90b0Sreyk break; 5273afb90b0Sreyk 5281f5e00e0Sreyk DPRINTF("%s: got imsg %d from %s", 5293afb90b0Sreyk __func__, imsg.hdr.type, 5303afb90b0Sreyk vm->vm_params.vmc_params.vcp_name); 5313afb90b0Sreyk 5323afb90b0Sreyk switch (imsg.hdr.type) { 5333320a88dSreyk case IMSG_VMDOP_VM_SHUTDOWN: 53419700f36Sjasper vm->vm_state |= VM_STATE_SHUTDOWN; 5353320a88dSreyk break; 5363320a88dSreyk case IMSG_VMDOP_VM_REBOOT: 53719700f36Sjasper vm->vm_state &= ~VM_STATE_SHUTDOWN; 5383320a88dSreyk break; 539eed20f3bSpd case IMSG_VMDOP_SEND_VM_RESPONSE: 540eed20f3bSpd IMSG_SIZE_CHECK(&imsg, &vmr); 54152e954a3Spd case IMSG_VMDOP_PAUSE_VM_RESPONSE: 54252e954a3Spd case IMSG_VMDOP_UNPAUSE_VM_RESPONSE: 543e8991ca8Sjsg for (i = 0; i < nitems(procs); i++) { 54452e954a3Spd if (procs[i].p_id == PROC_PARENT) { 54552e954a3Spd proc_forward_imsg(procs[i].p_ps, 54667de8c6dSreyk &imsg, PROC_PARENT, -1); 54752e954a3Spd break; 54852e954a3Spd } 54952e954a3Spd } 55052e954a3Spd break; 55152e954a3Spd 5523afb90b0Sreyk default: 5533afb90b0Sreyk fatalx("%s: got invalid imsg %d from %s", 5543afb90b0Sreyk __func__, imsg.hdr.type, 5553afb90b0Sreyk vm->vm_params.vmc_params.vcp_name); 5563afb90b0Sreyk } 5573afb90b0Sreyk imsg_free(&imsg); 5583afb90b0Sreyk } 5593afb90b0Sreyk imsg_event_add(iev); 5603afb90b0Sreyk } 5613afb90b0Sreyk 5623475ba91Smlarkin /* 563af96af6cSreyk * terminate_vm 564af96af6cSreyk * 565af96af6cSreyk * Requests vmm(4) to terminate the VM whose ID is provided in the 566af96af6cSreyk * supplied vm_terminate_params structure (vtp->vtp_vm_id) 567af96af6cSreyk * 568af96af6cSreyk * Parameters 56969558196Smlarkin * vtp: vm_terminate_params struct containing the ID of the VM to terminate 570af96af6cSreyk * 571af96af6cSreyk * Return values: 572af96af6cSreyk * 0: success 573549bbfbbSdv * !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid) 574af96af6cSreyk */ 575af96af6cSreyk int 5766fcc05d2Sreyk terminate_vm(struct vm_terminate_params *vtp) 577af96af6cSreyk { 578f6e5c9ebSreyk if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1) 579af96af6cSreyk return (errno); 580af96af6cSreyk 581af96af6cSreyk return (0); 582af96af6cSreyk } 583af96af6cSreyk 584af96af6cSreyk /* 585af96af6cSreyk * opentap 586af96af6cSreyk * 587af96af6cSreyk * Opens the next available tap device, up to MAX_TAP. 588af96af6cSreyk * 5895921535cSreyk * Parameters 5904d52d473Skn * ifname: a buffer of at least IF_NAMESIZE bytes. 5915921535cSreyk * 592adb7c8aaSdv * Returns a file descriptor to the tap node opened or -1 if no tap devices were 593adb7c8aaSdv * available, setting errno to the open(2) error. 594af96af6cSreyk */ 595af96af6cSreyk int 5965921535cSreyk opentap(char *ifname) 597af96af6cSreyk { 598adb7c8aaSdv int err = 0, i, fd; 599af96af6cSreyk char path[PATH_MAX]; 600af96af6cSreyk 601af96af6cSreyk for (i = 0; i < MAX_TAP; i++) { 602af96af6cSreyk snprintf(path, PATH_MAX, "/dev/tap%d", i); 603af96af6cSreyk 604adb7c8aaSdv errno = 0; 605adb7c8aaSdv fd = open(path, O_RDWR | O_NONBLOCK); 606adb7c8aaSdv if (fd != -1) 607adb7c8aaSdv break; 608adb7c8aaSdv err = errno; 609adb7c8aaSdv if (err == EBUSY) { 610adb7c8aaSdv /* Busy...try next tap. */ 611adb7c8aaSdv continue; 612adb7c8aaSdv } else if (err == ENOENT) { 613adb7c8aaSdv /* Ran out of /dev/tap* special files. */ 614adb7c8aaSdv break; 615adb7c8aaSdv } else { 616adb7c8aaSdv log_warn("%s: unexpected error", __func__); 617adb7c8aaSdv break; 618adb7c8aaSdv } 619adb7c8aaSdv } 620adb7c8aaSdv 621adb7c8aaSdv /* Record the last opened tap device. */ 622adb7c8aaSdv snprintf(ifname, IF_NAMESIZE, "tap%d", i); 623adb7c8aaSdv 624adb7c8aaSdv if (err) 625adb7c8aaSdv errno = err; 626adb7c8aaSdv return (fd); 627af96af6cSreyk } 628af96af6cSreyk 629af96af6cSreyk /* 6301e1977eeSreyk * vmm_start_vm 631af96af6cSreyk * 63224386e31Sdv * Prepares and fork+execs a new VM process. 633af96af6cSreyk * 634af96af6cSreyk * Parameters: 6351e1977eeSreyk * imsg: The VM data structure that is including the VM create parameters. 6361e1977eeSreyk * id: Returns the VM id as reported by the kernel and obtained from the VM. 637a014dd99Sreyk * pid: Returns the VM pid to the parent. 638af96af6cSreyk * 639af96af6cSreyk * Return values: 640af96af6cSreyk * 0: success 641af96af6cSreyk * !0: failure - typically an errno indicating the source of the failure 642af96af6cSreyk */ 643af96af6cSreyk int 644a014dd99Sreyk vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) 645af96af6cSreyk { 646af96af6cSreyk struct vm_create_params *vcp; 647f0bbd60cSreyk struct vmd_vm *vm; 648f4b47ae8Sbluhm char *nargv[10], num[32], vmm_fd[32], psp_fd[32]; 64924386e31Sdv int fd, ret = EINVAL; 6501e1977eeSreyk int fds[2]; 651fbbcf6cdSdv pid_t vm_pid; 652fbbcf6cdSdv size_t i, j, sz; 653af96af6cSreyk 654f0bbd60cSreyk if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) { 6556ee12970Smlarkin log_warnx("%s: can't find vm", __func__); 65658ab8896Sdv return (ENOENT); 657f0bbd60cSreyk } 658ea9c30d9Sedd vcp = &vm->vm_params.vmc_params; 659af96af6cSreyk 66019700f36Sjasper if (!(vm->vm_state & VM_STATE_RECEIVED)) { 66153027660Sclaudio if ((vm->vm_tty = imsg_get_fd(imsg)) == -1) { 6626ee12970Smlarkin log_warnx("%s: can't get tty", __func__); 663af96af6cSreyk goto err; 664af96af6cSreyk } 665eed20f3bSpd } 666af96af6cSreyk 667b3bc6112Sdv if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds) 668b3bc6112Sdv == -1) 66948665f9bSreyk fatal("socketpair"); 67048665f9bSreyk 67124386e31Sdv /* Start child vmd for this VM (fork, chroot, drop privs) */ 672fbbcf6cdSdv vm_pid = fork(); 673fbbcf6cdSdv if (vm_pid == -1) { 67424386e31Sdv log_warn("%s: start child failed", __func__); 675af96af6cSreyk ret = EIO; 676af96af6cSreyk goto err; 677af96af6cSreyk } 678af96af6cSreyk 679fbbcf6cdSdv if (vm_pid > 0) { 680af96af6cSreyk /* Parent */ 681fbbcf6cdSdv vm->vm_pid = vm_pid; 6823bbc9b4eSdv close_fd(fds[1]); 683299ee841Sstefan 68424386e31Sdv /* Send the details over the pipe to the child. */ 68524386e31Sdv sz = atomicio(vwrite, fds[0], vm, sizeof(*vm)); 68624386e31Sdv if (sz != sizeof(*vm)) { 68724386e31Sdv log_warnx("%s: failed to send config for vm '%s'", 68824386e31Sdv __func__, vcp->vcp_name); 68924386e31Sdv ret = EIO; 69024386e31Sdv /* Defer error handling until after fd closing. */ 69124386e31Sdv } 69224386e31Sdv 69324386e31Sdv /* As the parent/vmm process, we no longer need these fds. */ 69473a98491Sdv for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) { 69573613953Sreyk for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) { 6963bbc9b4eSdv if (close_fd(vm->vm_disks[i][j]) == 0) 69773613953Sreyk vm->vm_disks[i][j] = -1; 69873613953Sreyk } 699f0bbd60cSreyk } 70073a98491Sdv for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) { 7013bbc9b4eSdv if (close_fd(vm->vm_ifs[i].vif_fd) == 0) 702789e0822Sreyk vm->vm_ifs[i].vif_fd = -1; 703f0bbd60cSreyk } 7043bbc9b4eSdv if (close_fd(vm->vm_kernel) == 0) 705f0bbd60cSreyk vm->vm_kernel = -1; 7063bbc9b4eSdv if (close_fd(vm->vm_cdrom) == 0) 70795ab188fSccardenas vm->vm_cdrom = -1; 7083bbc9b4eSdv if (close_fd(vm->vm_tty) == 0) 709f0bbd60cSreyk vm->vm_tty = -1; 710af96af6cSreyk 71124386e31Sdv /* Deferred error handling from sending the vm struct. */ 71224386e31Sdv if (ret == EIO) 71324386e31Sdv goto err; 71424386e31Sdv 7152272e586Sdv /* Send the current local prefix configuration. */ 7162272e586Sdv sz = atomicio(vwrite, fds[0], &env->vmd_cfg.cfg_localprefix, 7172272e586Sdv sizeof(env->vmd_cfg.cfg_localprefix)); 7182272e586Sdv if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) { 7192272e586Sdv log_warnx("%s: failed to send local prefix for vm '%s'", 7202272e586Sdv __func__, vcp->vcp_name); 7212272e586Sdv ret = EIO; 7222272e586Sdv goto err; 7232272e586Sdv } 7242272e586Sdv 725549bbfbbSdv /* Read back the kernel-generated vm id from the child */ 726fbbcf6cdSdv sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)); 727fbbcf6cdSdv if (sz != sizeof(vcp->vcp_id)) { 728fbbcf6cdSdv log_debug("%s: failed to receive vm id from vm %s", 729fbbcf6cdSdv __func__, vcp->vcp_name); 730fbbcf6cdSdv /* vmd could not allocate memory for the vm. */ 731fbbcf6cdSdv ret = ENOMEM; 732fbbcf6cdSdv goto err; 733fbbcf6cdSdv } 73448665f9bSreyk 73524386e31Sdv /* Check for an invalid id. This indicates child failure. */ 73648665f9bSreyk if (vcp->vcp_id == 0) 73748665f9bSreyk goto err; 73848665f9bSreyk 73948665f9bSreyk *id = vcp->vcp_id; 740a014dd99Sreyk *pid = vm->vm_pid; 74148665f9bSreyk 74224386e31Sdv /* Wire up our pipe into the event handling. */ 7433afb90b0Sreyk if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1) 7443afb90b0Sreyk fatal("setup vm pipe"); 745f0bbd60cSreyk } else { 74624386e31Sdv /* Child. Create a new session. */ 74724386e31Sdv if (setsid() == -1) 74824386e31Sdv fatal("setsid"); 74924386e31Sdv 7503bbc9b4eSdv close_fd(fds[0]); 7513bbc9b4eSdv close_fd(PROC_PARENT_SOCK_FILENO); 75248665f9bSreyk 75324386e31Sdv /* Detach from terminal. */ 75424386e31Sdv if (!env->vmd_debug && (fd = 75524386e31Sdv open("/dev/null", O_RDWR, 0)) != -1) { 75624386e31Sdv dup2(fd, STDIN_FILENO); 75724386e31Sdv dup2(fd, STDOUT_FILENO); 75824386e31Sdv dup2(fd, STDERR_FILENO); 75924386e31Sdv if (fd > 2) 76024386e31Sdv close(fd); 76124386e31Sdv } 762f0bbd60cSreyk 763f4b47ae8Sbluhm if (env->vmd_psp_fd > 0) 764f4b47ae8Sbluhm fcntl(env->vmd_psp_fd, F_SETFD, 0); /* psp device fd */ 765f4b47ae8Sbluhm 76624386e31Sdv /* 76724386e31Sdv * Prepare our new argv for execvp(2) with the fd of our open 76824386e31Sdv * pipe to the parent/vmm process as an argument. 76924386e31Sdv */ 77024386e31Sdv memset(num, 0, sizeof(num)); 77124386e31Sdv snprintf(num, sizeof(num), "%d", fds[1]); 7723c817da7Sdv memset(vmm_fd, 0, sizeof(vmm_fd)); 7733c817da7Sdv snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd); 774f4b47ae8Sbluhm memset(psp_fd, 0, sizeof(psp_fd)); 775f4b47ae8Sbluhm snprintf(psp_fd, sizeof(psp_fd), "%d", env->vmd_psp_fd); 77624386e31Sdv 7777f22b52aSbluhm i = 0; 7787f22b52aSbluhm nargv[i++] = env->argv0; 7797f22b52aSbluhm nargv[i++] = "-V"; 7807f22b52aSbluhm nargv[i++] = num; 7817f22b52aSbluhm nargv[i++] = "-i"; 7827f22b52aSbluhm nargv[i++] = vmm_fd; 7837f22b52aSbluhm nargv[i++] = "-j"; 7847f22b52aSbluhm nargv[i++] = psp_fd; 7857f22b52aSbluhm if (env->vmd_debug) 7867f22b52aSbluhm nargv[i++] = "-d"; 7877f22b52aSbluhm if (env->vmd_verbose == 1) 7887f22b52aSbluhm nargv[i++] = "-v"; 7897f22b52aSbluhm else if (env->vmd_verbose > 1) 7907f22b52aSbluhm nargv[i++] = "-vv"; 7917f22b52aSbluhm nargv[i++] = NULL; 7927f22b52aSbluhm if (i > sizeof(nargv) / sizeof(nargv[0])) 7937f22b52aSbluhm fatalx("%s: nargv overflow", __func__); 79461f4cd73Sdv 79524386e31Sdv /* Control resumes in vmd main(). */ 79624386e31Sdv execvp(nargv[0], nargv); 79724386e31Sdv 79824386e31Sdv ret = errno; 79924386e31Sdv log_warn("execvp %s", nargv[0]); 800c8f0d715Sreyk _exit(ret); 80124386e31Sdv /* NOTREACHED */ 802af96af6cSreyk } 803af96af6cSreyk 804f0bbd60cSreyk return (0); 805af96af6cSreyk 806af96af6cSreyk err: 80724386e31Sdv if (!vm->vm_from_config) 808ddadf993Sreyk vm_remove(vm, __func__); 809af96af6cSreyk 810af96af6cSreyk return (ret); 811af96af6cSreyk } 812af96af6cSreyk 813af96af6cSreyk /* 814af96af6cSreyk * get_info_vm 815af96af6cSreyk * 816af96af6cSreyk * Returns a list of VMs known to vmm(4). 817af96af6cSreyk * 818af96af6cSreyk * Parameters: 8196fcc05d2Sreyk * ps: the privsep context. 8206fcc05d2Sreyk * imsg: the received imsg including the peer id. 8216fcc05d2Sreyk * terminate: terminate the listed vm. 822af96af6cSreyk * 823af96af6cSreyk * Return values: 824af96af6cSreyk * 0: success 825af96af6cSreyk * !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl) 826af96af6cSreyk */ 827af96af6cSreyk int 8286fcc05d2Sreyk get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate) 829af96af6cSreyk { 830af96af6cSreyk int ret; 831af96af6cSreyk size_t ct, i; 832af96af6cSreyk struct vm_info_params vip; 833af96af6cSreyk struct vm_info_result *info; 8346fcc05d2Sreyk struct vm_terminate_params vtp; 8353a45f7b6Sreyk struct vmop_info_result vir; 836af96af6cSreyk 837af96af6cSreyk /* 838af96af6cSreyk * We issue the VMM_IOC_INFO ioctl twice, once with an input 839af96af6cSreyk * buffer size of 0, which results in vmm(4) returning the 840af96af6cSreyk * number of bytes required back to us in vip.vip_size, 841af96af6cSreyk * and then we call it again after malloc'ing the required 842af96af6cSreyk * number of bytes. 843af96af6cSreyk * 844549bbfbbSdv * It is possible that we could fail a second time (e.g. if 845af96af6cSreyk * another VM was created in the instant between the two 846af96af6cSreyk * ioctls, but in that case the caller can just try again 847af96af6cSreyk * as vmm(4) will return a zero-sized list in that case. 848af96af6cSreyk */ 849af96af6cSreyk vip.vip_size = 0; 850af96af6cSreyk info = NULL; 851af96af6cSreyk ret = 0; 8523a45f7b6Sreyk memset(&vir, 0, sizeof(vir)); 853af96af6cSreyk 854af96af6cSreyk /* First ioctl to see how many bytes needed (vip.vip_size) */ 855df69c215Sderaadt if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) 856af96af6cSreyk return (errno); 857af96af6cSreyk 858af96af6cSreyk if (vip.vip_info_ct != 0) 859af96af6cSreyk return (EIO); 860af96af6cSreyk 861af96af6cSreyk info = malloc(vip.vip_size); 862af96af6cSreyk if (info == NULL) 863af96af6cSreyk return (ENOMEM); 864af96af6cSreyk 865af96af6cSreyk /* Second ioctl to get the actual list */ 866af96af6cSreyk vip.vip_info = info; 867df69c215Sderaadt if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) { 868af96af6cSreyk ret = errno; 869af96af6cSreyk free(info); 870af96af6cSreyk return (ret); 871af96af6cSreyk } 872af96af6cSreyk 873f0bbd60cSreyk /* Return info */ 874af96af6cSreyk ct = vip.vip_size / sizeof(struct vm_info_result); 875af96af6cSreyk for (i = 0; i < ct; i++) { 8766fcc05d2Sreyk if (terminate) { 8776fcc05d2Sreyk vtp.vtp_vm_id = info[i].vir_id; 8786fcc05d2Sreyk if ((ret = terminate_vm(&vtp)) != 0) 879ed010ab2Sdv break; 880f6e5c9ebSreyk log_debug("%s: terminated vm %s (id %d)", __func__, 8816fcc05d2Sreyk info[i].vir_name, info[i].vir_id); 8823a45f7b6Sreyk continue; 8833a45f7b6Sreyk } 8843a45f7b6Sreyk memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info)); 885eb1cd41dSreyk vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL); 8863a45f7b6Sreyk if (proc_compose_imsg(ps, PROC_PARENT, -1, 88741b2bea6Sreyk IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1, 888ed010ab2Sdv &vir, sizeof(vir)) == -1) { 889ed010ab2Sdv ret = EIO; 890ed010ab2Sdv break; 891ed010ab2Sdv } 892af96af6cSreyk } 893af96af6cSreyk free(info); 894549bbfbbSdv 895ed010ab2Sdv return (ret); 896af96af6cSreyk } 897