xref: /openbsd-src/usr.sbin/vmd/vmm.c (revision 0a9d031fce78c0ebce0995b311938b1c87b1e208)
1*0a9d031fSclaudio /*	$OpenBSD: vmm.c,v 1.130 2024/11/21 13:39:34 claudio Exp $	*/
2af96af6cSreyk 
3af96af6cSreyk /*
4af96af6cSreyk  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5af96af6cSreyk  *
6af96af6cSreyk  * Permission to use, copy, modify, and distribute this software for any
7af96af6cSreyk  * purpose with or without fee is hereby granted, provided that the above
8af96af6cSreyk  * copyright notice and this permission notice appear in all copies.
9af96af6cSreyk  *
10af96af6cSreyk  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11af96af6cSreyk  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12af96af6cSreyk  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13af96af6cSreyk  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14af96af6cSreyk  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15af96af6cSreyk  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16af96af6cSreyk  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17af96af6cSreyk  */
18af96af6cSreyk 
1924fb43d0Sderaadt #include <sys/types.h>
20af96af6cSreyk #include <sys/ioctl.h>
21af96af6cSreyk #include <sys/queue.h>
22299ee841Sstefan #include <sys/wait.h>
23af96af6cSreyk #include <sys/socket.h>
24af96af6cSreyk 
25ba66f564Sdv #include <dev/vmm/vmm.h>
26af96af6cSreyk 
275921535cSreyk #include <net/if.h>
285921535cSreyk 
29af96af6cSreyk #include <errno.h>
30ecc93de1Smlarkin #include <event.h>
31af96af6cSreyk #include <fcntl.h>
32af96af6cSreyk #include <imsg.h>
33af96af6cSreyk #include <limits.h>
34af96af6cSreyk #include <stdio.h>
35af96af6cSreyk #include <stdlib.h>
36af96af6cSreyk #include <string.h>
37af96af6cSreyk #include <unistd.h>
38af96af6cSreyk 
39af96af6cSreyk #include "vmd.h"
40fbbcf6cdSdv #include "atomicio.h"
41b3bc6112Sdv #include "proc.h"
42af96af6cSreyk 
43299ee841Sstefan void	vmm_sighdlr(int, short, void *);
44a014dd99Sreyk int	vmm_start_vm(struct imsg *, uint32_t *, pid_t *);
45f0bbd60cSreyk int	vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
46f0bbd60cSreyk void	vmm_run(struct privsep *, struct privsep_proc *, void *);
473afb90b0Sreyk void	vmm_dispatch_vm(int, short, void *);
481e1977eeSreyk int	terminate_vm(struct vm_terminate_params *);
491e1977eeSreyk int	get_info_vm(struct privsep *, struct imsg *, int);
501e1977eeSreyk int	opentap(char *);
51af96af6cSreyk 
52af96af6cSreyk extern struct vmd *env;
53af96af6cSreyk 
54f0bbd60cSreyk static struct privsep_proc procs[] = {
55f0bbd60cSreyk 	{ "parent",	PROC_PARENT,	vmm_dispatch_parent  },
56f0bbd60cSreyk };
57f0bbd60cSreyk 
58bcc679a1Sreyk void
59f0bbd60cSreyk vmm(struct privsep *ps, struct privsep_proc *p)
60f0bbd60cSreyk {
61bcc679a1Sreyk 	proc_run(ps, p, procs, nitems(procs), vmm_run, NULL);
62f0bbd60cSreyk }
63f0bbd60cSreyk 
64f0bbd60cSreyk void
65f0bbd60cSreyk vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
66f0bbd60cSreyk {
67f0bbd60cSreyk 	if (config_init(ps->ps_env) == -1)
68f0bbd60cSreyk 		fatal("failed to initialize configuration");
69f0bbd60cSreyk 
7024386e31Sdv 	/*
7124386e31Sdv 	 * We aren't root, so we can't chroot(2). Use unveil(2) instead.
7224386e31Sdv 	 */
7324386e31Sdv 	if (unveil(env->argv0, "x") == -1)
7424386e31Sdv 		fatal("unveil %s", env->argv0);
7524386e31Sdv 	if (unveil(NULL, NULL) == -1)
7624386e31Sdv 		fatal("unveil lock");
77299ee841Sstefan 
78f0bbd60cSreyk 	/*
79f0bbd60cSreyk 	 * pledge in the vmm process:
80f0bbd60cSreyk 	 * stdio - for malloc and basic I/O including events.
816bde4a58Sreyk 	 * vmm - for the vmm ioctls and operations.
8224386e31Sdv 	 * proc, exec - for forking and execing new vm's.
8394c51922Sdv 	 * sendfd - for sending send/recv fds to vm proc.
846bde4a58Sreyk 	 * recvfd - for disks, interfaces and other fds.
85f0bbd60cSreyk 	 */
8624386e31Sdv 	if (pledge("stdio vmm sendfd recvfd proc exec", NULL) == -1)
87f0bbd60cSreyk 		fatal("pledge");
8824386e31Sdv 
8924386e31Sdv 	signal_del(&ps->ps_evsigchld);
9024386e31Sdv 	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
9124386e31Sdv 	signal_add(&ps->ps_evsigchld, NULL);
92f0bbd60cSreyk }
93f0bbd60cSreyk 
94af96af6cSreyk int
95f0bbd60cSreyk vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
96af96af6cSreyk {
97af96af6cSreyk 	struct privsep		*ps = p->p_ps;
98ec4b63a0Sclaudio 	int			 res = 0, cmd = 0, verbose;
99eb1cd41dSreyk 	struct vmd_vm		*vm = NULL;
1006fcc05d2Sreyk 	struct vm_terminate_params vtp;
10152e954a3Spd 	struct vmop_id		 vid;
10248665f9bSreyk 	struct vmop_result	 vmr;
103eed20f3bSpd 	struct vmop_create_params vmc;
10497f33f1dSdv 	struct vmop_addr_result  var;
10595176482Sclaudio 	uint32_t		 id = 0, peerid = imsg->hdr.peerid;
106a014dd99Sreyk 	pid_t			 pid = 0;
1073be9785fSreyk 	unsigned int		 mode, flags;
108af96af6cSreyk 
109af96af6cSreyk 	switch (imsg->hdr.type) {
110af96af6cSreyk 	case IMSG_VMDOP_START_VM_REQUEST:
1114f76ab55Sreyk 		res = config_getvm(ps, imsg);
11248665f9bSreyk 		if (res == -1) {
11348665f9bSreyk 			res = errno;
114f0bbd60cSreyk 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
11548665f9bSreyk 		}
116f0bbd60cSreyk 		break;
11795ab188fSccardenas 	case IMSG_VMDOP_START_VM_CDROM:
11895ab188fSccardenas 		res = config_getcdrom(ps, imsg);
11995ab188fSccardenas 		if (res == -1) {
12095ab188fSccardenas 			res = errno;
12195ab188fSccardenas 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
12295ab188fSccardenas 		}
12395ab188fSccardenas 		break;
124f0bbd60cSreyk 	case IMSG_VMDOP_START_VM_DISK:
125f0bbd60cSreyk 		res = config_getdisk(ps, imsg);
1264a0e5604Sreyk 		if (res == -1) {
1274a0e5604Sreyk 			res = errno;
128f0bbd60cSreyk 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
1294a0e5604Sreyk 		}
130f0bbd60cSreyk 		break;
131f0bbd60cSreyk 	case IMSG_VMDOP_START_VM_IF:
132f0bbd60cSreyk 		res = config_getif(ps, imsg);
1334a0e5604Sreyk 		if (res == -1) {
1344a0e5604Sreyk 			res = errno;
135f0bbd60cSreyk 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
1364a0e5604Sreyk 		}
137f0bbd60cSreyk 		break;
138f0bbd60cSreyk 	case IMSG_VMDOP_START_VM_END:
139a014dd99Sreyk 		res = vmm_start_vm(imsg, &id, &pid);
140eb1cd41dSreyk 		/* Check if the ID can be mapped correctly */
141fbbcf6cdSdv 		if (res == 0 && (id = vm_id2vmid(id, NULL)) == 0)
142eb1cd41dSreyk 			res = ENOENT;
143af96af6cSreyk 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
144af96af6cSreyk 		break;
145af96af6cSreyk 	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
1463be9785fSreyk 		IMSG_SIZE_CHECK(imsg, &vid);
1473be9785fSreyk 		memcpy(&vid, imsg->data, sizeof(vid));
1483be9785fSreyk 		id = vid.vid_id;
1493be9785fSreyk 		flags = vid.vid_flags;
150ddadf993Sreyk 
151ddadf993Sreyk 		DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id);
152f84d5d33Sreyk 
1533be9785fSreyk 		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
1543be9785fSreyk 
155eb1cd41dSreyk 		if (id == 0) {
156eb1cd41dSreyk 			res = ENOENT;
1572da5c9dbSmlarkin 		} else if ((vm = vm_getbyvmid(id)) != NULL) {
1583be9785fSreyk 			if (flags & VMOP_FORCE) {
159f6e5c9ebSreyk 				vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
16019700f36Sjasper 				vm->vm_state |= VM_STATE_SHUTDOWN;
161f6e5c9ebSreyk 				(void)terminate_vm(&vtp);
162f6e5c9ebSreyk 				res = 0;
16319700f36Sjasper 			} else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) {
164ddadf993Sreyk 				log_debug("%s: sending shutdown request"
165ddadf993Sreyk 				    " to vm %d", __func__, id);
166f84d5d33Sreyk 
167f84d5d33Sreyk 				/*
1682da5c9dbSmlarkin 				 * Request reboot but mark the VM as shutting
1692da5c9dbSmlarkin 				 * down. This way we can terminate the VM after
1702da5c9dbSmlarkin 				 * the triple fault instead of reboot and
1712da5c9dbSmlarkin 				 * avoid being stuck in the ACPI-less powerdown
1722da5c9dbSmlarkin 				 * ("press any key to reboot") of the VM.
173f84d5d33Sreyk 				 */
17419700f36Sjasper 				vm->vm_state |= VM_STATE_SHUTDOWN;
175f84d5d33Sreyk 				if (imsg_compose_event(&vm->vm_iev,
1762da5c9dbSmlarkin 				    IMSG_VMDOP_VM_REBOOT,
1772da5c9dbSmlarkin 				    0, 0, -1, NULL, 0) == -1)
178f84d5d33Sreyk 					res = errno;
179f84d5d33Sreyk 				else
180f84d5d33Sreyk 					res = 0;
181f84d5d33Sreyk 			} else {
182bb4a0381Smlarkin 				/*
183bb4a0381Smlarkin 				 * VM is currently being shutdown.
184bb4a0381Smlarkin 				 * Check to see if the VM process is still
185bb4a0381Smlarkin 				 * active.  If not, return VMD_VM_STOP_INVALID.
186bb4a0381Smlarkin 				 */
1873be9785fSreyk 				if (vm_vmid2id(vm->vm_vmid, vm) == 0) {
1882da5c9dbSmlarkin 					log_debug("%s: no vm running anymore",
1892da5c9dbSmlarkin 					    __func__);
1902da5c9dbSmlarkin 					res = VMD_VM_STOP_INVALID;
1912da5c9dbSmlarkin 				}
1922da5c9dbSmlarkin 			}
1932da5c9dbSmlarkin 		} else {
194549bbfbbSdv 			/* VM doesn't exist, cannot stop vm */
1952da5c9dbSmlarkin 			log_debug("%s: cannot stop vm that is not running",
1962da5c9dbSmlarkin 			    __func__);
1972da5c9dbSmlarkin 			res = VMD_VM_STOP_INVALID;
198f84d5d33Sreyk 		}
199af96af6cSreyk 		break;
200af96af6cSreyk 	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
2016fcc05d2Sreyk 		res = get_info_vm(ps, imsg, 0);
202af96af6cSreyk 		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
203af96af6cSreyk 		break;
204c48cfcf4Sreyk 	case IMSG_VMDOP_CONFIG:
205c48cfcf4Sreyk 		config_getconfig(env, imsg);
206c48cfcf4Sreyk 		break;
207008065a5Sreyk 	case IMSG_CTL_RESET:
2081f7fe034Sreyk 		IMSG_SIZE_CHECK(imsg, &mode);
2091f7fe034Sreyk 		memcpy(&mode, imsg->data, sizeof(mode));
2101f7fe034Sreyk 
2111f7fe034Sreyk 		if (mode & CONFIG_VMS) {
2121f7fe034Sreyk 			/* Terminate and remove all VMs */
2131f7fe034Sreyk 			vmm_shutdown();
2141f7fe034Sreyk 			mode &= ~CONFIG_VMS;
2151f7fe034Sreyk 		}
2161f7fe034Sreyk 
217008065a5Sreyk 		config_getreset(env, imsg);
218008065a5Sreyk 		break;
2193afb90b0Sreyk 	case IMSG_CTL_VERBOSE:
2203afb90b0Sreyk 		IMSG_SIZE_CHECK(imsg, &verbose);
2213afb90b0Sreyk 		memcpy(&verbose, imsg->data, sizeof(verbose));
2223afb90b0Sreyk 		log_setverbose(verbose);
22308d0da61Sdv 		env->vmd_verbose = verbose;
2243afb90b0Sreyk 		/* Forward message to each VM process */
2253afb90b0Sreyk 		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
2263afb90b0Sreyk 			imsg_compose_event(&vm->vm_iev,
2273afb90b0Sreyk 			    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
2283afb90b0Sreyk 			    -1, &verbose, sizeof(verbose));
2293afb90b0Sreyk 		}
2303afb90b0Sreyk 		break;
23152e954a3Spd 	case IMSG_VMDOP_PAUSE_VM:
23252e954a3Spd 		IMSG_SIZE_CHECK(imsg, &vid);
23352e954a3Spd 		memcpy(&vid, imsg->data, sizeof(vid));
23452e954a3Spd 		id = vid.vid_id;
23552e954a3Spd 		if ((vm = vm_getbyvmid(id)) == NULL) {
23652e954a3Spd 			res = ENOENT;
23752e954a3Spd 			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
23852e954a3Spd 			break;
23952e954a3Spd 		}
24052e954a3Spd 		imsg_compose_event(&vm->vm_iev,
24152e954a3Spd 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
24253027660Sclaudio 		    imsg_get_fd(imsg), &vid, sizeof(vid));
24352e954a3Spd 		break;
24452e954a3Spd 	case IMSG_VMDOP_UNPAUSE_VM:
24552e954a3Spd 		IMSG_SIZE_CHECK(imsg, &vid);
24652e954a3Spd 		memcpy(&vid, imsg->data, sizeof(vid));
24752e954a3Spd 		id = vid.vid_id;
24852e954a3Spd 		if ((vm = vm_getbyvmid(id)) == NULL) {
24952e954a3Spd 			res = ENOENT;
25052e954a3Spd 			cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
25152e954a3Spd 			break;
25252e954a3Spd 		}
25352e954a3Spd 		imsg_compose_event(&vm->vm_iev,
25452e954a3Spd 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
25553027660Sclaudio 		    imsg_get_fd(imsg), &vid, sizeof(vid));
25652e954a3Spd 		break;
257eed20f3bSpd 	case IMSG_VMDOP_SEND_VM_REQUEST:
258eed20f3bSpd 		IMSG_SIZE_CHECK(imsg, &vid);
259eed20f3bSpd 		memcpy(&vid, imsg->data, sizeof(vid));
260eed20f3bSpd 		id = vid.vid_id;
261eed20f3bSpd 		if ((vm = vm_getbyvmid(id)) == NULL) {
262eed20f3bSpd 			res = ENOENT;
26353027660Sclaudio 			close(imsg_get_fd(imsg));	/* XXX */
264eed20f3bSpd 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
265eed20f3bSpd 			break;
266eed20f3bSpd 		}
267eed20f3bSpd 		imsg_compose_event(&vm->vm_iev,
268eed20f3bSpd 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
26953027660Sclaudio 		    imsg_get_fd(imsg), &vid, sizeof(vid));
270eed20f3bSpd 		break;
271eed20f3bSpd 	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
272eed20f3bSpd 		IMSG_SIZE_CHECK(imsg, &vmc);
273eed20f3bSpd 		memcpy(&vmc, imsg->data, sizeof(vmc));
274ec4b63a0Sclaudio 		if (vm_register(ps, &vmc, &vm,
275ec4b63a0Sclaudio 		    imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) {
276ec4b63a0Sclaudio 			res = errno;
277ec4b63a0Sclaudio 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
278ec4b63a0Sclaudio 			break;
279ec4b63a0Sclaudio 		}
28053027660Sclaudio 		vm->vm_tty = imsg_get_fd(imsg);
28119700f36Sjasper 		vm->vm_state |= VM_STATE_RECEIVED;
282548054a9Spd 		vm->vm_state |= VM_STATE_PAUSED;
283eed20f3bSpd 		break;
284eed20f3bSpd 	case IMSG_VMDOP_RECEIVE_VM_END:
285eed20f3bSpd 		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
286eed20f3bSpd 			res = ENOENT;
28753027660Sclaudio 			close(imsg_get_fd(imsg));	/* XXX */
288eed20f3bSpd 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
289eed20f3bSpd 			break;
290eed20f3bSpd 		}
29153027660Sclaudio 		vm->vm_receive_fd = imsg_get_fd(imsg);
292a014dd99Sreyk 		res = vmm_start_vm(imsg, &id, &pid);
293661d0050Spd 		/* Check if the ID can be mapped correctly */
294661d0050Spd 		if ((id = vm_id2vmid(id, NULL)) == 0)
295661d0050Spd 			res = ENOENT;
296eed20f3bSpd 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
297eed20f3bSpd 		break;
29897f33f1dSdv 	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
29997f33f1dSdv 		IMSG_SIZE_CHECK(imsg, &var);
30097f33f1dSdv 		memcpy(&var, imsg->data, sizeof(var));
30197f33f1dSdv 		if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) {
30297f33f1dSdv 			res = ENOENT;
30397f33f1dSdv 			break;
30497f33f1dSdv 		}
30597f33f1dSdv 		/* Forward hardware address details to the guest vm */
30697f33f1dSdv 		imsg_compose_event(&vm->vm_iev,
30797f33f1dSdv 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
30853027660Sclaudio 		    imsg_get_fd(imsg), &var, sizeof(var));
30997f33f1dSdv 		break;
3102748f5e2Sdv 	case IMSG_VMDOP_RECEIVE_VMM_FD:
3112748f5e2Sdv 		if (env->vmd_fd > -1)
3122748f5e2Sdv 			fatalx("already received vmm fd");
31353027660Sclaudio 		env->vmd_fd = imsg_get_fd(imsg);
3142748f5e2Sdv 
3152748f5e2Sdv 		/* Get and terminate all running VMs */
3162748f5e2Sdv 		get_info_vm(ps, NULL, 1);
3172748f5e2Sdv 		break;
318f4b47ae8Sbluhm 	case IMSG_VMDOP_RECEIVE_PSP_FD:
319f4b47ae8Sbluhm 		if (env->vmd_psp_fd > -1)
320f4b47ae8Sbluhm 			fatalx("already received psp fd");
3210a6e69e4Sclaudio 		env->vmd_psp_fd = imsg_get_fd(imsg);
322f4b47ae8Sbluhm 		break;
323af96af6cSreyk 	default:
324af96af6cSreyk 		return (-1);
325af96af6cSreyk 	}
326af96af6cSreyk 
32748665f9bSreyk 	switch (cmd) {
32848665f9bSreyk 	case 0:
32948665f9bSreyk 		break;
33048665f9bSreyk 	case IMSG_VMDOP_START_VM_RESPONSE:
331d8cb3559Sreyk 		if (res != 0) {
332d3b18ed2Sreyk 			/* Remove local reference if it exists */
333e0994759Smlarkin 			if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) {
334e0994759Smlarkin 				log_debug("%s: removing vm, START_VM_RESPONSE",
335e0994759Smlarkin 				    __func__);
336ddadf993Sreyk 				vm_remove(vm, __func__);
337d8cb3559Sreyk 			}
338e0994759Smlarkin 		}
339eb1cd41dSreyk 		if (id == 0)
340eb1cd41dSreyk 			id = imsg->hdr.peerid;
341ec4b63a0Sclaudio 		/* FALLTHROUGH */
34252e954a3Spd 	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
34352e954a3Spd 	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
34448665f9bSreyk 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
34548665f9bSreyk 		memset(&vmr, 0, sizeof(vmr));
34648665f9bSreyk 		vmr.vmr_result = res;
34748665f9bSreyk 		vmr.vmr_id = id;
348a014dd99Sreyk 		vmr.vmr_pid = pid;
34948665f9bSreyk 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
35095176482Sclaudio 		    peerid, -1, &vmr, sizeof(vmr)) == -1)
351af96af6cSreyk 			return (-1);
35248665f9bSreyk 		break;
35348665f9bSreyk 	default:
35448665f9bSreyk 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
35595176482Sclaudio 		    peerid, -1, &res, sizeof(res)) == -1)
35648665f9bSreyk 			return (-1);
35748665f9bSreyk 		break;
35848665f9bSreyk 	}
359af96af6cSreyk 
360af96af6cSreyk 	return (0);
361af96af6cSreyk }
362af96af6cSreyk 
363299ee841Sstefan void
364299ee841Sstefan vmm_sighdlr(int sig, short event, void *arg)
365299ee841Sstefan {
366299ee841Sstefan 	struct privsep *ps = arg;
367c8f0d715Sreyk 	int status, ret = 0;
368299ee841Sstefan 	uint32_t vmid;
369299ee841Sstefan 	pid_t pid;
370299ee841Sstefan 	struct vmop_result vmr;
371299ee841Sstefan 	struct vmd_vm *vm;
372299ee841Sstefan 	struct vm_terminate_params vtp;
373299ee841Sstefan 
374bd6f7673Smlarkin 	log_debug("%s: handling signal %d", __func__, sig);
375299ee841Sstefan 	switch (sig) {
376299ee841Sstefan 	case SIGCHLD:
377299ee841Sstefan 		do {
378299ee841Sstefan 			pid = waitpid(-1, &status, WNOHANG);
379299ee841Sstefan 			if (pid <= 0)
380299ee841Sstefan 				continue;
381299ee841Sstefan 
382299ee841Sstefan 			if (WIFEXITED(status) || WIFSIGNALED(status)) {
383299ee841Sstefan 				vm = vm_getbypid(pid);
384299ee841Sstefan 				if (vm == NULL) {
385299ee841Sstefan 					/*
386299ee841Sstefan 					 * If the VM is gone already, it
387299ee841Sstefan 					 * got terminated via a
388299ee841Sstefan 					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
389299ee841Sstefan 					 */
390299ee841Sstefan 					continue;
391299ee841Sstefan 				}
392299ee841Sstefan 
393c8f0d715Sreyk 				if (WIFEXITED(status))
394c8f0d715Sreyk 					ret = WEXITSTATUS(status);
395c8f0d715Sreyk 
396549bbfbbSdv 				/* Don't reboot on pending shutdown */
397549bbfbbSdv 				if (ret == EAGAIN &&
398549bbfbbSdv 				    (vm->vm_state & VM_STATE_SHUTDOWN))
399f84d5d33Sreyk 					ret = 0;
400f84d5d33Sreyk 
401ea9c30d9Sedd 				vmid = vm->vm_params.vmc_params.vcp_id;
402299ee841Sstefan 				vtp.vtp_vm_id = vmid;
403f6e5c9ebSreyk 
404f6e5c9ebSreyk 				if (terminate_vm(&vtp) == 0)
405f6e5c9ebSreyk 					log_debug("%s: terminated vm %s"
406f6e5c9ebSreyk 					    " (id %d)", __func__,
407f6e5c9ebSreyk 					    vm->vm_params.vmc_params.vcp_name,
408f6e5c9ebSreyk 					    vm->vm_vmid);
409f6e5c9ebSreyk 
410299ee841Sstefan 				memset(&vmr, 0, sizeof(vmr));
411c8f0d715Sreyk 				vmr.vmr_result = ret;
412eb1cd41dSreyk 				vmr.vmr_id = vm_id2vmid(vmid, vm);
413299ee841Sstefan 				if (proc_compose_imsg(ps, PROC_PARENT,
414299ee841Sstefan 				    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
4153be9785fSreyk 				    vm->vm_peerid, -1,
4163be9785fSreyk 				    &vmr, sizeof(vmr)) == -1)
417299ee841Sstefan 					log_warnx("could not signal "
418299ee841Sstefan 					    "termination of VM %u to "
419eb1cd41dSreyk 					    "parent", vm->vm_vmid);
42013dd90b2Sreyk 
421ddadf993Sreyk 				vm_remove(vm, __func__);
422299ee841Sstefan 			} else
423299ee841Sstefan 				fatalx("unexpected cause of SIGCHLD");
424299ee841Sstefan 		} while (pid > 0 || (pid == -1 && errno == EINTR));
425299ee841Sstefan 		break;
426299ee841Sstefan 	default:
427299ee841Sstefan 		fatalx("unexpected signal");
428299ee841Sstefan 	}
429299ee841Sstefan }
430299ee841Sstefan 
431af96af6cSreyk /*
432ae6ee9deSreyk  * vmm_shutdown
433ae6ee9deSreyk  *
434ae6ee9deSreyk  * Terminate VMs on shutdown to avoid "zombie VM" processes.
435ae6ee9deSreyk  */
436ae6ee9deSreyk void
437ae6ee9deSreyk vmm_shutdown(void)
438ae6ee9deSreyk {
439ae6ee9deSreyk 	struct vm_terminate_params vtp;
440ae6ee9deSreyk 	struct vmd_vm *vm, *vm_next;
441ae6ee9deSreyk 
442ae6ee9deSreyk 	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
443eb1cd41dSreyk 		vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
444ae6ee9deSreyk 
445ae6ee9deSreyk 		/* XXX suspend or request graceful shutdown */
44613dd90b2Sreyk 		(void)terminate_vm(&vtp);
447ddadf993Sreyk 		vm_remove(vm, __func__);
448ae6ee9deSreyk 	}
449ae6ee9deSreyk }
450ae6ee9deSreyk 
4511e1977eeSreyk /*
4521e1977eeSreyk  * vmm_pipe
4531e1977eeSreyk  *
4541e1977eeSreyk  * Create a new imsg control channel between vmm parent and a VM
4551e1977eeSreyk  * (can be called on both sides).
4561e1977eeSreyk  */
4573afb90b0Sreyk int
4583afb90b0Sreyk vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *))
4593afb90b0Sreyk {
4603afb90b0Sreyk 	struct imsgev	*iev = &vm->vm_iev;
4613afb90b0Sreyk 
462b3bc6112Sdv 	/*
463b3bc6112Sdv 	 * Set to close-on-exec as vmm_pipe is used after fork+exec to
464b3bc6112Sdv 	 * establish async ipc between vm and vmd's vmm process. This
465b3bc6112Sdv 	 * prevents future vm processes or virtio subprocesses from
466b3bc6112Sdv 	 * inheriting this control channel.
467b3bc6112Sdv 	 */
468b3bc6112Sdv 	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
469b3bc6112Sdv 		log_warn("failed to set close-on-exec for vmm ipc channel");
4703afb90b0Sreyk 		return (-1);
4713afb90b0Sreyk 	}
4723afb90b0Sreyk 
473*0a9d031fSclaudio 	if (imsgbuf_init(&iev->ibuf, fd) == -1) {
474*0a9d031fSclaudio 		log_warn("failed to init imsgbuf");
475*0a9d031fSclaudio 		return (-1);
476*0a9d031fSclaudio 	}
477*0a9d031fSclaudio 	imsgbuf_allow_fdpass(&iev->ibuf);
4783afb90b0Sreyk 	iev->handler = cb;
4793afb90b0Sreyk 	iev->data = vm;
4803afb90b0Sreyk 	imsg_event_add(iev);
4813afb90b0Sreyk 
4823afb90b0Sreyk 	return (0);
4833afb90b0Sreyk }
4843afb90b0Sreyk 
4851e1977eeSreyk /*
4861e1977eeSreyk  * vmm_dispatch_vm
4871e1977eeSreyk  *
4881e1977eeSreyk  * imsg callback for messages that are received from a VM child process.
4891e1977eeSreyk  */
4903afb90b0Sreyk void
4913afb90b0Sreyk vmm_dispatch_vm(int fd, short event, void *arg)
4923afb90b0Sreyk {
4933afb90b0Sreyk 	struct vmd_vm		*vm = arg;
494eed20f3bSpd 	struct vmop_result	 vmr;
4953afb90b0Sreyk 	struct imsgev		*iev = &vm->vm_iev;
4963afb90b0Sreyk 	struct imsgbuf		*ibuf = &iev->ibuf;
4973afb90b0Sreyk 	struct imsg		 imsg;
4983afb90b0Sreyk 	ssize_t			 n;
49952e954a3Spd 	unsigned int		 i;
5003afb90b0Sreyk 
5013afb90b0Sreyk 	if (event & EV_READ) {
502d12ef5f3Sclaudio 		if ((n = imsgbuf_read(ibuf)) == -1)
503dd7efffeSclaudio 			fatal("%s: imsgbuf_read", __func__);
5043afb90b0Sreyk 		if (n == 0) {
505549bbfbbSdv 			/* This pipe is dead, so remove the event handler */
5063afb90b0Sreyk 			event_del(&iev->ev);
5073afb90b0Sreyk 			return;
5083afb90b0Sreyk 		}
5093afb90b0Sreyk 	}
5103afb90b0Sreyk 
5113afb90b0Sreyk 	if (event & EV_WRITE) {
512dd7efffeSclaudio 		if (imsgbuf_write(ibuf) == -1) {
513c1aa9554Sclaudio 			if (errno == EPIPE) {
514c1aa9554Sclaudio 				/* This pipe is dead, remove the handler */
5153afb90b0Sreyk 				event_del(&iev->ev);
5163afb90b0Sreyk 				return;
5173afb90b0Sreyk 			}
518dd7efffeSclaudio 			fatal("%s: imsgbuf_write fd %d", __func__, ibuf->fd);
519c1aa9554Sclaudio 		}
5203afb90b0Sreyk 	}
5213afb90b0Sreyk 
5223afb90b0Sreyk 	for (;;) {
5233afb90b0Sreyk 		if ((n = imsg_get(ibuf, &imsg)) == -1)
5243afb90b0Sreyk 			fatal("%s: imsg_get", __func__);
5253afb90b0Sreyk 		if (n == 0)
5263afb90b0Sreyk 			break;
5273afb90b0Sreyk 
5281f5e00e0Sreyk 		DPRINTF("%s: got imsg %d from %s",
5293afb90b0Sreyk 		    __func__, imsg.hdr.type,
5303afb90b0Sreyk 		    vm->vm_params.vmc_params.vcp_name);
5313afb90b0Sreyk 
5323afb90b0Sreyk 		switch (imsg.hdr.type) {
5333320a88dSreyk 		case IMSG_VMDOP_VM_SHUTDOWN:
53419700f36Sjasper 			vm->vm_state |= VM_STATE_SHUTDOWN;
5353320a88dSreyk 			break;
5363320a88dSreyk 		case IMSG_VMDOP_VM_REBOOT:
53719700f36Sjasper 			vm->vm_state &= ~VM_STATE_SHUTDOWN;
5383320a88dSreyk 			break;
539eed20f3bSpd 		case IMSG_VMDOP_SEND_VM_RESPONSE:
540eed20f3bSpd 			IMSG_SIZE_CHECK(&imsg, &vmr);
54152e954a3Spd 		case IMSG_VMDOP_PAUSE_VM_RESPONSE:
54252e954a3Spd 		case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
543e8991ca8Sjsg 			for (i = 0; i < nitems(procs); i++) {
54452e954a3Spd 				if (procs[i].p_id == PROC_PARENT) {
54552e954a3Spd 					proc_forward_imsg(procs[i].p_ps,
54667de8c6dSreyk 					    &imsg, PROC_PARENT, -1);
54752e954a3Spd 					break;
54852e954a3Spd 				}
54952e954a3Spd 			}
55052e954a3Spd 			break;
55152e954a3Spd 
5523afb90b0Sreyk 		default:
5533afb90b0Sreyk 			fatalx("%s: got invalid imsg %d from %s",
5543afb90b0Sreyk 			    __func__, imsg.hdr.type,
5553afb90b0Sreyk 			    vm->vm_params.vmc_params.vcp_name);
5563afb90b0Sreyk 		}
5573afb90b0Sreyk 		imsg_free(&imsg);
5583afb90b0Sreyk 	}
5593afb90b0Sreyk 	imsg_event_add(iev);
5603afb90b0Sreyk }
5613afb90b0Sreyk 
5623475ba91Smlarkin /*
563af96af6cSreyk  * terminate_vm
564af96af6cSreyk  *
565af96af6cSreyk  * Requests vmm(4) to terminate the VM whose ID is provided in the
566af96af6cSreyk  * supplied vm_terminate_params structure (vtp->vtp_vm_id)
567af96af6cSreyk  *
568af96af6cSreyk  * Parameters
56969558196Smlarkin  *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
570af96af6cSreyk  *
571af96af6cSreyk  * Return values:
572af96af6cSreyk  *  0: success
573549bbfbbSdv  *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid)
574af96af6cSreyk  */
575af96af6cSreyk int
5766fcc05d2Sreyk terminate_vm(struct vm_terminate_params *vtp)
577af96af6cSreyk {
578f6e5c9ebSreyk 	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1)
579af96af6cSreyk 		return (errno);
580af96af6cSreyk 
581af96af6cSreyk 	return (0);
582af96af6cSreyk }
583af96af6cSreyk 
584af96af6cSreyk /*
585af96af6cSreyk  * opentap
586af96af6cSreyk  *
587af96af6cSreyk  * Opens the next available tap device, up to MAX_TAP.
588af96af6cSreyk  *
5895921535cSreyk  * Parameters
5904d52d473Skn  *  ifname: a buffer of at least IF_NAMESIZE bytes.
5915921535cSreyk  *
592adb7c8aaSdv  * Returns a file descriptor to the tap node opened or -1 if no tap devices were
593adb7c8aaSdv  * available, setting errno to the open(2) error.
594af96af6cSreyk  */
595af96af6cSreyk int
5965921535cSreyk opentap(char *ifname)
597af96af6cSreyk {
598adb7c8aaSdv 	int err = 0, i, fd;
599af96af6cSreyk 	char path[PATH_MAX];
600af96af6cSreyk 
601af96af6cSreyk 	for (i = 0; i < MAX_TAP; i++) {
602af96af6cSreyk 		snprintf(path, PATH_MAX, "/dev/tap%d", i);
603af96af6cSreyk 
604adb7c8aaSdv 		errno = 0;
605adb7c8aaSdv 		fd = open(path, O_RDWR | O_NONBLOCK);
606adb7c8aaSdv 		if (fd != -1)
607adb7c8aaSdv 			break;
608adb7c8aaSdv 		err = errno;
609adb7c8aaSdv 		if (err == EBUSY) {
610adb7c8aaSdv 			/* Busy...try next tap. */
611adb7c8aaSdv 			continue;
612adb7c8aaSdv 		} else if (err == ENOENT) {
613adb7c8aaSdv 			/* Ran out of /dev/tap* special files. */
614adb7c8aaSdv 			break;
615adb7c8aaSdv 		} else {
616adb7c8aaSdv 			log_warn("%s: unexpected error", __func__);
617adb7c8aaSdv 			break;
618adb7c8aaSdv 		}
619adb7c8aaSdv 	}
620adb7c8aaSdv 
621adb7c8aaSdv 	/* Record the last opened tap device. */
622adb7c8aaSdv 	snprintf(ifname, IF_NAMESIZE, "tap%d", i);
623adb7c8aaSdv 
624adb7c8aaSdv 	if (err)
625adb7c8aaSdv 		errno = err;
626adb7c8aaSdv 	return (fd);
627af96af6cSreyk }
628af96af6cSreyk 
629af96af6cSreyk /*
6301e1977eeSreyk  * vmm_start_vm
631af96af6cSreyk  *
63224386e31Sdv  * Prepares and fork+execs a new VM process.
633af96af6cSreyk  *
634af96af6cSreyk  * Parameters:
6351e1977eeSreyk  *  imsg: The VM data structure that is including the VM create parameters.
6361e1977eeSreyk  *  id: Returns the VM id as reported by the kernel and obtained from the VM.
637a014dd99Sreyk  *  pid: Returns the VM pid to the parent.
638af96af6cSreyk  *
639af96af6cSreyk  * Return values:
640af96af6cSreyk  *  0: success
641af96af6cSreyk  *  !0: failure - typically an errno indicating the source of the failure
642af96af6cSreyk  */
643af96af6cSreyk int
644a014dd99Sreyk vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
645af96af6cSreyk {
646af96af6cSreyk 	struct vm_create_params	*vcp;
647f0bbd60cSreyk 	struct vmd_vm		*vm;
648f4b47ae8Sbluhm 	char			*nargv[10], num[32], vmm_fd[32], psp_fd[32];
64924386e31Sdv 	int			 fd, ret = EINVAL;
6501e1977eeSreyk 	int			 fds[2];
651fbbcf6cdSdv 	pid_t			 vm_pid;
652fbbcf6cdSdv 	size_t			 i, j, sz;
653af96af6cSreyk 
654f0bbd60cSreyk 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
6556ee12970Smlarkin 		log_warnx("%s: can't find vm", __func__);
65658ab8896Sdv 		return (ENOENT);
657f0bbd60cSreyk 	}
658ea9c30d9Sedd 	vcp = &vm->vm_params.vmc_params;
659af96af6cSreyk 
66019700f36Sjasper 	if (!(vm->vm_state & VM_STATE_RECEIVED)) {
66153027660Sclaudio 		if ((vm->vm_tty = imsg_get_fd(imsg)) == -1) {
6626ee12970Smlarkin 			log_warnx("%s: can't get tty", __func__);
663af96af6cSreyk 			goto err;
664af96af6cSreyk 		}
665eed20f3bSpd 	}
666af96af6cSreyk 
667b3bc6112Sdv 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC, fds)
668b3bc6112Sdv 	    == -1)
66948665f9bSreyk 		fatal("socketpair");
67048665f9bSreyk 
67124386e31Sdv 	/* Start child vmd for this VM (fork, chroot, drop privs) */
672fbbcf6cdSdv 	vm_pid = fork();
673fbbcf6cdSdv 	if (vm_pid == -1) {
67424386e31Sdv 		log_warn("%s: start child failed", __func__);
675af96af6cSreyk 		ret = EIO;
676af96af6cSreyk 		goto err;
677af96af6cSreyk 	}
678af96af6cSreyk 
679fbbcf6cdSdv 	if (vm_pid > 0) {
680af96af6cSreyk 		/* Parent */
681fbbcf6cdSdv 		vm->vm_pid = vm_pid;
6823bbc9b4eSdv 		close_fd(fds[1]);
683299ee841Sstefan 
68424386e31Sdv 		/* Send the details over the pipe to the child. */
68524386e31Sdv 		sz = atomicio(vwrite, fds[0], vm, sizeof(*vm));
68624386e31Sdv 		if (sz != sizeof(*vm)) {
68724386e31Sdv 			log_warnx("%s: failed to send config for vm '%s'",
68824386e31Sdv 			    __func__, vcp->vcp_name);
68924386e31Sdv 			ret = EIO;
69024386e31Sdv 			/* Defer error handling until after fd closing. */
69124386e31Sdv 		}
69224386e31Sdv 
69324386e31Sdv 		/* As the parent/vmm process, we no longer need these fds. */
69473a98491Sdv 		for (i = 0 ; i < vm->vm_params.vmc_ndisks; i++) {
69573613953Sreyk 			for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
6963bbc9b4eSdv 				if (close_fd(vm->vm_disks[i][j]) == 0)
69773613953Sreyk 				    vm->vm_disks[i][j] = -1;
69873613953Sreyk 			}
699f0bbd60cSreyk 		}
70073a98491Sdv 		for (i = 0 ; i < vm->vm_params.vmc_nnics; i++) {
7013bbc9b4eSdv 			if (close_fd(vm->vm_ifs[i].vif_fd) == 0)
702789e0822Sreyk 			    vm->vm_ifs[i].vif_fd = -1;
703f0bbd60cSreyk 		}
7043bbc9b4eSdv 		if (close_fd(vm->vm_kernel) == 0)
705f0bbd60cSreyk 			vm->vm_kernel = -1;
7063bbc9b4eSdv 		if (close_fd(vm->vm_cdrom) == 0)
70795ab188fSccardenas 			vm->vm_cdrom = -1;
7083bbc9b4eSdv 		if (close_fd(vm->vm_tty) == 0)
709f0bbd60cSreyk 			vm->vm_tty = -1;
710af96af6cSreyk 
71124386e31Sdv 		/* Deferred error handling from sending the vm struct. */
71224386e31Sdv 		if (ret == EIO)
71324386e31Sdv 			goto err;
71424386e31Sdv 
7152272e586Sdv 		/* Send the current local prefix configuration. */
7162272e586Sdv 		sz = atomicio(vwrite, fds[0], &env->vmd_cfg.cfg_localprefix,
7172272e586Sdv 		    sizeof(env->vmd_cfg.cfg_localprefix));
7182272e586Sdv 		if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) {
7192272e586Sdv 			log_warnx("%s: failed to send local prefix for vm '%s'",
7202272e586Sdv 			    __func__, vcp->vcp_name);
7212272e586Sdv 			ret = EIO;
7222272e586Sdv 			goto err;
7232272e586Sdv 		}
7242272e586Sdv 
725549bbfbbSdv 		/* Read back the kernel-generated vm id from the child */
726fbbcf6cdSdv 		sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id));
727fbbcf6cdSdv 		if (sz != sizeof(vcp->vcp_id)) {
728fbbcf6cdSdv 			log_debug("%s: failed to receive vm id from vm %s",
729fbbcf6cdSdv 			    __func__, vcp->vcp_name);
730fbbcf6cdSdv 			/* vmd could not allocate memory for the vm. */
731fbbcf6cdSdv 			ret = ENOMEM;
732fbbcf6cdSdv 			goto err;
733fbbcf6cdSdv 		}
73448665f9bSreyk 
73524386e31Sdv 		/* Check for an invalid id. This indicates child failure. */
73648665f9bSreyk 		if (vcp->vcp_id == 0)
73748665f9bSreyk 			goto err;
73848665f9bSreyk 
73948665f9bSreyk 		*id = vcp->vcp_id;
740a014dd99Sreyk 		*pid = vm->vm_pid;
74148665f9bSreyk 
74224386e31Sdv 		/* Wire up our pipe into the event handling. */
7433afb90b0Sreyk 		if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
7443afb90b0Sreyk 			fatal("setup vm pipe");
745f0bbd60cSreyk 	} else {
74624386e31Sdv 		/* Child. Create a new session. */
74724386e31Sdv 		if (setsid() == -1)
74824386e31Sdv 			fatal("setsid");
74924386e31Sdv 
7503bbc9b4eSdv 		close_fd(fds[0]);
7513bbc9b4eSdv 		close_fd(PROC_PARENT_SOCK_FILENO);
75248665f9bSreyk 
75324386e31Sdv 		/* Detach from terminal. */
75424386e31Sdv 		if (!env->vmd_debug && (fd =
75524386e31Sdv 			open("/dev/null", O_RDWR, 0)) != -1) {
75624386e31Sdv 			dup2(fd, STDIN_FILENO);
75724386e31Sdv 			dup2(fd, STDOUT_FILENO);
75824386e31Sdv 			dup2(fd, STDERR_FILENO);
75924386e31Sdv 			if (fd > 2)
76024386e31Sdv 				close(fd);
76124386e31Sdv 		}
762f0bbd60cSreyk 
763f4b47ae8Sbluhm 		if (env->vmd_psp_fd > 0)
764f4b47ae8Sbluhm 			fcntl(env->vmd_psp_fd, F_SETFD, 0); /* psp device fd */
765f4b47ae8Sbluhm 
76624386e31Sdv 		/*
76724386e31Sdv 		 * Prepare our new argv for execvp(2) with the fd of our open
76824386e31Sdv 		 * pipe to the parent/vmm process as an argument.
76924386e31Sdv 		 */
77024386e31Sdv 		memset(num, 0, sizeof(num));
77124386e31Sdv 		snprintf(num, sizeof(num), "%d", fds[1]);
7723c817da7Sdv 		memset(vmm_fd, 0, sizeof(vmm_fd));
7733c817da7Sdv 		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
774f4b47ae8Sbluhm 		memset(psp_fd, 0, sizeof(psp_fd));
775f4b47ae8Sbluhm 		snprintf(psp_fd, sizeof(psp_fd), "%d", env->vmd_psp_fd);
77624386e31Sdv 
7777f22b52aSbluhm 		i = 0;
7787f22b52aSbluhm 		nargv[i++] = env->argv0;
7797f22b52aSbluhm 		nargv[i++] = "-V";
7807f22b52aSbluhm 		nargv[i++] = num;
7817f22b52aSbluhm 		nargv[i++] = "-i";
7827f22b52aSbluhm 		nargv[i++] = vmm_fd;
7837f22b52aSbluhm 		nargv[i++] = "-j";
7847f22b52aSbluhm 		nargv[i++] = psp_fd;
7857f22b52aSbluhm 		if (env->vmd_debug)
7867f22b52aSbluhm 			nargv[i++] = "-d";
7877f22b52aSbluhm 		if (env->vmd_verbose == 1)
7887f22b52aSbluhm 			nargv[i++] = "-v";
7897f22b52aSbluhm 		else if (env->vmd_verbose > 1)
7907f22b52aSbluhm 			nargv[i++] = "-vv";
7917f22b52aSbluhm 		nargv[i++] = NULL;
7927f22b52aSbluhm 		if (i > sizeof(nargv) / sizeof(nargv[0]))
7937f22b52aSbluhm 			fatalx("%s: nargv overflow", __func__);
79461f4cd73Sdv 
79524386e31Sdv 		/* Control resumes in vmd main(). */
79624386e31Sdv 		execvp(nargv[0], nargv);
79724386e31Sdv 
79824386e31Sdv 		ret = errno;
79924386e31Sdv 		log_warn("execvp %s", nargv[0]);
800c8f0d715Sreyk 		_exit(ret);
80124386e31Sdv 		/* NOTREACHED */
802af96af6cSreyk 	}
803af96af6cSreyk 
804f0bbd60cSreyk 	return (0);
805af96af6cSreyk 
806af96af6cSreyk  err:
80724386e31Sdv 	if (!vm->vm_from_config)
808ddadf993Sreyk 		vm_remove(vm, __func__);
809af96af6cSreyk 
810af96af6cSreyk 	return (ret);
811af96af6cSreyk }
812af96af6cSreyk 
813af96af6cSreyk /*
814af96af6cSreyk  * get_info_vm
815af96af6cSreyk  *
816af96af6cSreyk  * Returns a list of VMs known to vmm(4).
817af96af6cSreyk  *
818af96af6cSreyk  * Parameters:
8196fcc05d2Sreyk  *  ps: the privsep context.
8206fcc05d2Sreyk  *  imsg: the received imsg including the peer id.
8216fcc05d2Sreyk  *  terminate: terminate the listed vm.
822af96af6cSreyk  *
823af96af6cSreyk  * Return values:
824af96af6cSreyk  *  0: success
825af96af6cSreyk  *  !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
826af96af6cSreyk  */
827af96af6cSreyk int
8286fcc05d2Sreyk get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
829af96af6cSreyk {
830af96af6cSreyk 	int ret;
831af96af6cSreyk 	size_t ct, i;
832af96af6cSreyk 	struct vm_info_params vip;
833af96af6cSreyk 	struct vm_info_result *info;
8346fcc05d2Sreyk 	struct vm_terminate_params vtp;
8353a45f7b6Sreyk 	struct vmop_info_result vir;
836af96af6cSreyk 
837af96af6cSreyk 	/*
838af96af6cSreyk 	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
839af96af6cSreyk 	 * buffer size of 0, which results in vmm(4) returning the
840af96af6cSreyk 	 * number of bytes required back to us in vip.vip_size,
841af96af6cSreyk 	 * and then we call it again after malloc'ing the required
842af96af6cSreyk 	 * number of bytes.
843af96af6cSreyk 	 *
844549bbfbbSdv 	 * It is possible that we could fail a second time (e.g. if
845af96af6cSreyk 	 * another VM was created in the instant between the two
846af96af6cSreyk 	 * ioctls, but in that case the caller can just try again
847af96af6cSreyk 	 * as vmm(4) will return a zero-sized list in that case.
848af96af6cSreyk 	 */
849af96af6cSreyk 	vip.vip_size = 0;
850af96af6cSreyk 	info = NULL;
851af96af6cSreyk 	ret = 0;
8523a45f7b6Sreyk 	memset(&vir, 0, sizeof(vir));
853af96af6cSreyk 
854af96af6cSreyk 	/* First ioctl to see how many bytes needed (vip.vip_size) */
855df69c215Sderaadt 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1)
856af96af6cSreyk 		return (errno);
857af96af6cSreyk 
858af96af6cSreyk 	if (vip.vip_info_ct != 0)
859af96af6cSreyk 		return (EIO);
860af96af6cSreyk 
861af96af6cSreyk 	info = malloc(vip.vip_size);
862af96af6cSreyk 	if (info == NULL)
863af96af6cSreyk 		return (ENOMEM);
864af96af6cSreyk 
865af96af6cSreyk 	/* Second ioctl to get the actual list */
866af96af6cSreyk 	vip.vip_info = info;
867df69c215Sderaadt 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) {
868af96af6cSreyk 		ret = errno;
869af96af6cSreyk 		free(info);
870af96af6cSreyk 		return (ret);
871af96af6cSreyk 	}
872af96af6cSreyk 
873f0bbd60cSreyk 	/* Return info */
874af96af6cSreyk 	ct = vip.vip_size / sizeof(struct vm_info_result);
875af96af6cSreyk 	for (i = 0; i < ct; i++) {
8766fcc05d2Sreyk 		if (terminate) {
8776fcc05d2Sreyk 			vtp.vtp_vm_id = info[i].vir_id;
8786fcc05d2Sreyk 			if ((ret = terminate_vm(&vtp)) != 0)
879ed010ab2Sdv 				break;
880f6e5c9ebSreyk 			log_debug("%s: terminated vm %s (id %d)", __func__,
8816fcc05d2Sreyk 			    info[i].vir_name, info[i].vir_id);
8823a45f7b6Sreyk 			continue;
8833a45f7b6Sreyk 		}
8843a45f7b6Sreyk 		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
885eb1cd41dSreyk 		vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL);
8863a45f7b6Sreyk 		if (proc_compose_imsg(ps, PROC_PARENT, -1,
88741b2bea6Sreyk 		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
888ed010ab2Sdv 		    &vir, sizeof(vir)) == -1) {
889ed010ab2Sdv 			ret = EIO;
890ed010ab2Sdv 			break;
891ed010ab2Sdv 		}
892af96af6cSreyk 	}
893af96af6cSreyk 	free(info);
894549bbfbbSdv 
895ed010ab2Sdv 	return (ret);
896af96af6cSreyk }
897