xref: /openbsd-src/usr.sbin/vmd/vmm.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: vmm.c,v 1.101 2021/04/26 22:58:27 dv Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>	/* nitems */
20 #include <sys/ioctl.h>
21 #include <sys/queue.h>
22 #include <sys/wait.h>
23 #include <sys/uio.h>
24 #include <sys/socket.h>
25 #include <sys/time.h>
26 #include <sys/mman.h>
27 
28 #include <dev/ic/i8253reg.h>
29 #include <dev/isa/isareg.h>
30 #include <dev/pci/pcireg.h>
31 
32 #include <machine/param.h>
33 #include <machine/psl.h>
34 #include <machine/specialreg.h>
35 #include <machine/vmmvar.h>
36 
37 #include <net/if.h>
38 
39 #include <errno.h>
40 #include <event.h>
41 #include <fcntl.h>
42 #include <imsg.h>
43 #include <limits.h>
44 #include <poll.h>
45 #include <pthread.h>
46 #include <stddef.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <util.h>
52 
53 #include "vmd.h"
54 #include "vmm.h"
55 
56 void	vmm_sighdlr(int, short, void *);
57 int	vmm_start_vm(struct imsg *, uint32_t *, pid_t *);
58 int	vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
59 void	vmm_run(struct privsep *, struct privsep_proc *, void *);
60 void	vmm_dispatch_vm(int, short, void *);
61 int	terminate_vm(struct vm_terminate_params *);
62 int	get_info_vm(struct privsep *, struct imsg *, int);
63 int	opentap(char *);
64 
65 extern struct vmd *env;
66 
/*
 * The only privsep peer of the vmm process is the parent; its messages
 * are handled by vmm_dispatch_parent().
 */
static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	vmm_dispatch_parent  },
};
70 
/*
 * vmm
 *
 * Entry point of the vmm privsep process: hands control to proc_run(),
 * which sets up the process and enters the event loop with vmm_run()
 * as the run callback.
 */
void
vmm(struct privsep *ps, struct privsep_proc *p)
{
	proc_run(ps, p, procs, nitems(procs), vmm_run, NULL);
}
76 
/*
 * vmm_run
 *
 * Run callback for the vmm privsep process: initializes configuration
 * state, installs our own SIGCHLD handler (used to reap exited VM
 * children), pledges, and terminates any VMs left over from a previous
 * vmd instance.
 */
void
vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	if (config_init(ps->ps_env) == -1)
		fatal("failed to initialize configuration");

	/* Replace the default privsep SIGCHLD handler with vmm_sighdlr. */
	signal_del(&ps->ps_evsigchld);
	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
	signal_add(&ps->ps_evsigchld, NULL);

	/*
	 * pledge in the vmm process:
	 * stdio - for malloc and basic I/O including events.
	 * vmm - for the vmm ioctls and operations.
	 * proc - for forking and maintaining vms.
	 * sendfd - for sending send/recv fds to vm proc.
	 * recvfd - for disks, interfaces and other fds.
	 */
	if (pledge("stdio vmm sendfd recvfd proc", NULL) == -1)
		fatal("pledge");

	/* Get and terminate all running VMs */
	get_info_vm(ps, NULL, 1);
}
101 
102 int
103 vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
104 {
105 	struct privsep		*ps = p->p_ps;
106 	int			 res = 0, cmd = 0, verbose, ret;
107 	struct vmd_vm		*vm = NULL;
108 	struct vm_terminate_params vtp;
109 	struct vmop_id		 vid;
110 	struct vmop_result	 vmr;
111 	struct vmop_create_params vmc;
112 	struct vmop_addr_result  var;
113 	uint32_t		 id = 0, peerid = imsg->hdr.peerid;
114 	pid_t			 pid = 0;
115 	unsigned int		 mode, flags;
116 
117 	switch (imsg->hdr.type) {
118 	case IMSG_VMDOP_START_VM_REQUEST:
119 		res = config_getvm(ps, imsg);
120 		if (res == -1) {
121 			res = errno;
122 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
123 		}
124 		break;
125 	case IMSG_VMDOP_START_VM_CDROM:
126 		res = config_getcdrom(ps, imsg);
127 		if (res == -1) {
128 			res = errno;
129 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
130 		}
131 		break;
132 	case IMSG_VMDOP_START_VM_DISK:
133 		res = config_getdisk(ps, imsg);
134 		if (res == -1) {
135 			res = errno;
136 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
137 		}
138 		break;
139 	case IMSG_VMDOP_START_VM_IF:
140 		res = config_getif(ps, imsg);
141 		if (res == -1) {
142 			res = errno;
143 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
144 		}
145 		break;
146 	case IMSG_VMDOP_START_VM_END:
147 		res = vmm_start_vm(imsg, &id, &pid);
148 		/* Check if the ID can be mapped correctly */
149 		if ((id = vm_id2vmid(id, NULL)) == 0)
150 			res = ENOENT;
151 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
152 		break;
153 	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
154 		IMSG_SIZE_CHECK(imsg, &vid);
155 		memcpy(&vid, imsg->data, sizeof(vid));
156 		id = vid.vid_id;
157 		flags = vid.vid_flags;
158 
159 		DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id);
160 
161 		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
162 
163 		if (id == 0) {
164 			res = ENOENT;
165 		} else if ((vm = vm_getbyvmid(id)) != NULL) {
166 			if (flags & VMOP_FORCE) {
167 				vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
168 				vm->vm_state |= VM_STATE_SHUTDOWN;
169 				(void)terminate_vm(&vtp);
170 				res = 0;
171 			} else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) {
172 				log_debug("%s: sending shutdown request"
173 				    " to vm %d", __func__, id);
174 
175 				/*
176 				 * Request reboot but mark the VM as shutting
177 				 * down. This way we can terminate the VM after
178 				 * the triple fault instead of reboot and
179 				 * avoid being stuck in the ACPI-less powerdown
180 				 * ("press any key to reboot") of the VM.
181 				 */
182 				vm->vm_state |= VM_STATE_SHUTDOWN;
183 				if (imsg_compose_event(&vm->vm_iev,
184 				    IMSG_VMDOP_VM_REBOOT,
185 				    0, 0, -1, NULL, 0) == -1)
186 					res = errno;
187 				else
188 					res = 0;
189 			} else {
190 				/*
191 				 * VM is currently being shutdown.
192 				 * Check to see if the VM process is still
193 				 * active.  If not, return VMD_VM_STOP_INVALID.
194 				 */
195 				if (vm_vmid2id(vm->vm_vmid, vm) == 0) {
196 					log_debug("%s: no vm running anymore",
197 					    __func__);
198 					res = VMD_VM_STOP_INVALID;
199 				}
200 			}
201 		} else {
202 			/* VM doesn't exist, cannot stop vm */
203 			log_debug("%s: cannot stop vm that is not running",
204 			    __func__);
205 			res = VMD_VM_STOP_INVALID;
206 		}
207 		break;
208 	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
209 		res = get_info_vm(ps, imsg, 0);
210 		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
211 		break;
212 	case IMSG_VMDOP_CONFIG:
213 		config_getconfig(env, imsg);
214 		break;
215 	case IMSG_CTL_RESET:
216 		IMSG_SIZE_CHECK(imsg, &mode);
217 		memcpy(&mode, imsg->data, sizeof(mode));
218 
219 		if (mode & CONFIG_VMS) {
220 			/* Terminate and remove all VMs */
221 			vmm_shutdown();
222 			mode &= ~CONFIG_VMS;
223 		}
224 
225 		config_getreset(env, imsg);
226 		break;
227 	case IMSG_CTL_VERBOSE:
228 		IMSG_SIZE_CHECK(imsg, &verbose);
229 		memcpy(&verbose, imsg->data, sizeof(verbose));
230 		log_setverbose(verbose);
231 
232 		/* Forward message to each VM process */
233 		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
234 			imsg_compose_event(&vm->vm_iev,
235 			    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
236 			    -1, &verbose, sizeof(verbose));
237 		}
238 		break;
239 	case IMSG_VMDOP_PAUSE_VM:
240 		IMSG_SIZE_CHECK(imsg, &vid);
241 		memcpy(&vid, imsg->data, sizeof(vid));
242 		id = vid.vid_id;
243 		if ((vm = vm_getbyvmid(id)) == NULL) {
244 			res = ENOENT;
245 			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
246 			break;
247 		}
248 		imsg_compose_event(&vm->vm_iev,
249 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
250 		    imsg->fd, &vid, sizeof(vid));
251 		break;
252 	case IMSG_VMDOP_UNPAUSE_VM:
253 		IMSG_SIZE_CHECK(imsg, &vid);
254 		memcpy(&vid, imsg->data, sizeof(vid));
255 		id = vid.vid_id;
256 		if ((vm = vm_getbyvmid(id)) == NULL) {
257 			res = ENOENT;
258 			cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
259 			break;
260 		}
261 		imsg_compose_event(&vm->vm_iev,
262 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
263 		    imsg->fd, &vid, sizeof(vid));
264 		break;
265 	case IMSG_VMDOP_SEND_VM_REQUEST:
266 		IMSG_SIZE_CHECK(imsg, &vid);
267 		memcpy(&vid, imsg->data, sizeof(vid));
268 		id = vid.vid_id;
269 		if ((vm = vm_getbyvmid(id)) == NULL) {
270 			res = ENOENT;
271 			close(imsg->fd);
272 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
273 			break;
274 		}
275 		imsg_compose_event(&vm->vm_iev,
276 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
277 		    imsg->fd, &vid, sizeof(vid));
278 		break;
279 	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
280 		IMSG_SIZE_CHECK(imsg, &vmc);
281 		memcpy(&vmc, imsg->data, sizeof(vmc));
282 		ret = vm_register(ps, &vmc, &vm,
283 		    imsg->hdr.peerid, vmc.vmc_owner.uid);
284 		vm->vm_tty = imsg->fd;
285 		vm->vm_state |= VM_STATE_RECEIVED;
286 		vm->vm_state |= VM_STATE_PAUSED;
287 		break;
288 	case IMSG_VMDOP_RECEIVE_VM_END:
289 		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
290 			res = ENOENT;
291 			close(imsg->fd);
292 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
293 			break;
294 		}
295 		vm->vm_receive_fd = imsg->fd;
296 		res = vmm_start_vm(imsg, &id, &pid);
297 		/* Check if the ID can be mapped correctly */
298 		if ((id = vm_id2vmid(id, NULL)) == 0)
299 			res = ENOENT;
300 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
301 		break;
302 	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
303 		IMSG_SIZE_CHECK(imsg, &var);
304 		memcpy(&var, imsg->data, sizeof(var));
305 		if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) {
306 			res = ENOENT;
307 			break;
308 		}
309 		/* Forward hardware address details to the guest vm */
310 		imsg_compose_event(&vm->vm_iev,
311 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
312 		    imsg->fd, &var, sizeof(var));
313 		break;
314 	default:
315 		return (-1);
316 	}
317 
318 	switch (cmd) {
319 	case 0:
320 		break;
321 	case IMSG_VMDOP_START_VM_RESPONSE:
322 		if (res != 0) {
323 			/* Remove local reference if it exists */
324 			if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) {
325 				log_debug("%s: removing vm, START_VM_RESPONSE",
326 				    __func__);
327 				vm_remove(vm, __func__);
328 			}
329 		}
330 		if (id == 0)
331 			id = imsg->hdr.peerid;
332 	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
333 	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
334 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
335 		memset(&vmr, 0, sizeof(vmr));
336 		vmr.vmr_result = res;
337 		vmr.vmr_id = id;
338 		vmr.vmr_pid = pid;
339 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
340 		    peerid, -1, &vmr, sizeof(vmr)) == -1)
341 			return (-1);
342 		break;
343 	default:
344 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
345 		    peerid, -1, &res, sizeof(res)) == -1)
346 			return (-1);
347 		break;
348 	}
349 
350 	return (0);
351 }
352 
/*
 * vmm_sighdlr
 *
 * SIGCHLD handler: reaps exited VM child processes, asks vmm(4) to
 * tear down the corresponding in-kernel VM, reports the termination
 * (with the child's exit status) to the parent process, and drops the
 * local VM state.
 */
void
vmm_sighdlr(int sig, short event, void *arg)
{
	struct privsep *ps = arg;
	int status, ret = 0;
	uint32_t vmid;
	pid_t pid;
	struct vmop_result vmr;
	struct vmd_vm *vm;
	struct vm_terminate_params vtp;

	log_debug("%s: handling signal %d", __func__, sig);
	switch (sig) {
	case SIGCHLD:
		/* Reap every child that changed state; retry on EINTR. */
		do {
			pid = waitpid(-1, &status, WNOHANG);
			if (pid <= 0)
				continue;

			if (WIFEXITED(status) || WIFSIGNALED(status)) {
				vm = vm_getbypid(pid);
				if (vm == NULL) {
					/*
					 * If the VM is gone already, it
					 * got terminated via a
					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
					 */
					continue;
				}

				/* Only exited children carry a status code. */
				if (WIFEXITED(status))
					ret = WEXITSTATUS(status);

				/* Don't reboot on pending shutdown */
				if (ret == EAGAIN &&
				    (vm->vm_state & VM_STATE_SHUTDOWN))
					ret = 0;

				vmid = vm->vm_params.vmc_params.vcp_id;
				vtp.vtp_vm_id = vmid;

				/* Tear down the kernel side of this VM. */
				if (terminate_vm(&vtp) == 0)
					log_debug("%s: terminated vm %s"
					    " (id %d)", __func__,
					    vm->vm_params.vmc_params.vcp_name,
					    vm->vm_vmid);

				/* Notify the parent of the termination. */
				memset(&vmr, 0, sizeof(vmr));
				vmr.vmr_result = ret;
				vmr.vmr_id = vm_id2vmid(vmid, vm);
				if (proc_compose_imsg(ps, PROC_PARENT,
				    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
				    vm->vm_peerid, -1,
				    &vmr, sizeof(vmr)) == -1)
					log_warnx("could not signal "
					    "termination of VM %u to "
					    "parent", vm->vm_vmid);

				vm_remove(vm, __func__);
			} else
				fatalx("unexpected cause of SIGCHLD");
		} while (pid > 0 || (pid == -1 && errno == EINTR));
		break;
	default:
		fatalx("unexpected signal");
	}
}
420 
421 /*
422  * vmm_shutdown
423  *
424  * Terminate VMs on shutdown to avoid "zombie VM" processes.
425  */
426 void
427 vmm_shutdown(void)
428 {
429 	struct vm_terminate_params vtp;
430 	struct vmd_vm *vm, *vm_next;
431 
432 	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
433 		vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
434 
435 		/* XXX suspend or request graceful shutdown */
436 		(void)terminate_vm(&vtp);
437 		vm_remove(vm, __func__);
438 	}
439 }
440 
441 /*
442  * vmm_pipe
443  *
444  * Create a new imsg control channel between vmm parent and a VM
445  * (can be called on both sides).
446  */
447 int
448 vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *))
449 {
450 	struct imsgev	*iev = &vm->vm_iev;
451 
452 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
453 		log_warn("failed to set nonblocking mode on vm pipe");
454 		return (-1);
455 	}
456 
457 	imsg_init(&iev->ibuf, fd);
458 	iev->handler = cb;
459 	iev->data = vm;
460 	imsg_event_add(iev);
461 
462 	return (0);
463 }
464 
/*
 * vmm_dispatch_vm
 *
 * imsg callback for messages that are received from a VM child process.
 * Tracks shutdown/reboot state transitions of the VM and forwards
 * send/pause/unpause responses up to the parent process.
 */
void
vmm_dispatch_vm(int fd, short event, void *arg)
{
	struct vmd_vm		*vm = arg;
	struct vmop_result	 vmr;
	struct imsgev		*iev = &vm->vm_iev;
	struct imsgbuf		*ibuf = &iev->ibuf;
	struct imsg		 imsg;
	ssize_t			 n;
	unsigned int		 i;

	if (event & EV_READ) {
		if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
			fatal("%s: imsg_read", __func__);
		if (n == 0) {
			/* This pipe is dead, so remove the event handler */
			event_del(&iev->ev);
			return;
		}
	}

	if (event & EV_WRITE) {
		if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN)
			fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd);
		if (n == 0) {
			/* This pipe is dead, so remove the event handler */
			event_del(&iev->ev);
			return;
		}
	}

	/* Drain all complete messages queued on the channel. */
	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		DPRINTF("%s: got imsg %d from %s",
		    __func__, imsg.hdr.type,
		    vm->vm_params.vmc_params.vcp_name);

		switch (imsg.hdr.type) {
		case IMSG_VMDOP_VM_SHUTDOWN:
			vm->vm_state |= VM_STATE_SHUTDOWN;
			break;
		case IMSG_VMDOP_VM_REBOOT:
			vm->vm_state &= ~VM_STATE_SHUTDOWN;
			break;
		case IMSG_VMDOP_SEND_VM_RESPONSE:
			IMSG_SIZE_CHECK(&imsg, &vmr);
			/* FALLTHROUGH */
		case IMSG_VMDOP_PAUSE_VM_RESPONSE:
		case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
			/* Relay the response to the parent process. */
			for (i = 0; i < nitems(procs); i++) {
				if (procs[i].p_id == PROC_PARENT) {
					proc_forward_imsg(procs[i].p_ps,
					    &imsg, PROC_PARENT, -1);
					break;
				}
			}
			break;

		default:
			fatalx("%s: got invalid imsg %d from %s",
			    __func__, imsg.hdr.type,
			    vm->vm_params.vmc_params.vcp_name);
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}
540 
541 /*
542  * terminate_vm
543  *
544  * Requests vmm(4) to terminate the VM whose ID is provided in the
545  * supplied vm_terminate_params structure (vtp->vtp_vm_id)
546  *
547  * Parameters
548  *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
549  *
550  * Return values:
551  *  0: success
552  *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid)
553  */
554 int
555 terminate_vm(struct vm_terminate_params *vtp)
556 {
557 	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1)
558 		return (errno);
559 
560 	return (0);
561 }
562 
563 /*
564  * opentap
565  *
566  * Opens the next available tap device, up to MAX_TAP.
567  *
568  * Parameters
569  *  ifname: a buffer of at least IF_NAMESIZE bytes.
570  *
571  * Returns a file descriptor to the tap node opened, or -1 if no tap
572  * devices were available.
573  */
574 int
575 opentap(char *ifname)
576 {
577 	int i, fd;
578 	char path[PATH_MAX];
579 
580 	for (i = 0; i < MAX_TAP; i++) {
581 		snprintf(path, PATH_MAX, "/dev/tap%d", i);
582 		fd = open(path, O_RDWR | O_NONBLOCK);
583 		if (fd != -1) {
584 			snprintf(ifname, IF_NAMESIZE, "tap%d", i);
585 			return (fd);
586 		}
587 	}
588 	strlcpy(ifname, "tap", IF_NAMESIZE);
589 
590 	return (-1);
591 }
592 
/*
 * vmm_start_vm
 *
 * Prepares and forks a new VM process.
 *
 * Parameters:
 *  imsg: The VM data structure that is including the VM create parameters.
 *  id: Returns the VM id as reported by the kernel and obtained from the VM.
 *  pid: Returns the VM pid to the parent.
 *
 * Return values:
 *  0: success
 *  !0: failure - typically an errno indicating the source of the failure
 */
int
vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
{
	struct vm_create_params	*vcp;
	struct vmd_vm		*vm;
	int			 ret = EINVAL;
	int			 fds[2];
	size_t			 i, j;

	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
		log_warnx("%s: can't find vm", __func__);
		ret = ENOENT;
		goto err;
	}
	vcp = &vm->vm_params.vmc_params;

	/*
	 * A received (migrated) VM already carries its tty; otherwise the
	 * tty fd must come attached to this imsg.
	 */
	if (!(vm->vm_state & VM_STATE_RECEIVED)) {
		if ((vm->vm_tty = imsg->fd) == -1) {
			log_warnx("%s: can't get tty", __func__);
			goto err;
		}
	}

	/* Socketpair doubles as id-handoff and imsg channel to the child. */
	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
		fatal("socketpair");

	/* Start child vmd for this VM (fork, chroot, drop privs) */
	ret = fork();

	/* Start child failed? - cleanup and leave */
	if (ret == -1) {
		log_warnx("%s: start child failed", __func__);
		ret = EIO;
		goto err;
	}

	if (ret > 0) {
		/* Parent */
		vm->vm_pid = ret;
		close(fds[1]);

		/*
		 * All device fds were duplicated into the child across
		 * fork(); close and invalidate the parent's copies.
		 */
		for (i = 0 ; i < vcp->vcp_ndisks; i++) {
			for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
				if (vm->vm_disks[i][j] != -1)
					close(vm->vm_disks[i][j]);
				vm->vm_disks[i][j] = -1;
			}
		}
		for (i = 0 ; i < vcp->vcp_nnics; i++) {
			close(vm->vm_ifs[i].vif_fd);
			vm->vm_ifs[i].vif_fd = -1;
		}
		if (vm->vm_kernel != -1) {
			close(vm->vm_kernel);
			vm->vm_kernel = -1;
		}
		if (vm->vm_cdrom != -1) {
			close(vm->vm_cdrom);
			vm->vm_cdrom = -1;
		}
		if (vm->vm_tty != -1) {
			close(vm->vm_tty);
			vm->vm_tty = -1;
		}

		/* Read back the kernel-generated vm id from the child */
		if (read(fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
		    sizeof(vcp->vcp_id))
			fatal("read vcp id");

		/*
		 * NOTE(review): on this path ret still holds the child pid,
		 * so the caller receives the pid as the "error" code rather
		 * than an errno value — verify this is intended.
		 */
		if (vcp->vcp_id == 0)
			goto err;

		*id = vcp->vcp_id;
		*pid = vm->vm_pid;

		if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
			fatal("setup vm pipe");

		return (0);
	} else {
		/* Child */
		close(fds[0]);
		close(PROC_PARENT_SOCK_FILENO);

		ret = start_vm(vm, fds[1]);

		/* start_vm() does not return to us; report via exit code. */
		_exit(ret);
	}

	/* NOTREACHED */
	return (0);

 err:
	vm_remove(vm, __func__);

	return (ret);
}
704 
705 /*
706  * get_info_vm
707  *
708  * Returns a list of VMs known to vmm(4).
709  *
710  * Parameters:
711  *  ps: the privsep context.
712  *  imsg: the received imsg including the peer id.
713  *  terminate: terminate the listed vm.
714  *
715  * Return values:
716  *  0: success
717  *  !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
718  */
719 int
720 get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
721 {
722 	int ret;
723 	size_t ct, i;
724 	struct vm_info_params vip;
725 	struct vm_info_result *info;
726 	struct vm_terminate_params vtp;
727 	struct vmop_info_result vir;
728 
729 	/*
730 	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
731 	 * buffer size of 0, which results in vmm(4) returning the
732 	 * number of bytes required back to us in vip.vip_size,
733 	 * and then we call it again after malloc'ing the required
734 	 * number of bytes.
735 	 *
736 	 * It is possible that we could fail a second time (e.g. if
737 	 * another VM was created in the instant between the two
738 	 * ioctls, but in that case the caller can just try again
739 	 * as vmm(4) will return a zero-sized list in that case.
740 	 */
741 	vip.vip_size = 0;
742 	info = NULL;
743 	ret = 0;
744 	memset(&vir, 0, sizeof(vir));
745 
746 	/* First ioctl to see how many bytes needed (vip.vip_size) */
747 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1)
748 		return (errno);
749 
750 	if (vip.vip_info_ct != 0)
751 		return (EIO);
752 
753 	info = malloc(vip.vip_size);
754 	if (info == NULL)
755 		return (ENOMEM);
756 
757 	/* Second ioctl to get the actual list */
758 	vip.vip_info = info;
759 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) {
760 		ret = errno;
761 		free(info);
762 		return (ret);
763 	}
764 
765 	/* Return info */
766 	ct = vip.vip_size / sizeof(struct vm_info_result);
767 	for (i = 0; i < ct; i++) {
768 		if (terminate) {
769 			vtp.vtp_vm_id = info[i].vir_id;
770 			if ((ret = terminate_vm(&vtp)) != 0)
771 				return (ret);
772 			log_debug("%s: terminated vm %s (id %d)", __func__,
773 			    info[i].vir_name, info[i].vir_id);
774 			continue;
775 		}
776 		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
777 		vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL);
778 		if (proc_compose_imsg(ps, PROC_PARENT, -1,
779 		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
780 		    &vir, sizeof(vir)) == -1)
781 			return (EIO);
782 	}
783 	free(info);
784 
785 	return (0);
786 }
787