xref: /openbsd-src/usr.sbin/vmd/vmm.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 /*	$OpenBSD: vmm.c,v 1.105 2022/04/10 19:36:58 dv Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/ioctl.h>
21 #include <sys/queue.h>
22 #include <sys/wait.h>
23 #include <sys/uio.h>
24 #include <sys/socket.h>
25 #include <sys/time.h>
26 #include <sys/mman.h>
27 
28 #include <dev/ic/i8253reg.h>
29 #include <dev/isa/isareg.h>
30 #include <dev/pci/pcireg.h>
31 
32 #include <machine/psl.h>
33 #include <machine/specialreg.h>
34 #include <machine/vmmvar.h>
35 
36 #include <net/if.h>
37 
38 #include <errno.h>
39 #include <event.h>
40 #include <fcntl.h>
41 #include <imsg.h>
42 #include <limits.h>
43 #include <poll.h>
44 #include <pthread.h>
45 #include <stddef.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <util.h>
51 
52 #include "vmd.h"
53 #include "vmm.h"
54 #include "atomicio.h"
55 
56 void	vmm_sighdlr(int, short, void *);
57 int	vmm_start_vm(struct imsg *, uint32_t *, pid_t *);
58 int	vmm_dispatch_parent(int, struct privsep_proc *, struct imsg *);
59 void	vmm_run(struct privsep *, struct privsep_proc *, void *);
60 void	vmm_dispatch_vm(int, short, void *);
61 int	terminate_vm(struct vm_terminate_params *);
62 int	get_info_vm(struct privsep *, struct imsg *, int);
63 int	opentap(char *);
64 
65 extern struct vmd *env;
66 
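/*
 * The parent is the only privsep peer of the vmm process; imsgs from
 * the per-VM child processes are handled by vmm_dispatch_vm() instead.
 */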
67 static struct privsep_proc procs[] = {
68 	{ "parent",	PROC_PARENT,	vmm_dispatch_parent  },
69 };
70 
71 void
72 vmm(struct privsep *ps, struct privsep_proc *p)
73 {
74 	proc_run(ps, p, procs, nitems(procs), vmm_run, NULL);
75 }
76 
77 void
78 vmm_run(struct privsep *ps, struct privsep_proc *p, void *arg)
79 {
80 	if (config_init(ps->ps_env) == -1)
81 		fatal("failed to initialize configuration");
82 
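	/*
	 * Take over SIGCHLD handling so that exiting VM child processes
	 * are reaped by vmm_sighdlr() below.
	 */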
83 	signal_del(&ps->ps_evsigchld);
84 	signal_set(&ps->ps_evsigchld, SIGCHLD, vmm_sighdlr, ps);
85 	signal_add(&ps->ps_evsigchld, NULL);
86 
87 	/*
88 	 * pledge in the vmm process:
89 	 * stdio - for malloc and basic I/O including events.
90 	 * vmm - for the vmm ioctls and operations.
91 	 * proc - for forking and maintaining vms.
92 	 * sendfd - for sending send/recv fds to vm proc.
93 	 * recvfd - for disks, interfaces and other fds.
94 	 */
95 	if (pledge("stdio vmm sendfd recvfd proc", NULL) == -1)
96 		fatal("pledge");
97 
98 	/* Get and terminate all running VMs */
99 	get_info_vm(ps, NULL, 1);
100 }
101 
102 int
103 vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
104 {
105 	struct privsep		*ps = p->p_ps;
106 	int			 res = 0, cmd = 0, verbose;
107 	struct vmd_vm		*vm = NULL;
108 	struct vm_terminate_params vtp;
109 	struct vmop_id		 vid;
110 	struct vmop_result	 vmr;
111 	struct vmop_create_params vmc;
112 	struct vmop_addr_result  var;
113 	uint32_t		 id = 0, peerid = imsg->hdr.peerid;
114 	pid_t			 pid = 0;
115 	unsigned int		 mode, flags;
116 
117 	switch (imsg->hdr.type) {
118 	case IMSG_VMDOP_START_VM_REQUEST:
119 		res = config_getvm(ps, imsg);
120 		if (res == -1) {
121 			res = errno;
122 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
123 		}
124 		break;
125 	case IMSG_VMDOP_START_VM_CDROM:
126 		res = config_getcdrom(ps, imsg);
127 		if (res == -1) {
128 			res = errno;
129 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
130 		}
131 		break;
132 	case IMSG_VMDOP_START_VM_DISK:
133 		res = config_getdisk(ps, imsg);
134 		if (res == -1) {
135 			res = errno;
136 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
137 		}
138 		break;
139 	case IMSG_VMDOP_START_VM_IF:
140 		res = config_getif(ps, imsg);
141 		if (res == -1) {
142 			res = errno;
143 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
144 		}
145 		break;
146 	case IMSG_VMDOP_START_VM_END:
147 		res = vmm_start_vm(imsg, &id, &pid);
148 		/* Check if the ID can be mapped correctly */
149 		if (res == 0 && (id = vm_id2vmid(id, NULL)) == 0)
150 			res = ENOENT;
151 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
152 		break;
153 	case IMSG_VMDOP_TERMINATE_VM_REQUEST:
154 		IMSG_SIZE_CHECK(imsg, &vid);
155 		memcpy(&vid, imsg->data, sizeof(vid));
156 		id = vid.vid_id;
157 		flags = vid.vid_flags;
158 
159 		DPRINTF("%s: recv'ed TERMINATE_VM for %d", __func__, id);
160 
161 		cmd = IMSG_VMDOP_TERMINATE_VM_RESPONSE;
162 
163 		if (id == 0) {
164 			res = ENOENT;
165 		} else if ((vm = vm_getbyvmid(id)) != NULL) {
166 			if (flags & VMOP_FORCE) {
167 				vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
168 				vm->vm_state |= VM_STATE_SHUTDOWN;
169 				(void)terminate_vm(&vtp);
170 				res = 0;
171 			} else if (!(vm->vm_state & VM_STATE_SHUTDOWN)) {
172 				log_debug("%s: sending shutdown request"
173 				    " to vm %d", __func__, id);
174 
175 				/*
176 				 * Request a reboot but mark the VM as shutting
177 				 * down. This way we can terminate the VM after
178 				 * the triple fault instead of rebooting it, and
179 				 * avoid getting stuck in the VM's ACPI-less
180 				 * powerdown ("press any key to reboot") prompt.
181 				 */
182 				vm->vm_state |= VM_STATE_SHUTDOWN;
183 				if (imsg_compose_event(&vm->vm_iev,
184 				    IMSG_VMDOP_VM_REBOOT,
185 				    0, 0, -1, NULL, 0) == -1)
186 					res = errno;
187 				else
188 					res = 0;
189 			} else {
190 				/*
191 				 * VM is currently being shut down.
192 				 * Check to see if the VM process is still
193 				 * active.  If not, return VMD_VM_STOP_INVALID.
194 				 */
195 				if (vm_vmid2id(vm->vm_vmid, vm) == 0) {
196 					log_debug("%s: no vm running anymore",
197 					    __func__);
198 					res = VMD_VM_STOP_INVALID;
199 				}
200 			}
201 		} else {
202 			/* VM doesn't exist, cannot stop vm */
203 			log_debug("%s: cannot stop vm that is not running",
204 			    __func__);
205 			res = VMD_VM_STOP_INVALID;
206 		}
207 		break;
208 	case IMSG_VMDOP_GET_INFO_VM_REQUEST:
209 		res = get_info_vm(ps, imsg, 0);
210 		cmd = IMSG_VMDOP_GET_INFO_VM_END_DATA;
211 		break;
212 	case IMSG_VMDOP_CONFIG:
213 		config_getconfig(env, imsg);
214 		break;
215 	case IMSG_CTL_RESET:
216 		IMSG_SIZE_CHECK(imsg, &mode);
217 		memcpy(&mode, imsg->data, sizeof(mode));
218 
219 		if (mode & CONFIG_VMS) {
220 			/* Terminate and remove all VMs */
221 			vmm_shutdown();
222 			mode &= ~CONFIG_VMS;
223 		}
224 
225 		config_getreset(env, imsg);
226 		break;
227 	case IMSG_CTL_VERBOSE:
228 		IMSG_SIZE_CHECK(imsg, &verbose);
229 		memcpy(&verbose, imsg->data, sizeof(verbose));
230 		log_setverbose(verbose);
231 
232 		/* Forward message to each VM process */
233 		TAILQ_FOREACH(vm, env->vmd_vms, vm_entry) {
234 			imsg_compose_event(&vm->vm_iev,
235 			    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
236 			    -1, &verbose, sizeof(verbose));
237 		}
238 		break;
239 	case IMSG_VMDOP_PAUSE_VM:
240 		IMSG_SIZE_CHECK(imsg, &vid);
241 		memcpy(&vid, imsg->data, sizeof(vid));
242 		id = vid.vid_id;
243 		if ((vm = vm_getbyvmid(id)) == NULL) {
244 			res = ENOENT;
245 			cmd = IMSG_VMDOP_PAUSE_VM_RESPONSE;
246 			break;
247 		}
248 		imsg_compose_event(&vm->vm_iev,
249 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
250 		    imsg->fd, &vid, sizeof(vid));
251 		break;
252 	case IMSG_VMDOP_UNPAUSE_VM:
253 		IMSG_SIZE_CHECK(imsg, &vid);
254 		memcpy(&vid, imsg->data, sizeof(vid));
255 		id = vid.vid_id;
256 		if ((vm = vm_getbyvmid(id)) == NULL) {
257 			res = ENOENT;
258 			cmd = IMSG_VMDOP_UNPAUSE_VM_RESPONSE;
259 			break;
260 		}
261 		imsg_compose_event(&vm->vm_iev,
262 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
263 		    imsg->fd, &vid, sizeof(vid));
264 		break;
265 	case IMSG_VMDOP_SEND_VM_REQUEST:
266 		IMSG_SIZE_CHECK(imsg, &vid);
267 		memcpy(&vid, imsg->data, sizeof(vid));
268 		id = vid.vid_id;
269 		if ((vm = vm_getbyvmid(id)) == NULL) {
270 			res = ENOENT;
271 			close(imsg->fd);
272 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
273 			break;
274 		}
275 		imsg_compose_event(&vm->vm_iev,
276 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
277 		    imsg->fd, &vid, sizeof(vid));
278 		break;
279 	case IMSG_VMDOP_RECEIVE_VM_REQUEST:
280 		IMSG_SIZE_CHECK(imsg, &vmc);
281 		memcpy(&vmc, imsg->data, sizeof(vmc));
282 		if (vm_register(ps, &vmc, &vm,
283 		    imsg->hdr.peerid, vmc.vmc_owner.uid) != 0) {
284 			res = errno;
285 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
286 			break;
287 		}
288 		vm->vm_tty = imsg->fd;
289 		vm->vm_state |= VM_STATE_RECEIVED;
290 		vm->vm_state |= VM_STATE_PAUSED;
291 		break;
292 	case IMSG_VMDOP_RECEIVE_VM_END:
293 		if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
294 			res = ENOENT;
295 			close(imsg->fd);
296 			cmd = IMSG_VMDOP_START_VM_RESPONSE;
297 			break;
298 		}
299 		vm->vm_receive_fd = imsg->fd;
300 		res = vmm_start_vm(imsg, &id, &pid);
301 		/* Check if the ID can be mapped correctly */
302 		if ((id = vm_id2vmid(id, NULL)) == 0)
303 			res = ENOENT;
304 		cmd = IMSG_VMDOP_START_VM_RESPONSE;
305 		break;
306 	case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
307 		IMSG_SIZE_CHECK(imsg, &var);
308 		memcpy(&var, imsg->data, sizeof(var));
309 		if ((vm = vm_getbyvmid(var.var_vmid)) == NULL) {
310 			res = ENOENT;
311 			break;
312 		}
313 		/* Forward hardware address details to the guest vm */
314 		imsg_compose_event(&vm->vm_iev,
315 		    imsg->hdr.type, imsg->hdr.peerid, imsg->hdr.pid,
316 		    imsg->fd, &var, sizeof(var));
317 		break;
318 	default:
319 		return (-1);
320 	}
321 
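	/* Send the result back to the parent if a response type was set. */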
322 	switch (cmd) {
323 	case 0:
324 		break;
325 	case IMSG_VMDOP_START_VM_RESPONSE:
326 		if (res != 0) {
327 			/* Remove local reference if it exists */
328 			if ((vm = vm_getbyvmid(imsg->hdr.peerid)) != NULL) {
329 				log_debug("%s: removing vm, START_VM_RESPONSE",
330 				    __func__);
331 				vm_remove(vm, __func__);
332 			}
333 		}
334 		if (id == 0)
335 			id = imsg->hdr.peerid;
336 		/* FALLTHROUGH */
337 	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
338 	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
339 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
340 		memset(&vmr, 0, sizeof(vmr));
341 		vmr.vmr_result = res;
342 		vmr.vmr_id = id;
343 		vmr.vmr_pid = pid;
344 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
345 		    peerid, -1, &vmr, sizeof(vmr)) == -1)
346 			return (-1);
347 		break;
348 	default:
349 		if (proc_compose_imsg(ps, PROC_PARENT, -1, cmd,
350 		    peerid, -1, &res, sizeof(res)) == -1)
351 			return (-1);
352 		break;
353 	}
354 
355 	return (0);
356 }
357 
358 void
359 vmm_sighdlr(int sig, short event, void *arg)
360 {
361 	struct privsep *ps = arg;
362 	int status, ret = 0;
363 	uint32_t vmid;
364 	pid_t pid;
365 	struct vmop_result vmr;
366 	struct vmd_vm *vm;
367 	struct vm_terminate_params vtp;
368 
369 	log_debug("%s: handling signal %d", __func__, sig);
370 	switch (sig) {
371 	case SIGCHLD:
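		/* Reap all exited VM children without blocking. */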
372 		do {
373 			pid = waitpid(-1, &status, WNOHANG);
374 			if (pid <= 0)
375 				continue;
376 
377 			if (WIFEXITED(status) || WIFSIGNALED(status)) {
378 				vm = vm_getbypid(pid);
379 				if (vm == NULL) {
380 					/*
381 					 * If the VM is gone already, it
382 					 * got terminated via an
383 					 * IMSG_VMDOP_TERMINATE_VM_REQUEST.
384 					 */
385 					continue;
386 				}
387 
388 				if (WIFEXITED(status))
389 					ret = WEXITSTATUS(status);
390 
391 				/* Don't reboot on pending shutdown */
392 				if (ret == EAGAIN &&
393 				    (vm->vm_state & VM_STATE_SHUTDOWN))
394 					ret = 0;
395 
396 				vmid = vm->vm_params.vmc_params.vcp_id;
397 				vtp.vtp_vm_id = vmid;
398 
399 				if (terminate_vm(&vtp) == 0)
400 					log_debug("%s: terminated vm %s"
401 					    " (id %d)", __func__,
402 					    vm->vm_params.vmc_params.vcp_name,
403 					    vm->vm_vmid);
404 
405 				memset(&vmr, 0, sizeof(vmr));
406 				vmr.vmr_result = ret;
407 				vmr.vmr_id = vm_id2vmid(vmid, vm);
408 				if (proc_compose_imsg(ps, PROC_PARENT,
409 				    -1, IMSG_VMDOP_TERMINATE_VM_EVENT,
410 				    vm->vm_peerid, -1,
411 				    &vmr, sizeof(vmr)) == -1)
412 					log_warnx("could not signal "
413 					    "termination of VM %u to "
414 					    "parent", vm->vm_vmid);
415 
416 				vm_remove(vm, __func__);
417 			} else
418 				fatalx("unexpected cause of SIGCHLD");
419 		} while (pid > 0 || (pid == -1 && errno == EINTR));
420 		break;
421 	default:
422 		fatalx("unexpected signal");
423 	}
424 }
425 
426 /*
427  * vmm_shutdown
428  *
429  * Terminate VMs on shutdown to avoid "zombie VM" processes.
430  */
431 void
432 vmm_shutdown(void)
433 {
434 	struct vm_terminate_params vtp;
435 	struct vmd_vm *vm, *vm_next;
436 
437 	TAILQ_FOREACH_SAFE(vm, env->vmd_vms, vm_entry, vm_next) {
438 		vtp.vtp_vm_id = vm_vmid2id(vm->vm_vmid, vm);
439 
440 		/* XXX suspend or request graceful shutdown */
441 		(void)terminate_vm(&vtp);
442 		vm_remove(vm, __func__);
443 	}
444 }
445 
446 /*
447  * vmm_pipe
448  *
449  * Create a new imsg control channel between vmm parent and a VM
450  * (can be called on both sides).
451  */
452 int
453 vmm_pipe(struct vmd_vm *vm, int fd, void (*cb)(int, short, void *))
454 {
455 	struct imsgev	*iev = &vm->vm_iev;
456 
457 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
458 		log_warn("failed to set nonblocking mode on vm pipe");
459 		return (-1);
460 	}
461 
462 	imsg_init(&iev->ibuf, fd);
463 	iev->handler = cb;
464 	iev->data = vm;
465 	imsg_event_add(iev);
466 
467 	return (0);
468 }
469 
470 /*
471  * vmm_dispatch_vm
472  *
473  * imsg callback for messages that are received from a VM child process.
474  */
475 void
476 vmm_dispatch_vm(int fd, short event, void *arg)
477 {
478 	struct vmd_vm		*vm = arg;
479 	struct vmop_result	 vmr;
480 	struct imsgev		*iev = &vm->vm_iev;
481 	struct imsgbuf		*ibuf = &iev->ibuf;
482 	struct imsg		 imsg;
483 	ssize_t			 n;
484 	unsigned int		 i;
485 
486 	if (event & EV_READ) {
487 		if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
488 			fatal("%s: imsg_read", __func__);
489 		if (n == 0) {
490 			/* This pipe is dead, so remove the event handler */
491 			event_del(&iev->ev);
492 			return;
493 		}
494 	}
495 
496 	if (event & EV_WRITE) {
497 		if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN)
498 			fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd);
499 		if (n == 0) {
500 			/* This pipe is dead, so remove the event handler */
501 			event_del(&iev->ev);
502 			return;
503 		}
504 	}
505 
506 	for (;;) {
507 		if ((n = imsg_get(ibuf, &imsg)) == -1)
508 			fatal("%s: imsg_get", __func__);
509 		if (n == 0)
510 			break;
511 
512 		DPRINTF("%s: got imsg %d from %s",
513 		    __func__, imsg.hdr.type,
514 		    vm->vm_params.vmc_params.vcp_name);
515 
516 		switch (imsg.hdr.type) {
517 		case IMSG_VMDOP_VM_SHUTDOWN:
518 			vm->vm_state |= VM_STATE_SHUTDOWN;
519 			break;
520 		case IMSG_VMDOP_VM_REBOOT:
521 			vm->vm_state &= ~VM_STATE_SHUTDOWN;
522 			break;
523 		case IMSG_VMDOP_SEND_VM_RESPONSE:
524 			IMSG_SIZE_CHECK(&imsg, &vmr);
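			/* FALLTHROUGH */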
525 		case IMSG_VMDOP_PAUSE_VM_RESPONSE:
526 		case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
527 			for (i = 0; i < nitems(procs); i++) {
528 				if (procs[i].p_id == PROC_PARENT) {
529 					proc_forward_imsg(procs[i].p_ps,
530 					    &imsg, PROC_PARENT, -1);
531 					break;
532 				}
533 			}
534 			break;
535 
536 		default:
537 			fatalx("%s: got invalid imsg %d from %s",
538 			    __func__, imsg.hdr.type,
539 			    vm->vm_params.vmc_params.vcp_name);
540 		}
541 		imsg_free(&imsg);
542 	}
543 	imsg_event_add(iev);
544 }
545 
546 /*
547  * terminate_vm
548  *
549  * Requests vmm(4) to terminate the VM whose ID is provided in the
550  * supplied vm_terminate_params structure (vtp->vtp_vm_id)
551  *
552  * Parameters
553  *  vtp: vm_terminate_params struct containing the ID of the VM to terminate
554  *
555  * Return values:
556  *  0: success
557  *  !0: ioctl to vmm(4) failed (eg, ENOENT if the supplied VM is not valid)
558  */
559 int
560 terminate_vm(struct vm_terminate_params *vtp)
561 {
562 	if (ioctl(env->vmd_fd, VMM_IOC_TERM, vtp) == -1)
563 		return (errno);
564 
565 	return (0);
566 }
567 
568 /*
569  * opentap
570  *
571  * Opens the next available tap device, up to MAX_TAP.
572  *
573  * Parameters
574  *  ifname: a buffer of at least IF_NAMESIZE bytes.
575  *
576  * Returns a file descriptor to the tap node opened, or -1 if no tap
577  * devices were available.
578  */
579 int
580 opentap(char *ifname)
581 {
582 	int i, fd;
583 	char path[PATH_MAX];
584 
585 	for (i = 0; i < MAX_TAP; i++) {
586 		snprintf(path, PATH_MAX, "/dev/tap%d", i);
587 		fd = open(path, O_RDWR | O_NONBLOCK);
588 		if (fd != -1) {
589 			snprintf(ifname, IF_NAMESIZE, "tap%d", i);
590 			return (fd);
591 		}
592 	}
593 	strlcpy(ifname, "tap", IF_NAMESIZE);
594 
595 	return (-1);
596 }
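
/*
 * Example use of opentap() (an illustrative sketch only): the caller
 * supplies a buffer of at least IF_NAMESIZE bytes and receives a
 * nonblocking fd for the first free tap(4) node:
 *
 *	char ifname[IF_NAMESIZE];
 *	int fd;
 *
 *	if ((fd = opentap(ifname)) == -1)
 *		log_warnx("no free tap device");
 *	else
 *		log_debug("using %s on fd %d", ifname, fd);
 */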
597 
598 /*
599  * vmm_start_vm
600  *
601  * Prepares and forks a new VM process.
602  *
603  * Parameters:
604  *  imsg: The VM data structure, including the VM create parameters.
605  *  id: Returns the VM id as reported by the kernel and obtained from the VM.
606  *  pid: Returns the VM pid to the parent.
607  *
608  * Return values:
609  *  0: success
610  *  !0: failure - typically an errno indicating the source of the failure
611  */
612 int
613 vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
614 {
615 	struct vm_create_params	*vcp;
616 	struct vmd_vm		*vm;
617 	int			 ret = EINVAL;
618 	int			 fds[2];
619 	pid_t			 vm_pid;
620 	size_t			 i, j, sz;
621 
622 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
623 		log_warnx("%s: can't find vm", __func__);
624 		ret = ENOENT;
625 		goto err;
626 	}
627 	vcp = &vm->vm_params.vmc_params;
628 
629 	if (!(vm->vm_state & VM_STATE_RECEIVED)) {
630 		if ((vm->vm_tty = imsg->fd) == -1) {
631 			log_warnx("%s: can't get tty", __func__);
632 			goto err;
633 		}
634 	}
635 
636 	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
637 		fatal("socketpair");
638 
639 	/* Start child vmd for this VM (fork, chroot, drop privs) */
640 	vm_pid = fork();
641 
642 	/* Start child failed? - cleanup and leave */
643 	if (vm_pid == -1) {
644 		log_warnx("%s: start child failed", __func__);
645 		ret = EIO;
646 		goto err;
647 	}
648 
649 	if (vm_pid > 0) {
650 		/* Parent */
651 		vm->vm_pid = vm_pid;
652 		close(fds[1]);
653 
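		/*
		 * The child inherited copies of these descriptors across
		 * fork(); close the parent's copies and reset them to -1.
		 */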
654 		for (i = 0 ; i < vcp->vcp_ndisks; i++) {
655 			for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
656 				if (vm->vm_disks[i][j] != -1)
657 					close(vm->vm_disks[i][j]);
658 				vm->vm_disks[i][j] = -1;
659 			}
660 		}
661 		for (i = 0 ; i < vcp->vcp_nnics; i++) {
662 			close(vm->vm_ifs[i].vif_fd);
663 			vm->vm_ifs[i].vif_fd = -1;
664 		}
665 		if (vm->vm_kernel != -1) {
666 			close(vm->vm_kernel);
667 			vm->vm_kernel = -1;
668 		}
669 		if (vm->vm_cdrom != -1) {
670 			close(vm->vm_cdrom);
671 			vm->vm_cdrom = -1;
672 		}
673 		if (vm->vm_tty != -1) {
674 			close(vm->vm_tty);
675 			vm->vm_tty = -1;
676 		}
677 
678 		/* Read back the kernel-generated vm id from the child */
679 		sz = atomicio(read, fds[0], &vcp->vcp_id, sizeof(vcp->vcp_id));
680 		if (sz != sizeof(vcp->vcp_id)) {
681 			log_debug("%s: failed to receive vm id from vm %s",
682 			    __func__, vcp->vcp_name);
683 			/* vmd could not allocate memory for the vm. */
684 			ret = ENOMEM;
685 			goto err;
686 		}
687 
688 		if (vcp->vcp_id == 0)
689 			goto err;
690 
691 		*id = vcp->vcp_id;
692 		*pid = vm->vm_pid;
693 
694 		if (vmm_pipe(vm, fds[0], vmm_dispatch_vm) == -1)
695 			fatal("setup vm pipe");
696 
697 		return (0);
698 	} else {
699 		/* Child */
700 		close(fds[0]);
701 		close(PROC_PARENT_SOCK_FILENO);
702 
703 		ret = start_vm(vm, fds[1]);
704 
705 		_exit(ret);
706 	}
707 
708 	return (0);
709 
710  err:
711 	vm_remove(vm, __func__);
712 
713 	return (ret);
714 }
715 
716 /*
717  * get_info_vm
718  *
719  * Returns a list of VMs known to vmm(4).
720  *
721  * Parameters:
722  *  ps: the privsep context.
723  *  imsg: the received imsg including the peer id.
724  *  terminate: if set, terminate each VM found instead of reporting it.
725  *
726  * Return values:
727  *  0: success
728  *  !0: failure (eg, ENOMEM, EIO or another error code from vmm(4) ioctl)
729  */
730 int
731 get_info_vm(struct privsep *ps, struct imsg *imsg, int terminate)
732 {
733 	int ret;
734 	size_t ct, i;
735 	struct vm_info_params vip;
736 	struct vm_info_result *info;
737 	struct vm_terminate_params vtp;
738 	struct vmop_info_result vir;
739 
740 	/*
741 	 * We issue the VMM_IOC_INFO ioctl twice, once with an input
742 	 * buffer size of 0, which results in vmm(4) returning the
743 	 * number of bytes required back to us in vip.vip_size,
744 	 * and then we call it again after malloc'ing the required
745 	 * number of bytes.
746 	 *
747 	 * It is possible that we could fail the second time (e.g. if
748 	 * another VM was created in the instant between the two
749 	 * ioctls), but in that case the caller can just try again
750 	 * as vmm(4) will return a zero-sized list.
751 	 */
752 	vip.vip_size = 0;
753 	info = NULL;
754 	ret = 0;
755 	memset(&vir, 0, sizeof(vir));
756 
757 	/* First ioctl to see how many bytes needed (vip.vip_size) */
758 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1)
759 		return (errno);
760 
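	/* A size-only probe should not have returned any VM entries. */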
761 	if (vip.vip_info_ct != 0)
762 		return (EIO);
763 
764 	info = malloc(vip.vip_size);
765 	if (info == NULL)
766 		return (ENOMEM);
767 
768 	/* Second ioctl to get the actual list */
769 	vip.vip_info = info;
770 	if (ioctl(env->vmd_fd, VMM_IOC_INFO, &vip) == -1) {
771 		ret = errno;
772 		free(info);
773 		return (ret);
774 	}
775 
776 	/* Return info */
777 	ct = vip.vip_size / sizeof(struct vm_info_result);
778 	for (i = 0; i < ct; i++) {
779 		if (terminate) {
780 			vtp.vtp_vm_id = info[i].vir_id;
781 			if ((ret = terminate_vm(&vtp)) != 0)
782 				break;
783 			log_debug("%s: terminated vm %s (id %d)", __func__,
784 			    info[i].vir_name, info[i].vir_id);
785 			continue;
786 		}
787 		memcpy(&vir.vir_info, &info[i], sizeof(vir.vir_info));
788 		vir.vir_info.vir_id = vm_id2vmid(info[i].vir_id, NULL);
789 		if (proc_compose_imsg(ps, PROC_PARENT, -1,
790 		    IMSG_VMDOP_GET_INFO_VM_DATA, imsg->hdr.peerid, -1,
791 		    &vir, sizeof(vir)) == -1) {
792 			ret = EIO;
793 			break;
794 		}
795 	}
796 	free(info);
797 
798 	return (ret);
799 }
800