xref: /openbsd-src/usr.sbin/vmd/config.c (revision 65bbee46cad7861cd5a570f338df9e976422e3ab)
1*65bbee46Sjsg /*	$OpenBSD: config.c,v 1.76 2024/09/26 01:45:13 jsg Exp $	*/
2f0bbd60cSreyk 
3f0bbd60cSreyk /*
4f0bbd60cSreyk  * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
5f0bbd60cSreyk  *
6f0bbd60cSreyk  * Permission to use, copy, modify, and distribute this software for any
7f0bbd60cSreyk  * purpose with or without fee is hereby granted, provided that the above
8f0bbd60cSreyk  * copyright notice and this permission notice appear in all copies.
9f0bbd60cSreyk  *
10f0bbd60cSreyk  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11f0bbd60cSreyk  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12f0bbd60cSreyk  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13f0bbd60cSreyk  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14f0bbd60cSreyk  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15f0bbd60cSreyk  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16f0bbd60cSreyk  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17f0bbd60cSreyk  */
18f0bbd60cSreyk 
19f0bbd60cSreyk #include <sys/types.h>
20f0bbd60cSreyk #include <sys/queue.h>
21f0bbd60cSreyk #include <sys/time.h>
225921535cSreyk 
235921535cSreyk #include <net/if.h>
24f0bbd60cSreyk 
25f0bbd60cSreyk #include <stdio.h>
26f0bbd60cSreyk #include <stdlib.h>
27f0bbd60cSreyk #include <unistd.h>
28f0bbd60cSreyk #include <limits.h>
29f0bbd60cSreyk #include <string.h>
30f0bbd60cSreyk #include <fcntl.h>
3106071ef0Sreyk #include <errno.h>
32f0bbd60cSreyk #include <imsg.h>
33f0bbd60cSreyk 
34f0bbd60cSreyk #include "proc.h"
35f0bbd60cSreyk #include "vmd.h"
36f0bbd60cSreyk 
/* Supported bridge types; the list is terminated by a NULL entry. */
const char *vmd_descsw[] = {
	"bridge",
	"veb",
	NULL
};

/* Defined below; shared by config_init() and config_purge(). */
static int	 config_init_localprefix(struct vmd_config *);
41723f86d2Sreyk 
42723f86d2Sreyk static int
43723f86d2Sreyk config_init_localprefix(struct vmd_config *cfg)
44723f86d2Sreyk {
452272e586Sdv 	if (parse_prefix4(VMD_DHCP_PREFIX, &cfg->cfg_localprefix, NULL) == -1)
46723f86d2Sreyk 		return (-1);
47723f86d2Sreyk 
48723f86d2Sreyk 	/* IPv6 is disabled by default */
49723f86d2Sreyk 	cfg->cfg_flags &= ~VMD_CFG_INET6;
50723f86d2Sreyk 
51723f86d2Sreyk 	/* Generate random IPv6 prefix only once */
52723f86d2Sreyk 	if (cfg->cfg_flags & VMD_CFG_AUTOINET6)
53723f86d2Sreyk 		return (0);
542272e586Sdv 	if (parse_prefix6(VMD_ULA_PREFIX, &cfg->cfg_localprefix, NULL) == -1)
55723f86d2Sreyk 		return (-1);
562272e586Sdv 
57723f86d2Sreyk 	/* Randomize the 56 bits "Global ID" and "Subnet ID" */
582272e586Sdv 	arc4random_buf(&cfg->cfg_localprefix.lp_in6.s6_addr[1], 7);
59723f86d2Sreyk 	cfg->cfg_flags |= VMD_CFG_AUTOINET6;
60723f86d2Sreyk 
61723f86d2Sreyk 	return (0);
62723f86d2Sreyk }
63723f86d2Sreyk 
64f0bbd60cSreyk int
65f0bbd60cSreyk config_init(struct vmd *env)
66f0bbd60cSreyk {
67f0bbd60cSreyk 	struct privsep		*ps = &env->vmd_ps;
68008065a5Sreyk 	unsigned int		 what;
69f0bbd60cSreyk 
70f0bbd60cSreyk 	/* Global configuration */
71f0bbd60cSreyk 	ps->ps_what[PROC_PARENT] = CONFIG_ALL;
72f0bbd60cSreyk 	ps->ps_what[PROC_VMM] = CONFIG_VMS;
73c48cfcf4Sreyk 
74723f86d2Sreyk 	/* Local prefix */
75723f86d2Sreyk 	if (config_init_localprefix(&env->vmd_cfg) == -1)
76c48cfcf4Sreyk 		return (-1);
77f0bbd60cSreyk 
78f0bbd60cSreyk 	/* Other configuration */
79f0bbd60cSreyk 	what = ps->ps_what[privsep_process];
80f0bbd60cSreyk 	if (what & CONFIG_VMS) {
81f0bbd60cSreyk 		if ((env->vmd_vms = calloc(1, sizeof(*env->vmd_vms))) == NULL)
82f0bbd60cSreyk 			return (-1);
83b503c8f5Sori 		if ((env->vmd_known = calloc(1, sizeof(*env->vmd_known))) == NULL)
84b503c8f5Sori 			return (-1);
85f0bbd60cSreyk 		TAILQ_INIT(env->vmd_vms);
86b503c8f5Sori 		TAILQ_INIT(env->vmd_known);
87f0bbd60cSreyk 	}
88789e0822Sreyk 	if (what & CONFIG_SWITCHES) {
89789e0822Sreyk 		if ((env->vmd_switches = calloc(1,
90789e0822Sreyk 		    sizeof(*env->vmd_switches))) == NULL)
91789e0822Sreyk 			return (-1);
92789e0822Sreyk 		TAILQ_INIT(env->vmd_switches);
93789e0822Sreyk 	}
94f0bbd60cSreyk 
95f0bbd60cSreyk 	return (0);
96f0bbd60cSreyk }
97f0bbd60cSreyk 
98f0bbd60cSreyk void
99008065a5Sreyk config_purge(struct vmd *env, unsigned int reset)
100f0bbd60cSreyk {
101f0bbd60cSreyk 	struct privsep		*ps = &env->vmd_ps;
102b503c8f5Sori 	struct name2id		*n2i;
103f0bbd60cSreyk 	struct vmd_vm		*vm;
104789e0822Sreyk 	struct vmd_switch	*vsw;
105008065a5Sreyk 	unsigned int		 what;
106f0bbd60cSreyk 
107ddadf993Sreyk 	DPRINTF("%s: %s purging vms and switches",
108091d684bSreyk 	    __func__, ps->ps_title[privsep_process]);
109ddadf993Sreyk 
110c48cfcf4Sreyk 	/* Reset global configuration (prefix was verified before) */
111723f86d2Sreyk 	config_init_localprefix(&env->vmd_cfg);
112c48cfcf4Sreyk 
113c48cfcf4Sreyk 	/* Reset other configuration */
114f0bbd60cSreyk 	what = ps->ps_what[privsep_process] & reset;
115f0bbd60cSreyk 	if (what & CONFIG_VMS && env->vmd_vms != NULL) {
116e0994759Smlarkin 		while ((vm = TAILQ_FIRST(env->vmd_vms)) != NULL) {
117ddadf993Sreyk 			vm_remove(vm, __func__);
118e0994759Smlarkin 		}
119b503c8f5Sori 		while ((n2i = TAILQ_FIRST(env->vmd_known)) != NULL) {
120b503c8f5Sori 			TAILQ_REMOVE(env->vmd_known, n2i, entry);
121b503c8f5Sori 			free(n2i);
122b503c8f5Sori 		}
123789e0822Sreyk 		env->vmd_nvm = 0;
124789e0822Sreyk 	}
125789e0822Sreyk 	if (what & CONFIG_SWITCHES && env->vmd_switches != NULL) {
126789e0822Sreyk 		while ((vsw = TAILQ_FIRST(env->vmd_switches)) != NULL)
127789e0822Sreyk 			switch_remove(vsw);
128789e0822Sreyk 		env->vmd_nswitches = 0;
129f0bbd60cSreyk 	}
130f0bbd60cSreyk }
131f0bbd60cSreyk 
132f0bbd60cSreyk int
133c48cfcf4Sreyk config_setconfig(struct vmd *env)
134c48cfcf4Sreyk {
135c48cfcf4Sreyk 	struct privsep	*ps = &env->vmd_ps;
136c48cfcf4Sreyk 	unsigned int	 id;
137c48cfcf4Sreyk 
138ddadf993Sreyk 	DPRINTF("%s: setting config", __func__);
139ddadf993Sreyk 
140c48cfcf4Sreyk 	for (id = 0; id < PROC_MAX; id++) {
141c48cfcf4Sreyk 		if (id == privsep_process)
142c48cfcf4Sreyk 			continue;
143c48cfcf4Sreyk 		proc_compose(ps, id, IMSG_VMDOP_CONFIG, &env->vmd_cfg,
144c48cfcf4Sreyk 		    sizeof(env->vmd_cfg));
145c48cfcf4Sreyk 	}
146c48cfcf4Sreyk 
147c48cfcf4Sreyk 	return (0);
148c48cfcf4Sreyk }
149c48cfcf4Sreyk 
150c48cfcf4Sreyk int
151c48cfcf4Sreyk config_getconfig(struct vmd *env, struct imsg *imsg)
152c48cfcf4Sreyk {
153091d684bSreyk 	struct privsep	*ps = &env->vmd_ps;
154091d684bSreyk 
155091d684bSreyk 	log_debug("%s: %s retrieving config",
156091d684bSreyk 	    __func__, ps->ps_title[privsep_process]);
157091d684bSreyk 
158c48cfcf4Sreyk 	IMSG_SIZE_CHECK(imsg, &env->vmd_cfg);
159c48cfcf4Sreyk 	memcpy(&env->vmd_cfg, imsg->data, sizeof(env->vmd_cfg));
160c48cfcf4Sreyk 
161c48cfcf4Sreyk 	return (0);
162c48cfcf4Sreyk }
163c48cfcf4Sreyk 
164c48cfcf4Sreyk int
165008065a5Sreyk config_setreset(struct vmd *env, unsigned int reset)
166f0bbd60cSreyk {
167f0bbd60cSreyk 	struct privsep	*ps = &env->vmd_ps;
168f0bbd60cSreyk 	unsigned int	 id;
169f0bbd60cSreyk 
170ddadf993Sreyk 	DPRINTF("%s: resetting state", __func__);
171ddadf993Sreyk 
172f0bbd60cSreyk 	for (id = 0; id < PROC_MAX; id++) {
173f0bbd60cSreyk 		if ((reset & ps->ps_what[id]) == 0 ||
174f0bbd60cSreyk 		    id == privsep_process)
175f0bbd60cSreyk 			continue;
176f0bbd60cSreyk 		proc_compose(ps, id, IMSG_CTL_RESET, &reset, sizeof(reset));
177f0bbd60cSreyk 	}
178f0bbd60cSreyk 
179f0bbd60cSreyk 	return (0);
180f0bbd60cSreyk }
181f0bbd60cSreyk 
182f0bbd60cSreyk int
183f0bbd60cSreyk config_getreset(struct vmd *env, struct imsg *imsg)
184f0bbd60cSreyk {
185f0bbd60cSreyk 	unsigned int	 mode;
186f0bbd60cSreyk 
187f0bbd60cSreyk 	IMSG_SIZE_CHECK(imsg, &mode);
188f0bbd60cSreyk 	memcpy(&mode, imsg->data, sizeof(mode));
189f0bbd60cSreyk 
190091d684bSreyk 	log_debug("%s: %s resetting state",
191ddadf993Sreyk 	    __func__, env->vmd_ps.ps_title[privsep_process]);
192ddadf993Sreyk 
193f0bbd60cSreyk 	config_purge(env, mode);
194f0bbd60cSreyk 
195f0bbd60cSreyk 	return (0);
196f0bbd60cSreyk }
197f0bbd60cSreyk 
1982d545756Sdv /*
1992d545756Sdv  * config_setvm
2002d545756Sdv  *
2012d545756Sdv  * Configure a vm, opening any required file descriptors.
2022d545756Sdv  *
2032d545756Sdv  * Returns 0 on success, error code on failure.
2042d545756Sdv  */
205f0bbd60cSreyk int
206e5d5b350Sreyk config_setvm(struct privsep *ps, struct vmd_vm *vm, uint32_t peerid, uid_t uid)
207ea9c30d9Sedd {
208d489aa7eSdv 	int diskfds[VM_MAX_DISKS_PER_VM][VM_MAX_BASE_PER_DISK];
209ea9c30d9Sedd 	struct vmd_if		*vif;
210ea9c30d9Sedd 	struct vmop_create_params *vmc = &vm->vm_params;
211ea9c30d9Sedd 	struct vm_create_params	*vcp = &vmc->vmc_params;
21273613953Sreyk 	unsigned int		 i, j;
2132d545756Sdv 	int			 fd = -1, cdromfd = -1, kernfd = -1;
21473613953Sreyk 	int			*tapfds = NULL;
2152d545756Sdv 	int			 n = 0, aflags, oflags, ret = -1;
216ea9c30d9Sedd 	char			 ifname[IF_NAMESIZE], *s;
2172d545756Sdv 	char			 path[PATH_MAX], base[PATH_MAX];
218ea9c30d9Sedd 	unsigned int		 unit;
21975cf143aSreyk 	struct timeval		 tv, rate, since_last;
22097f33f1dSdv 	struct vmop_addr_req	 var;
2217933afb4Sdv 	size_t			 bytes = 0;
222ea9c30d9Sedd 
22319700f36Sjasper 	if (vm->vm_state & VM_STATE_RUNNING) {
2246f040439Sreyk 		log_warnx("%s: vm is already running", __func__);
2252d545756Sdv 		return (EALREADY);
226382b7e55Sreyk 	}
227382b7e55Sreyk 
22875cf143aSreyk 	/*
22975cf143aSreyk 	 * Rate-limit the VM so that it cannot restart in a loop:
23075cf143aSreyk 	 * if the VM restarts after less than VM_START_RATE_SEC seconds,
23175cf143aSreyk 	 * we increment the limit counter.  After VM_START_RATE_LIMIT
23275cf143aSreyk 	 * of suchs fast reboots the VM is stopped.
23375cf143aSreyk 	 */
23475cf143aSreyk 	getmonotime(&tv);
23575cf143aSreyk 	if (vm->vm_start_tv.tv_sec) {
23675cf143aSreyk 		timersub(&tv, &vm->vm_start_tv, &since_last);
23775cf143aSreyk 
23875cf143aSreyk 		rate.tv_sec = VM_START_RATE_SEC;
23975cf143aSreyk 		rate.tv_usec = 0;
24075cf143aSreyk 		if (timercmp(&since_last, &rate, <))
24175cf143aSreyk 			vm->vm_start_limit++;
24275cf143aSreyk 		else {
24375cf143aSreyk 			/* Reset counter */
24475cf143aSreyk 			vm->vm_start_limit = 0;
24575cf143aSreyk 		}
24675cf143aSreyk 
24775cf143aSreyk 		log_debug("%s: vm %u restarted after %lld.%ld seconds,"
24875cf143aSreyk 		    " limit %d/%d", __func__, vcp->vcp_id, since_last.tv_sec,
24975cf143aSreyk 		    since_last.tv_usec, vm->vm_start_limit,
25075cf143aSreyk 		    VM_START_RATE_LIMIT);
25175cf143aSreyk 
25275cf143aSreyk 		if (vm->vm_start_limit >= VM_START_RATE_LIMIT) {
25375cf143aSreyk 			log_warnx("%s: vm %u restarted too quickly",
25475cf143aSreyk 			    __func__, vcp->vcp_id);
2552d545756Sdv 			return (EPERM);
25675cf143aSreyk 		}
25775cf143aSreyk 	}
25875cf143aSreyk 	vm->vm_start_tv = tv;
25975cf143aSreyk 
260d489aa7eSdv 	for (i = 0; i < VM_MAX_DISKS_PER_VM; i++)
26173613953Sreyk 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
26273613953Sreyk 			diskfds[i][j] = -1;
263c94d559dSstefan 
26473a98491Sdv 	tapfds = reallocarray(NULL, vmc->vmc_nnics, sizeof(*tapfds));
265c94d559dSstefan 	if (tapfds == NULL) {
2662d545756Sdv 		ret = errno;
2676f040439Sreyk 		log_warn("%s: can't allocate tap fds", __func__);
2682d545756Sdv 		return (ret);
269c94d559dSstefan 	}
27073a98491Sdv 	for (i = 0; i < vmc->vmc_nnics; i++)
271c94d559dSstefan 		tapfds[i] = -1;
272c94d559dSstefan 
27352956b0dSreyk 	vm->vm_peerid = peerid;
274e5d5b350Sreyk 	vm->vm_uid = uid;
275f0bbd60cSreyk 
2762d545756Sdv 	/*
2772d545756Sdv 	 * From here onward, all failures need cleanup and use goto fail
2782d545756Sdv 	 */
279b848b186Sdv 	if (!(vm->vm_state & VM_STATE_RECEIVED) && vm->vm_kernel == -1) {
280b848b186Sdv 		if (vm->vm_kernel_path != NULL) {
28182fb2f0aSreyk 			/* Open external kernel for child */
282b3bc6112Sdv 			kernfd = open(vm->vm_kernel_path, O_RDONLY | O_CLOEXEC);
283b848b186Sdv 			if (kernfd == -1) {
2842d545756Sdv 				ret = errno;
28567de8c6dSreyk 				log_warn("%s: can't open kernel or BIOS "
286b848b186Sdv 				    "boot image %s", __func__,
287b848b186Sdv 				    vm->vm_kernel_path);
28838d0e0c3Sreyk 				goto fail;
28938d0e0c3Sreyk 			}
29065329a6fSreyk 		}
29138d0e0c3Sreyk 
29238d0e0c3Sreyk 		/*
293eed20f3bSpd 		 * Try to open the default BIOS image if no kernel/BIOS has been
294eed20f3bSpd 		 * specified.  The BIOS is an external firmware file that is
295eed20f3bSpd 		 * typically distributed separately due to an incompatible
296eed20f3bSpd 		 * license.
29738d0e0c3Sreyk 		 */
298b848b186Sdv 		if (kernfd == -1) {
299b3bc6112Sdv 			if ((kernfd = open(VM_DEFAULT_BIOS,
300b3bc6112Sdv 			    O_RDONLY | O_CLOEXEC)) == -1) {
30107e1a8caSori 				log_warn("can't open %s", VM_DEFAULT_BIOS);
3022d545756Sdv 				ret = VMD_BIOS_MISSING;
303f0bbd60cSreyk 				goto fail;
304f0bbd60cSreyk 			}
305b848b186Sdv 		}
306a882c86aSreyk 
307a13de4d1Skn 		if (vm_checkaccess(kernfd,
308a882c86aSreyk 		    vmc->vmc_checkaccess & VMOP_CREATE_KERNEL,
309a882c86aSreyk 		    uid, R_OK) == -1) {
310b848b186Sdv 			log_warnx("vm \"%s\" no read access to kernel "
311b848b186Sdv 			    "%s", vcp->vcp_name, vm->vm_kernel_path);
3122d545756Sdv 			ret = EPERM;
313a882c86aSreyk 			goto fail;
314a882c86aSreyk 		}
315b848b186Sdv 
316b848b186Sdv 		vm->vm_kernel = kernfd;
317b848b186Sdv 		vmc->vmc_kernel = kernfd;
318eed20f3bSpd 	}
319f0bbd60cSreyk 
32095ab188fSccardenas 	/* Open CDROM image for child */
32173a98491Sdv 	if (strlen(vmc->vmc_cdrom)) {
32295ab188fSccardenas 		/* Stat cdrom to ensure it is a regular file */
32395ab188fSccardenas 		if ((cdromfd =
32473a98491Sdv 		    open(vmc->vmc_cdrom, O_RDONLY)) == -1) {
32573a98491Sdv 			log_warn("can't open cdrom %s", vmc->vmc_cdrom);
3262d545756Sdv 			ret = VMD_CDROM_MISSING;
32795ab188fSccardenas 			goto fail;
32895ab188fSccardenas 		}
329a882c86aSreyk 
330a882c86aSreyk 		if (vm_checkaccess(cdromfd,
331a882c86aSreyk 		    vmc->vmc_checkaccess & VMOP_CREATE_CDROM,
332a882c86aSreyk 		    uid, R_OK) == -1) {
3333495a04bSreyk 			log_warnx("vm \"%s\" no read access to cdrom %s",
33473a98491Sdv 			    vcp->vcp_name, vmc->vmc_cdrom);
3352d545756Sdv 			ret = EPERM;
33695ab188fSccardenas 			goto fail;
33795ab188fSccardenas 		}
33895ab188fSccardenas 	}
33995ab188fSccardenas 
340b3bc6112Sdv 	/*
341b3bc6112Sdv 	 * Open disk images for child. Don't set O_CLOEXEC as these must be
342b3bc6112Sdv 	 * explicitly closed by the vm process during virtio subprocess launch.
343b3bc6112Sdv 	 */
34473a98491Sdv 	for (i = 0 ; i < vmc->vmc_ndisks; i++) {
34573a98491Sdv 		if (strlcpy(path, vmc->vmc_disks[i], sizeof(path))
34673613953Sreyk 		   >= sizeof(path))
34773a98491Sdv 			log_warnx("disk path %s too long", vmc->vmc_disks[i]);
34873613953Sreyk 		memset(vmc->vmc_diskbases, 0, sizeof(vmc->vmc_diskbases));
34973613953Sreyk 		oflags = O_RDWR | O_EXLOCK | O_NONBLOCK;
35073613953Sreyk 		aflags = R_OK | W_OK;
35173613953Sreyk 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
35231749646Smlarkin 			/* Stat disk[i] to ensure it is a regular file */
35373613953Sreyk 			if ((diskfds[i][j] = open(path, oflags)) == -1) {
35407e1a8caSori 				log_warn("can't open disk %s",
35573a98491Sdv 				    vmc->vmc_disks[i]);
3562d545756Sdv 				ret = VMD_DISK_MISSING;
35731749646Smlarkin 				goto fail;
35831749646Smlarkin 			}
359a882c86aSreyk 
36073613953Sreyk 			if (vm_checkaccess(diskfds[i][j],
361a882c86aSreyk 			    vmc->vmc_checkaccess & VMOP_CREATE_DISK,
36273613953Sreyk 			    uid, aflags) == -1) {
36373613953Sreyk 				log_warnx("vm \"%s\" unable to access "
36473613953Sreyk 				    "disk %s", vcp->vcp_name, path);
365a882c86aSreyk 				errno = EPERM;
36631749646Smlarkin 				goto fail;
36731749646Smlarkin 			}
36873613953Sreyk 
36973613953Sreyk 			/*
37073613953Sreyk 			 * Clear the write and exclusive flags for base images.
37173613953Sreyk 			 * All writes should go to the top image, allowing them
37273613953Sreyk 			 * to be shared.
37373613953Sreyk 			 */
37473613953Sreyk 			oflags = O_RDONLY | O_NONBLOCK;
37573613953Sreyk 			aflags = R_OK;
376a7eff89fSreyk 			n = virtio_get_base(diskfds[i][j], base, sizeof(base),
3774d2a1fb2Sreyk 			    vmc->vmc_disktypes[i], path);
37873613953Sreyk 			if (n == 0)
37973613953Sreyk 				break;
38073613953Sreyk 			if (n == -1) {
38173613953Sreyk 				log_warnx("vm \"%s\" unable to read "
382f6eb3116Smlarkin 				    "base for disk %s", vcp->vcp_name,
38373a98491Sdv 				    vmc->vmc_disks[i]);
38473613953Sreyk 				goto fail;
38573613953Sreyk 			}
386a7eff89fSreyk 			(void)strlcpy(path, base, sizeof(path));
38773613953Sreyk 		}
388f0bbd60cSreyk 	}
389f0bbd60cSreyk 
390789e0822Sreyk 	/* Open network interfaces */
39173a98491Sdv 	for (i = 0 ; i < vmc->vmc_nnics; i++) {
392789e0822Sreyk 		vif = &vm->vm_ifs[i];
393789e0822Sreyk 
394789e0822Sreyk 		/* Check if the user has requested a specific tap(4) */
395789e0822Sreyk 		s = vmc->vmc_ifnames[i];
396789e0822Sreyk 		if (*s != '\0' && strcmp("tap", s) != 0) {
397789e0822Sreyk 			if (priv_getiftype(s, ifname, &unit) == -1 ||
398789e0822Sreyk 			    strcmp(ifname, "tap") != 0) {
3996f040439Sreyk 				log_warnx("%s: invalid tap name %s",
4006f040439Sreyk 				    __func__, s);
4012d545756Sdv 				ret = EINVAL;
402f0bbd60cSreyk 				goto fail;
403f0bbd60cSreyk 			}
404789e0822Sreyk 		} else
405789e0822Sreyk 			s = NULL;
4065921535cSreyk 
407789e0822Sreyk 		/*
408789e0822Sreyk 		 * Either open the requested tap(4) device or get
409b3bc6112Sdv 		 * the next available one. Don't set O_CLOEXEC as these
410b3bc6112Sdv 		 * should be closed by the vm process during virtio device
411b3bc6112Sdv 		 * launch.
412789e0822Sreyk 		 */
413789e0822Sreyk 		if (s != NULL) {
414789e0822Sreyk 			snprintf(path, PATH_MAX, "/dev/%s", s);
415789e0822Sreyk 			tapfds[i] = open(path, O_RDWR | O_NONBLOCK);
416789e0822Sreyk 		} else {
417789e0822Sreyk 			tapfds[i] = opentap(ifname);
418789e0822Sreyk 			s = ifname;
419789e0822Sreyk 		}
420789e0822Sreyk 		if (tapfds[i] == -1) {
421adb7c8aaSdv 			ret = errno;
422adb7c8aaSdv 			log_warnx("%s: can't open /dev/%s", __func__, s);
423789e0822Sreyk 			goto fail;
424789e0822Sreyk 		}
425789e0822Sreyk 		if ((vif->vif_name = strdup(s)) == NULL) {
4266f040439Sreyk 			log_warn("%s: can't save tap %s", __func__, s);
4275921535cSreyk 			goto fail;
4285921535cSreyk 		}
429789e0822Sreyk 
430789e0822Sreyk 		/* Check if the the interface is attached to a switch */
431789e0822Sreyk 		s = vmc->vmc_ifswitch[i];
432789e0822Sreyk 		if (*s != '\0') {
433789e0822Sreyk 			if ((vif->vif_switch = strdup(s)) == NULL) {
4346f040439Sreyk 				log_warn("%s: can't save switch %s",
4356f040439Sreyk 				    __func__, s);
436789e0822Sreyk 				goto fail;
437789e0822Sreyk 			}
438789e0822Sreyk 		}
439789e0822Sreyk 
4402b519c1fSreyk 		/* Check if the the interface is assigned to a group */
4412b519c1fSreyk 		s = vmc->vmc_ifgroup[i];
4422b519c1fSreyk 		if (*s != '\0') {
4432b519c1fSreyk 			if ((vif->vif_group = strdup(s)) == NULL) {
4446f040439Sreyk 				log_warn("%s: can't save group %s",
4456f040439Sreyk 				    __func__, s);
4462b519c1fSreyk 				goto fail;
4472b519c1fSreyk 			}
4482b519c1fSreyk 		}
4492b519c1fSreyk 
450f418e70cSreyk 		/* non-default rdomain (requires VMIFF_RDOMAIN below) */
451f418e70cSreyk 		vif->vif_rdomain = vmc->vmc_ifrdomain[i];
452f418e70cSreyk 
453789e0822Sreyk 		/* Set the interface status */
4542b2a5f0dSreyk 		vif->vif_flags =
4552b2a5f0dSreyk 		    vmc->vmc_ifflags[i] & (VMIFF_UP|VMIFF_OPTMASK);
456f0bbd60cSreyk 	}
457f0bbd60cSreyk 
458b3bc6112Sdv 	/*
459b3bc6112Sdv 	 * Open TTY. Duplicate the fd before sending so the privileged parent
460b3bc6112Sdv 	 * process can perform permissions cleanup of the pty on vm termination.
461b3bc6112Sdv 	 */
46222647093Sdv 	if (vm->vm_ttyname[0] == '\0') {
463d9589da9Sreyk 		if (vm_opentty(vm) == -1) {
464d9589da9Sreyk 			log_warn("%s: can't open tty %s", __func__,
46522647093Sdv 			    vm->vm_ttyname[0] == '\0' ? "" : vm->vm_ttyname);
466f0bbd60cSreyk 			goto fail;
467f0bbd60cSreyk 		}
468850fcd3fSreyk 	}
469850fcd3fSreyk 	if ((fd = dup(vm->vm_tty)) == -1) {
470850fcd3fSreyk 		log_warn("%s: can't re-open tty %s", __func__, vm->vm_ttyname);
471850fcd3fSreyk 		goto fail;
472850fcd3fSreyk 	}
473f0bbd60cSreyk 
474c94d559dSstefan 	/* Send VM information */
4752d545756Sdv 	/* XXX check proc_compose_imsg return values */
47619700f36Sjasper 	if (vm->vm_state & VM_STATE_RECEIVED)
477eed20f3bSpd 		proc_compose_imsg(ps, PROC_VMM, -1,
478eed20f3bSpd 		    IMSG_VMDOP_RECEIVE_VM_REQUEST, vm->vm_vmid, fd, vmc,
479eed20f3bSpd 		    sizeof(struct vmop_create_params));
480eed20f3bSpd 	else
481c94d559dSstefan 		proc_compose_imsg(ps, PROC_VMM, -1,
482b848b186Sdv 		    IMSG_VMDOP_START_VM_REQUEST, vm->vm_vmid, vm->vm_kernel,
483789e0822Sreyk 		    vmc, sizeof(*vmc));
48495ab188fSccardenas 
48573a98491Sdv 	if (strlen(vmc->vmc_cdrom))
48695ab188fSccardenas 		proc_compose_imsg(ps, PROC_VMM, -1,
48795ab188fSccardenas 		    IMSG_VMDOP_START_VM_CDROM, vm->vm_vmid, cdromfd,
48895ab188fSccardenas 		    NULL, 0);
48995ab188fSccardenas 
49073a98491Sdv 	for (i = 0; i < vmc->vmc_ndisks; i++) {
49173613953Sreyk 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
49273613953Sreyk 			if (diskfds[i][j] == -1)
49373613953Sreyk 				break;
494c94d559dSstefan 			proc_compose_imsg(ps, PROC_VMM, -1,
49573613953Sreyk 			    IMSG_VMDOP_START_VM_DISK, vm->vm_vmid,
49673613953Sreyk 			    diskfds[i][j], &i, sizeof(i));
49773613953Sreyk 		}
498c94d559dSstefan 	}
49973a98491Sdv 	for (i = 0; i < vmc->vmc_nnics; i++) {
500c94d559dSstefan 		proc_compose_imsg(ps, PROC_VMM, -1,
501c94d559dSstefan 		    IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i],
502c94d559dSstefan 		    &i, sizeof(i));
50397f33f1dSdv 
50497f33f1dSdv 		memset(&var, 0, sizeof(var));
50597f33f1dSdv 		var.var_vmid = vm->vm_vmid;
50697f33f1dSdv 		var.var_nic_idx = i;
50797f33f1dSdv 		proc_compose_imsg(ps, PROC_PRIV, -1, IMSG_VMDOP_PRIV_GET_ADDR,
50897f33f1dSdv 		    vm->vm_vmid, dup(tapfds[i]), &var, sizeof(var));
509c94d559dSstefan 	}
510c94d559dSstefan 
51119700f36Sjasper 	if (!(vm->vm_state & VM_STATE_RECEIVED))
512f0bbd60cSreyk 		proc_compose_imsg(ps, PROC_VMM, -1,
5135921535cSreyk 		    IMSG_VMDOP_START_VM_END, vm->vm_vmid, fd, NULL, 0);
514f0bbd60cSreyk 
515c94d559dSstefan 	free(tapfds);
516c94d559dSstefan 
5177933afb4Sdv 	/* Collapse any memranges after the vm was sent to PROC_VMM */
5187933afb4Sdv 	if (vcp->vcp_nmemranges > 0) {
5197933afb4Sdv 		for (i = 0; i < vcp->vcp_nmemranges; i++)
5207933afb4Sdv 			bytes += vcp->vcp_memranges[i].vmr_size;
5217933afb4Sdv 		memset(&vcp->vcp_memranges, 0, sizeof(vcp->vcp_memranges));
5227933afb4Sdv 		vcp->vcp_nmemranges = 0;
5237933afb4Sdv 		vcp->vcp_memranges[0].vmr_size = bytes;
5247933afb4Sdv 	}
52519700f36Sjasper 	vm->vm_state |= VM_STATE_RUNNING;
526f0bbd60cSreyk 	return (0);
527f0bbd60cSreyk 
528f0bbd60cSreyk  fail:
52907e1a8caSori 	log_warnx("failed to start vm %s", vcp->vcp_name);
5306f040439Sreyk 
531b848b186Sdv 	if (vm->vm_kernel != -1)
532c94d559dSstefan 		close(kernfd);
53395ab188fSccardenas 	if (cdromfd != -1)
53495ab188fSccardenas 		close(cdromfd);
53573a98491Sdv 	for (i = 0; i < vmc->vmc_ndisks; i++)
53673613953Sreyk 		for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
53773613953Sreyk 			if (diskfds[i][j] != -1)
53873613953Sreyk 				close(diskfds[i][j]);
539c94d559dSstefan 	if (tapfds != NULL) {
54073a98491Sdv 		for (i = 0; i < vmc->vmc_nnics; i++)
541c94d559dSstefan 			close(tapfds[i]);
542c94d559dSstefan 		free(tapfds);
543c94d559dSstefan 	}
544c94d559dSstefan 
545649f977fSccardenas 	if (vm->vm_from_config) {
546ddadf993Sreyk 		vm_stop(vm, 0, __func__);
547649f977fSccardenas 	} else {
548ddadf993Sreyk 		vm_remove(vm, __func__);
549649f977fSccardenas 	}
5502d545756Sdv 
5512d545756Sdv 	return (ret);
552f0bbd60cSreyk }
553f0bbd60cSreyk 
554f0bbd60cSreyk int
5554f76ab55Sreyk config_getvm(struct privsep *ps, struct imsg *imsg)
5564f76ab55Sreyk {
5574f76ab55Sreyk 	struct vmop_create_params	 vmc;
558501909c0Smbuhl 	struct vmd_vm			*vm = NULL;
55953027660Sclaudio 	int				 fd;
5604f76ab55Sreyk 
5614f76ab55Sreyk 	IMSG_SIZE_CHECK(imsg, &vmc);
5624f76ab55Sreyk 	memcpy(&vmc, imsg->data, sizeof(vmc));
56353027660Sclaudio 	fd = imsg_get_fd(imsg);
56453027660Sclaudio 	vmc.vmc_kernel = fd;
5654f76ab55Sreyk 
5664f76ab55Sreyk 	errno = 0;
567e5d5b350Sreyk 	if (vm_register(ps, &vmc, &vm, imsg->hdr.peerid, 0) == -1)
5684f76ab55Sreyk 		goto fail;
56982fb2f0aSreyk 
57019700f36Sjasper 	vm->vm_state |= VM_STATE_RUNNING;
5713be9785fSreyk 	vm->vm_peerid = (uint32_t)-1;
57253027660Sclaudio 	vm->vm_kernel = fd;
5734f76ab55Sreyk 	return (0);
5744f76ab55Sreyk 
5754f76ab55Sreyk  fail:
57653027660Sclaudio 	if (fd != -1)
57753027660Sclaudio 		close(fd);
5784f76ab55Sreyk 
579ddadf993Sreyk 	vm_remove(vm, __func__);
5804f76ab55Sreyk 	if (errno == 0)
5814f76ab55Sreyk 		errno = EINVAL;
5824f76ab55Sreyk 
5834f76ab55Sreyk 	return (-1);
5844f76ab55Sreyk }
5854f76ab55Sreyk 
5864f76ab55Sreyk int
587f0bbd60cSreyk config_getdisk(struct privsep *ps, struct imsg *imsg)
588f0bbd60cSreyk {
589f0bbd60cSreyk 	struct vmd_vm	*vm;
59073613953Sreyk 	unsigned int	 n, idx;
59153027660Sclaudio 	int		 fd;
592f0bbd60cSreyk 
5934a0e5604Sreyk 	errno = 0;
5944a0e5604Sreyk 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
5954a0e5604Sreyk 		errno = ENOENT;
596f0bbd60cSreyk 		return (-1);
5974a0e5604Sreyk 	}
598f0bbd60cSreyk 
599f0bbd60cSreyk 	IMSG_SIZE_CHECK(imsg, &n);
600f0bbd60cSreyk 	memcpy(&n, imsg->data, sizeof(n));
60153027660Sclaudio 	fd = imsg_get_fd(imsg);
602f0bbd60cSreyk 
60353027660Sclaudio 	if (n >= vm->vm_params.vmc_ndisks || fd == -1) {
604ddadf993Sreyk 		log_warnx("invalid disk id");
6054a0e5604Sreyk 		errno = EINVAL;
606f0bbd60cSreyk 		return (-1);
607f0bbd60cSreyk 	}
60873613953Sreyk 	idx = vm->vm_params.vmc_diskbases[n]++;
60973613953Sreyk 	if (idx >= VM_MAX_BASE_PER_DISK) {
61073613953Sreyk 		log_warnx("too many bases for disk");
61173613953Sreyk 		errno = EINVAL;
61273613953Sreyk 		return (-1);
61373613953Sreyk 	}
61453027660Sclaudio 	vm->vm_disks[n][idx] = fd;
615f0bbd60cSreyk 	return (0);
616f0bbd60cSreyk }
617f0bbd60cSreyk 
618f0bbd60cSreyk int
619f0bbd60cSreyk config_getif(struct privsep *ps, struct imsg *imsg)
620f0bbd60cSreyk {
621f0bbd60cSreyk 	struct vmd_vm	*vm;
622f0bbd60cSreyk 	unsigned int	 n;
62353027660Sclaudio 	int		 fd;
624f0bbd60cSreyk 
6254a0e5604Sreyk 	errno = 0;
6264a0e5604Sreyk 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
6274a0e5604Sreyk 		errno = ENOENT;
628f0bbd60cSreyk 		return (-1);
6294a0e5604Sreyk 	}
630f0bbd60cSreyk 
631f0bbd60cSreyk 	IMSG_SIZE_CHECK(imsg, &n);
632f0bbd60cSreyk 	memcpy(&n, imsg->data, sizeof(n));
63353027660Sclaudio 	fd = imsg_get_fd(imsg);
63453027660Sclaudio 
63573a98491Sdv 	if (n >= vm->vm_params.vmc_nnics ||
63653027660Sclaudio 	    vm->vm_ifs[n].vif_fd != -1 || fd == -1) {
637ddadf993Sreyk 		log_warnx("invalid interface id");
6385921535cSreyk 		goto fail;
639f0bbd60cSreyk 	}
64053027660Sclaudio 	vm->vm_ifs[n].vif_fd = fd;
641f0bbd60cSreyk 	return (0);
6425921535cSreyk  fail:
64353027660Sclaudio 	if (fd != -1)
64453027660Sclaudio 		close(fd);
6455921535cSreyk 	errno = EINVAL;
6465921535cSreyk 	return (-1);
647f0bbd60cSreyk }
64895ab188fSccardenas 
64995ab188fSccardenas int
65095ab188fSccardenas config_getcdrom(struct privsep *ps, struct imsg *imsg)
65195ab188fSccardenas {
65295ab188fSccardenas 	struct vmd_vm	*vm;
65353027660Sclaudio 	int		 fd;
65495ab188fSccardenas 
65595ab188fSccardenas 	errno = 0;
65695ab188fSccardenas 	if ((vm = vm_getbyvmid(imsg->hdr.peerid)) == NULL) {
65795ab188fSccardenas 		errno = ENOENT;
65895ab188fSccardenas 		return (-1);
65995ab188fSccardenas 	}
66095ab188fSccardenas 
66153027660Sclaudio 	fd = imsg_get_fd(imsg);
66253027660Sclaudio 	if (fd == -1) {
663ddadf993Sreyk 		log_warnx("invalid cdrom id");
66495ab188fSccardenas 		goto fail;
66595ab188fSccardenas 	}
66695ab188fSccardenas 
66753027660Sclaudio 	vm->vm_cdrom = fd;
66895ab188fSccardenas 	return (0);
66995ab188fSccardenas  fail:
67095ab188fSccardenas 	errno = EINVAL;
67195ab188fSccardenas 	return (-1);
67295ab188fSccardenas }
673