xref: /openbsd-src/usr.sbin/vmd/control.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: control.c,v 1.41 2023/04/28 19:46:42 dv Exp $	*/
2 
3 /*
4  * Copyright (c) 2010-2015 Reyk Floeter <reyk@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/queue.h>
22 #include <sys/stat.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <sys/tree.h>
26 
27 #include <net/if.h>
28 
29 #include <errno.h>
30 #include <event.h>
31 #include <fcntl.h>
32 #include <signal.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include "proc.h"
38 #include "vmd.h"
39 
40 #define	CONTROL_BACKLOG	5
41 
/*
 * Control client connections.  control_accept() appends to this list,
 * control_close() removes from it and control_connbyfd() searches it.
 */
struct ctl_connlist ctl_conns = TAILQ_HEAD_INITIALIZER(ctl_conns);

/*
 * A control client that asked to be told when a VM terminates.
 */
struct ctl_notify {
	int			ctl_fd;		/* fd of the waiting client connection */
	uint32_t		ctl_vmid;	/* id of the VM being waited for */
	TAILQ_ENTRY(ctl_notify)	entry;		/* linkage in ctl_notify_q */
};
/* Outstanding wait registrations; new ones are added at the tail. */
TAILQ_HEAD(ctl_notify_q, ctl_notify) ctl_notify_q =
	TAILQ_HEAD_INITIALIZER(ctl_notify_q);
/* Forward declarations for functions defined later in this file. */
void
	 control_accept(int, short, void *);
struct ctl_conn
	*control_connbyfd(int);
void	 control_close(int, struct control_sock *);
void	 control_dispatch_imsg(int, short, void *);
int	 control_dispatch_vmd(int, struct privsep_proc *, struct imsg *);
void	 control_run(struct privsep *, struct privsep_proc *, void *);

/*
 * Process table handed to proc_run(): its only entry names the parent
 * process and control_dispatch_vmd() as the associated dispatch function.
 */
static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	control_dispatch_vmd }
};
63 
64 void
65 control(struct privsep *ps, struct privsep_proc *p)
66 {
67 	proc_run(ps, p, procs, nitems(procs), control_run, NULL);
68 }
69 
/*
 * Callback given to proc_run() by control().  It only restricts the
 * process with pledge(2); none of the arguments are used.
 */
void
control_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	/*
	 * Promises required by the control process:
	 *   stdio  - malloc and basic I/O including events.
	 *   unix   - the control socket.
	 *   recvfd - the proc fd exchange.
	 *   sendfd - for send and receive.
	 */
	static const char	promises[] = "stdio unix recvfd sendfd";

	if (pledge(promises, NULL) == -1)
		fatal("pledge");
}
83 
84 int
85 control_dispatch_vmd(int fd, struct privsep_proc *p, struct imsg *imsg)
86 {
87 	struct ctl_conn		*c;
88 	struct ctl_notify	*notify = NULL, *notify_next;
89 	struct privsep		*ps = p->p_ps;
90 	struct vmop_result	 vmr;
91 	int			 waiting = 0;
92 
93 	switch (imsg->hdr.type) {
94 	case IMSG_VMDOP_START_VM_RESPONSE:
95 	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
96 	case IMSG_VMDOP_SEND_VM_RESPONSE:
97 	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
98 	case IMSG_VMDOP_GET_INFO_VM_DATA:
99 	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
100 	case IMSG_CTL_FAIL:
101 	case IMSG_CTL_OK:
102 		/* Provide basic response back to a specific control client */
103 		if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
104 			log_warnx("%s: lost control connection: fd %d",
105 			    __func__, imsg->hdr.peerid);
106 			return (0);
107 		}
108 		imsg_compose_event(&c->iev, imsg->hdr.type,
109 		    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
110 		break;
111 	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
112 		IMSG_SIZE_CHECK(imsg, &vmr);
113 		memcpy(&vmr, imsg->data, sizeof(vmr));
114 
115 		if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
116 			log_warnx("%s: lost control connection: fd %d",
117 			    __func__, imsg->hdr.peerid);
118 			return (0);
119 		}
120 
121 		TAILQ_FOREACH(notify, &ctl_notify_q, entry) {
122 			if (notify->ctl_fd == (int) imsg->hdr.peerid) {
123 				/*
124 				 * Update if waiting by vm name. This is only
125 				 * supported when stopping a single vm. If
126 				 * stopping all vms, vmctl(8) sends the request
127 				 * using the vmid.
128 				 */
129 				if (notify->ctl_vmid < 1)
130 					notify->ctl_vmid = vmr.vmr_id;
131 				waiting = 1;
132 				break;
133 			}
134 		}
135 
136 		/* An error needs to be relayed to the client immediately */
137 		if (!waiting || vmr.vmr_result) {
138 			imsg_compose_event(&c->iev, imsg->hdr.type,
139 			    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
140 
141 			if (notify) {
142 				TAILQ_REMOVE(&ctl_notify_q, notify, entry);
143 				free(notify);
144 			}
145 		}
146 		break;
147 	case IMSG_VMDOP_TERMINATE_VM_EVENT:
148 		/* Notify any waiting clients that a VM terminated */
149 		IMSG_SIZE_CHECK(imsg, &vmr);
150 		memcpy(&vmr, imsg->data, sizeof(vmr));
151 
152 		TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
153 			if (notify->ctl_vmid != vmr.vmr_id)
154 				continue;
155 			if ((c = control_connbyfd(notify->ctl_fd)) != NULL) {
156 				/* Forward to the vmctl(8) client */
157 				imsg_compose_event(&c->iev, imsg->hdr.type,
158 				    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
159 				TAILQ_REMOVE(&ctl_notify_q, notify, entry);
160 				free(notify);
161 			}
162 		}
163 		break;
164 	case IMSG_VMDOP_CONFIG:
165 		config_getconfig(ps->ps_env, imsg);
166 		proc_compose(ps, PROC_PARENT, IMSG_VMDOP_DONE, NULL, 0);
167 		break;
168 	case IMSG_CTL_RESET:
169 		config_getreset(ps->ps_env, imsg);
170 		break;
171 	default:
172 		return (-1);
173 	}
174 
175 	return (0);
176 }
177 
178 int
179 control_init(struct privsep *ps, struct control_sock *cs)
180 {
181 	struct sockaddr_un	 sun;
182 	int			 fd;
183 	mode_t			 old_umask, mode;
184 
185 	if (cs->cs_name == NULL)
186 		return (0);
187 
188 	if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0)) == -1) {
189 		log_warn("%s: socket", __func__);
190 		return (-1);
191 	}
192 
193 	sun.sun_family = AF_UNIX;
194 	if (strlcpy(sun.sun_path, cs->cs_name,
195 	    sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
196 		log_warn("%s: %s name too long", __func__, cs->cs_name);
197 		close(fd);
198 		return (-1);
199 	}
200 
201 	if (unlink(cs->cs_name) == -1)
202 		if (errno != ENOENT) {
203 			log_warn("%s: unlink %s", __func__, cs->cs_name);
204 			close(fd);
205 			return (-1);
206 		}
207 
208 	if (cs->cs_restricted) {
209 		old_umask = umask(S_IXUSR|S_IXGRP|S_IXOTH);
210 		mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
211 	} else {
212 		old_umask = umask(S_IXUSR|S_IXGRP|S_IWOTH|S_IROTH|S_IXOTH);
213 		mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP;
214 	}
215 
216 	if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1) {
217 		log_warn("%s: bind: %s", __func__, cs->cs_name);
218 		close(fd);
219 		(void)umask(old_umask);
220 		return (-1);
221 	}
222 	(void)umask(old_umask);
223 
224 	if (chmod(cs->cs_name, mode) == -1) {
225 		log_warn("%s: chmod", __func__);
226 		close(fd);
227 		(void)unlink(cs->cs_name);
228 		return (-1);
229 	}
230 
231 	cs->cs_fd = fd;
232 	cs->cs_env = ps;
233 
234 	proc_compose(ps, PROC_PARENT, IMSG_VMDOP_DONE, NULL, 0);
235 
236 	return (0);
237 }
238 
239 int
240 control_reset(struct control_sock *cs)
241 {
242 	/* Updating owner of the control socket */
243 	if (chown(cs->cs_name, cs->cs_uid, cs->cs_gid) == -1)
244 		return (-1);
245 
246 	return (0);
247 }
248 
249 int
250 control_listen(struct control_sock *cs)
251 {
252 	if (cs->cs_name == NULL)
253 		return (0);
254 
255 	if (listen(cs->cs_fd, CONTROL_BACKLOG) == -1) {
256 		log_warn("%s: listen", __func__);
257 		return (-1);
258 	}
259 
260 	event_set(&cs->cs_ev, cs->cs_fd, EV_READ,
261 	    control_accept, cs);
262 	event_add(&cs->cs_ev, NULL);
263 	evtimer_set(&cs->cs_evt, control_accept, cs);
264 
265 	return (0);
266 }
267 
268 void
269 control_accept(int listenfd, short event, void *arg)
270 {
271 	struct control_sock	*cs = arg;
272 	int			 connfd;
273 	socklen_t		 len;
274 	struct sockaddr_un	 sun;
275 	struct ctl_conn		*c;
276 
277 	event_add(&cs->cs_ev, NULL);
278 	if ((event & EV_TIMEOUT))
279 		return;
280 
281 	len = sizeof(sun);
282 	if ((connfd = accept4(listenfd,
283 	    (struct sockaddr *)&sun, &len, SOCK_NONBLOCK)) == -1) {
284 		/*
285 		 * Pause accept if we are out of file descriptors, or
286 		 * libevent will haunt us here too.
287 		 */
288 		if (errno == ENFILE || errno == EMFILE) {
289 			struct timeval evtpause = { 1, 0 };
290 
291 			event_del(&cs->cs_ev);
292 			evtimer_add(&cs->cs_evt, &evtpause);
293 		} else if (errno != EWOULDBLOCK && errno != EINTR &&
294 		    errno != ECONNABORTED)
295 			log_warn("%s: accept", __func__);
296 		return;
297 	}
298 
299 	if ((c = calloc(1, sizeof(struct ctl_conn))) == NULL) {
300 		log_warn("%s", __func__);
301 		close(connfd);
302 		return;
303 	}
304 
305 	if (getsockopt(connfd, SOL_SOCKET, SO_PEERCRED,
306 	    &c->peercred, &len) != 0) {
307 		log_warn("%s: failed to get peer credentials", __func__);
308 		close(connfd);
309 		free(c);
310 		return;
311 	}
312 
313 	imsg_init(&c->iev.ibuf, connfd);
314 	c->iev.handler = control_dispatch_imsg;
315 	c->iev.events = EV_READ;
316 	c->iev.data = cs;
317 	event_set(&c->iev.ev, c->iev.ibuf.fd, c->iev.events,
318 	    c->iev.handler, c->iev.data);
319 	event_add(&c->iev.ev, NULL);
320 
321 	TAILQ_INSERT_TAIL(&ctl_conns, c, entry);
322 }
323 
324 struct ctl_conn *
325 control_connbyfd(int fd)
326 {
327 	struct ctl_conn	*c;
328 
329 	TAILQ_FOREACH(c, &ctl_conns, entry) {
330 		if (c->iev.ibuf.fd == fd)
331 			break;
332 	}
333 
334 	return (c);
335 }
336 
337 void
338 control_close(int fd, struct control_sock *cs)
339 {
340 	struct ctl_conn		*c;
341 	struct ctl_notify	*notify, *notify_next;
342 
343 	if ((c = control_connbyfd(fd)) == NULL) {
344 		log_warn("%s: fd %d: not found", __func__, fd);
345 		return;
346 	}
347 
348 	msgbuf_clear(&c->iev.ibuf.w);
349 	TAILQ_REMOVE(&ctl_conns, c, entry);
350 
351 	TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
352 		if (notify->ctl_fd == fd) {
353 			TAILQ_REMOVE(&ctl_notify_q, notify, entry);
354 			free(notify);
355 			break;
356 		}
357 	}
358 
359 	event_del(&c->iev.ev);
360 	close(c->iev.ibuf.fd);
361 
362 	/* Some file descriptors are available again. */
363 	if (evtimer_pending(&cs->cs_evt, NULL)) {
364 		evtimer_del(&cs->cs_evt);
365 		event_add(&cs->cs_ev, NULL);
366 	}
367 
368 	free(c);
369 }
370 
371 void
372 control_dispatch_imsg(int fd, short event, void *arg)
373 {
374 	struct control_sock		*cs = arg;
375 	struct privsep			*ps = cs->cs_env;
376 	struct ctl_conn			*c;
377 	struct imsg			 imsg;
378 	struct vmop_create_params	 vmc;
379 	struct vmop_id			 vid;
380 	struct ctl_notify		*notify;
381 	int				 n, v, wait = 0, ret = 0;
382 
383 	if ((c = control_connbyfd(fd)) == NULL) {
384 		log_warn("%s: fd %d: not found", __func__, fd);
385 		return;
386 	}
387 
388 	if (event & EV_READ) {
389 		if (((n = imsg_read(&c->iev.ibuf)) == -1 && errno != EAGAIN) ||
390 		    n == 0) {
391 			control_close(fd, cs);
392 			return;
393 		}
394 	}
395 	if (event & EV_WRITE) {
396 		if (msgbuf_write(&c->iev.ibuf.w) <= 0 && errno != EAGAIN) {
397 			control_close(fd, cs);
398 			return;
399 		}
400 	}
401 
402 	for (;;) {
403 		if ((n = imsg_get(&c->iev.ibuf, &imsg)) == -1) {
404 			control_close(fd, cs);
405 			return;
406 		}
407 
408 		if (n == 0)
409 			break;
410 
411 		switch (imsg.hdr.type) {
412 		case IMSG_VMDOP_GET_INFO_VM_REQUEST:
413 		case IMSG_VMDOP_WAIT_VM_REQUEST:
414 		case IMSG_VMDOP_TERMINATE_VM_REQUEST:
415 		case IMSG_VMDOP_START_VM_REQUEST:
416 		case IMSG_VMDOP_PAUSE_VM:
417 		case IMSG_VMDOP_UNPAUSE_VM:
418 			break;
419 		default:
420 			if (c->peercred.uid != 0) {
421 				log_warnx("denied request %d from uid %d",
422 				    imsg.hdr.type, c->peercred.uid);
423 				ret = EPERM;
424 				goto fail;
425 			}
426 			break;
427 		}
428 
429 		switch (imsg.hdr.type) {
430 		case IMSG_CTL_VERBOSE:
431 			if (IMSG_DATA_SIZE(&imsg) < sizeof(v))
432 				goto fail;
433 			memcpy(&v, imsg.data, sizeof(v));
434 			log_setverbose(v);
435 
436 			/* FALLTHROUGH */
437 		case IMSG_VMDOP_RECEIVE_VM_REQUEST:
438 		case IMSG_VMDOP_SEND_VM_REQUEST:
439 		case IMSG_VMDOP_LOAD:
440 		case IMSG_VMDOP_RELOAD:
441 		case IMSG_CTL_RESET:
442 			if (proc_compose_imsg(ps, PROC_PARENT, -1,
443 			    imsg.hdr.type, fd, imsg.fd,
444 			    imsg.data, IMSG_DATA_SIZE(&imsg)) == -1)
445 				goto fail;
446 			break;
447 		case IMSG_VMDOP_START_VM_REQUEST:
448 			if (IMSG_DATA_SIZE(&imsg) < sizeof(vmc))
449 				goto fail;
450 			memcpy(&vmc, imsg.data, sizeof(vmc));
451 			vmc.vmc_owner.uid = c->peercred.uid;
452 			vmc.vmc_owner.gid = -1;
453 
454 			/* imsg.fd may contain kernel image fd. */
455 			if (proc_compose_imsg(ps, PROC_PARENT, -1,
456 			    imsg.hdr.type, fd, imsg.fd, &vmc,
457 			    sizeof(vmc)) == -1) {
458 				control_close(fd, cs);
459 				return;
460 			}
461 			break;
462 		case IMSG_VMDOP_WAIT_VM_REQUEST:
463 			wait = 1;
464 			/* FALLTHROUGH */
465 		case IMSG_VMDOP_TERMINATE_VM_REQUEST:
466 			if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
467 				goto fail;
468 			memcpy(&vid, imsg.data, sizeof(vid));
469 			vid.vid_uid = c->peercred.uid;
470 
471 			if (wait || vid.vid_flags & VMOP_WAIT) {
472 				vid.vid_flags |= VMOP_WAIT;
473 				notify = calloc(1, sizeof(struct ctl_notify));
474 				if (notify == NULL)
475 					fatal("%s: calloc", __func__);
476 				notify->ctl_vmid = vid.vid_id;
477 				notify->ctl_fd = fd;
478 				TAILQ_INSERT_TAIL(&ctl_notify_q, notify, entry);
479 				log_debug("%s: registered wait for peer %d",
480 				    __func__, fd);
481 			}
482 
483 			if (proc_compose_imsg(ps, PROC_PARENT, -1,
484 			    imsg.hdr.type, fd, -1, &vid, sizeof(vid)) == -1) {
485 				log_debug("%s: proc_compose_imsg failed",
486 				    __func__);
487 				control_close(fd, cs);
488 				return;
489 			}
490 			break;
491 		case IMSG_VMDOP_GET_INFO_VM_REQUEST:
492 			if (IMSG_DATA_SIZE(&imsg) != 0)
493 				goto fail;
494 			if (proc_compose_imsg(ps, PROC_PARENT, -1,
495 			    imsg.hdr.type, fd, -1, NULL, 0) == -1) {
496 				control_close(fd, cs);
497 				return;
498 			}
499 			break;
500 		case IMSG_VMDOP_PAUSE_VM:
501 		case IMSG_VMDOP_UNPAUSE_VM:
502 			if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
503 				goto fail;
504 			memcpy(&vid, imsg.data, sizeof(vid));
505 			vid.vid_uid = c->peercred.uid;
506 			log_debug("%s id: %d, name: %s, uid: %d",
507 			    __func__, vid.vid_id, vid.vid_name,
508 			    vid.vid_uid);
509 
510 			if (proc_compose_imsg(ps, PROC_PARENT, -1,
511 			    imsg.hdr.type, fd, imsg.fd,
512 			    &vid, sizeof(vid)) == -1)
513 				goto fail;
514 			break;
515 		default:
516 			log_debug("%s: error handling imsg %d",
517 			    __func__, imsg.hdr.type);
518 			control_close(fd, cs);
519 			break;
520 		}
521 		imsg_free(&imsg);
522 	}
523 
524 	imsg_event_add(&c->iev);
525 	return;
526 
527  fail:
528 	if (ret == 0)
529 		ret = EINVAL;
530 	imsg_compose_event(&c->iev, IMSG_CTL_FAIL,
531 	    0, 0, -1, &ret, sizeof(ret));
532 	imsg_flush(&c->iev.ibuf);
533 	control_close(fd, cs);
534 }
535