/*	$OpenBSD: control.c,v 1.49 2024/11/21 13:39:34 claudio Exp $	*/

/*
 * Copyright (c) 2010-2015 Reyk Floeter <reyk@openbsd.org>
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>

#include <errno.h>
#include <event.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "proc.h"
#include "vmd.h"

#define	CONTROL_BACKLOG	5

struct ctl_connlist ctl_conns = TAILQ_HEAD_INITIALIZER(ctl_conns);

struct ctl_notify {
	int			ctl_fd;
	uint32_t		ctl_vmid;
	TAILQ_ENTRY(ctl_notify)	entry;
};
TAILQ_HEAD(ctl_notify_q, ctl_notify) ctl_notify_q =
	TAILQ_HEAD_INITIALIZER(ctl_notify_q);

void
	 control_accept(int, short, void *);
struct ctl_conn
	*control_connbyfd(int);
void	 control_close(int, struct control_sock *);
void	 control_dispatch_imsg(int, short, void *);
int	 control_dispatch_vmd(int, struct privsep_proc *, struct imsg *);
void	 control_run(struct privsep *, struct privsep_proc *, void *);

static struct privsep_proc procs[] = {
	{ "parent",	PROC_PARENT,	control_dispatch_vmd }
};

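/*
 * Control process entry point: install the imsg handlers in procs[]
 * and enter the event loop via proc_run().
 */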
void
control(struct privsep *ps, struct privsep_proc *p)
{
	proc_run(ps, p, procs, nitems(procs), control_run, NULL);
}

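/*
 * Run callback invoked by proc_run() before the event loop starts;
 * restricts the control process with pledge(2).
 */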
void
control_run(struct privsep *ps, struct privsep_proc *p, void *arg)
{
	/*
	 * pledge in the control process:
	 * stdio - for malloc and basic I/O including events.
	 * unix - for the control socket.
	 * recvfd - for the proc fd exchange.
	 * sendfd - for passing received file descriptors on to other
	 *          processes.
	 */
	if (pledge("stdio unix recvfd sendfd", NULL) == -1)
		fatal("pledge");
}

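/*
 * Handle imsgs sent by the parent process: relay responses to the
 * control client identified by the imsg peerid, service pending
 * terminate notifications and accept configuration updates.
 */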
int
control_dispatch_vmd(int fd, struct privsep_proc *p, struct imsg *imsg)
{
	struct ctl_conn		*c;
	struct ctl_notify	*notify = NULL, *notify_next;
	struct privsep		*ps = p->p_ps;
	struct vmop_result	 vmr;
	int			 waiting = 0;

	switch (imsg->hdr.type) {
	case IMSG_VMDOP_START_VM_RESPONSE:
	case IMSG_VMDOP_PAUSE_VM_RESPONSE:
	case IMSG_VMDOP_SEND_VM_RESPONSE:
	case IMSG_VMDOP_UNPAUSE_VM_RESPONSE:
	case IMSG_VMDOP_GET_INFO_VM_DATA:
	case IMSG_VMDOP_GET_INFO_VM_END_DATA:
	case IMSG_CTL_FAIL:
	case IMSG_CTL_OK:
		/* Provide basic response back to a specific control client */
		if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
			log_warnx("%s: lost control connection: fd %d",
			    __func__, imsg->hdr.peerid);
			return (0);
		}
		imsg_compose_event(&c->iev, imsg->hdr.type,
		    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
		break;
	case IMSG_VMDOP_TERMINATE_VM_RESPONSE:
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));

		if ((c = control_connbyfd(imsg->hdr.peerid)) == NULL) {
			log_warnx("%s: lost control connection: fd %d",
			    __func__, imsg->hdr.peerid);
			return (0);
		}

		TAILQ_FOREACH(notify, &ctl_notify_q, entry) {
			if (notify->ctl_fd == (int) imsg->hdr.peerid) {
				/*
				 * Update if waiting by vm name. This is only
				 * supported when stopping a single vm. If
				 * stopping all vms, vmctl(8) sends the request
				 * using the vmid.
				 */
				if (notify->ctl_vmid < 1)
					notify->ctl_vmid = vmr.vmr_id;
				waiting = 1;
				break;
			}
		}

		/* An error needs to be relayed to the client immediately */
		if (!waiting || vmr.vmr_result) {
			imsg_compose_event(&c->iev, imsg->hdr.type,
			    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));

			if (notify) {
				TAILQ_REMOVE(&ctl_notify_q, notify, entry);
				free(notify);
			}
		}
		break;
	case IMSG_VMDOP_TERMINATE_VM_EVENT:
		/* Notify any waiting clients that a VM terminated */
		IMSG_SIZE_CHECK(imsg, &vmr);
		memcpy(&vmr, imsg->data, sizeof(vmr));

		TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
			if (notify->ctl_vmid != vmr.vmr_id)
				continue;
			if ((c = control_connbyfd(notify->ctl_fd)) != NULL) {
				/* Forward to the vmctl(8) client */
				imsg_compose_event(&c->iev, imsg->hdr.type,
				    0, 0, -1, imsg->data, IMSG_DATA_SIZE(imsg));
				TAILQ_REMOVE(&ctl_notify_q, notify, entry);
				free(notify);
			}
		}
		break;
	case IMSG_VMDOP_CONFIG:
		config_getconfig(ps->ps_env, imsg);
		proc_compose(ps, PROC_PARENT, IMSG_VMDOP_DONE, NULL, 0);
		break;
	case IMSG_CTL_RESET:
		config_getreset(ps->ps_env, imsg);
		break;
	default:
		return (-1);
	}

	return (0);
}

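/*
 * Create the control socket: bind it under a restrictive umask, apply
 * the final permissions and report completion to the parent process.
 */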
int
control_init(struct privsep *ps, struct control_sock *cs)
{
	struct sockaddr_un	 sun;
	int			 fd;
	mode_t			 old_umask, mode;

	if (cs->cs_name == NULL)
		return (0);

	if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0)) == -1) {
		log_warn("%s: socket", __func__);
		return (-1);
	}

	/* Zero the sockaddr so sun_len and the padding are initialized. */
	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	if (strlcpy(sun.sun_path, cs->cs_name,
	    sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) {
		log_warnx("%s: %s name too long", __func__, cs->cs_name);
		close(fd);
		return (-1);
	}

	if (unlink(cs->cs_name) == -1)
		if (errno != ENOENT) {
			log_warn("%s: unlink %s", __func__, cs->cs_name);
			close(fd);
			return (-1);
		}

	if (cs->cs_restricted) {
		old_umask = umask(S_IXUSR|S_IXGRP|S_IXOTH);
		mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
	} else {
		old_umask = umask(S_IXUSR|S_IXGRP|S_IWOTH|S_IROTH|S_IXOTH);
		mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP;
	}

	if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1) {
		log_warn("%s: bind: %s", __func__, cs->cs_name);
		close(fd);
		(void)umask(old_umask);
		return (-1);
	}
	(void)umask(old_umask);

	if (chmod(cs->cs_name, mode) == -1) {
		log_warn("%s: chmod", __func__);
		close(fd);
		(void)unlink(cs->cs_name);
		return (-1);
	}

	cs->cs_fd = fd;
	cs->cs_env = ps;

	proc_compose(ps, PROC_PARENT, IMSG_VMDOP_DONE, NULL, 0);

	return (0);
}

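/*
 * Apply the configured ownership to the control socket.
 */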
int
control_reset(struct control_sock *cs)
{
	/* Update the owner of the control socket */
	if (chown(cs->cs_name, cs->cs_uid, cs->cs_gid) == -1)
		return (-1);

	return (0);
}

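/*
 * Start listening on the control socket and register the accept event
 * plus the timer used to pause accepting when out of descriptors.
 */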
int
control_listen(struct control_sock *cs)
{
	if (cs->cs_name == NULL)
		return (0);

	if (listen(cs->cs_fd, CONTROL_BACKLOG) == -1) {
		log_warn("%s: listen", __func__);
		return (-1);
	}

	event_set(&cs->cs_ev, cs->cs_fd, EV_READ,
	    control_accept, cs);
	event_add(&cs->cs_ev, NULL);
	evtimer_set(&cs->cs_evt, control_accept, cs);

	return (0);
}

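/*
 * Accept a new client connection, fetch its credentials and set up the
 * imsg channel used to talk to vmctl(8).
 */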
void
control_accept(int listenfd, short event, void *arg)
{
	struct control_sock	*cs = arg;
	int			 connfd;
	socklen_t		 len;
	struct sockaddr_un	 sun;
	struct ctl_conn		*c;

	event_add(&cs->cs_ev, NULL);
	if ((event & EV_TIMEOUT))
		return;

	len = sizeof(sun);
	if ((connfd = accept4(listenfd,
	    (struct sockaddr *)&sun, &len, SOCK_NONBLOCK)) == -1) {
		/*
		 * Pause accept if we are out of file descriptors, or
		 * libevent will haunt us here too.
		 */
		if (errno == ENFILE || errno == EMFILE) {
			struct timeval evtpause = { 1, 0 };

			event_del(&cs->cs_ev);
			evtimer_add(&cs->cs_evt, &evtpause);
		} else if (errno != EWOULDBLOCK && errno != EINTR &&
		    errno != ECONNABORTED)
			log_warn("%s: accept", __func__);
		return;
	}

	if ((c = calloc(1, sizeof(struct ctl_conn))) == NULL) {
		log_warn("%s", __func__);
		close(connfd);
		return;
	}

	/* accept4() clobbered len above; reset it for getsockopt(). */
	len = sizeof(c->peercred);
	if (getsockopt(connfd, SOL_SOCKET, SO_PEERCRED,
	    &c->peercred, &len) != 0) {
		log_warn("%s: failed to get peer credentials", __func__);
		close(connfd);
		free(c);
		return;
	}

	if (imsgbuf_init(&c->iev.ibuf, connfd) == -1) {
		log_warn("%s: failed to init imsgbuf", __func__);
		close(connfd);
		free(c);
		return;
	}
	imsgbuf_allow_fdpass(&c->iev.ibuf);
	c->iev.handler = control_dispatch_imsg;
	c->iev.events = EV_READ;
	c->iev.data = cs;
	event_set(&c->iev.ev, c->iev.ibuf.fd, c->iev.events,
	    c->iev.handler, c->iev.data);
	event_add(&c->iev.ev, NULL);

	TAILQ_INSERT_TAIL(&ctl_conns, c, entry);
}

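/*
 * Look up a control connection by its socket descriptor; returns NULL
 * if no such connection exists.
 */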
struct ctl_conn *
control_connbyfd(int fd)
{
	struct ctl_conn	*c;

	TAILQ_FOREACH(c, &ctl_conns, entry) {
		if (c->iev.ibuf.fd == fd)
			break;
	}

	return (c);
}

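/*
 * Tear down a client connection: drop any pending notification, free
 * the imsg buffers and resume accepting if it was paused.
 */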
void
control_close(int fd, struct control_sock *cs)
{
	struct ctl_conn		*c;
	struct ctl_notify	*notify, *notify_next;

	if ((c = control_connbyfd(fd)) == NULL) {
		log_warnx("%s: fd %d: not found", __func__, fd);
		return;
	}

	imsgbuf_clear(&c->iev.ibuf);
	TAILQ_REMOVE(&ctl_conns, c, entry);

	TAILQ_FOREACH_SAFE(notify, &ctl_notify_q, entry, notify_next) {
		if (notify->ctl_fd == fd) {
			TAILQ_REMOVE(&ctl_notify_q, notify, entry);
			free(notify);
			break;
		}
	}

	event_del(&c->iev.ev);
	close(c->iev.ibuf.fd);

	/* Some file descriptors are available again. */
	if (evtimer_pending(&cs->cs_evt, NULL)) {
		evtimer_del(&cs->cs_evt);
		event_add(&cs->cs_ev, NULL);
	}

	free(c);
}

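/*
 * Handle imsgs received from a vmctl(8) client: enforce the uid checks
 * and forward permitted requests to the parent process.
 */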
void
control_dispatch_imsg(int fd, short event, void *arg)
{
	struct control_sock		*cs = arg;
	struct privsep			*ps = cs->cs_env;
	struct ctl_conn			*c;
	struct imsg			 imsg;
	struct vmop_create_params	 vmc;
	struct vmop_id			 vid;
	struct ctl_notify		*notify;
	int				 n, v, wait = 0, ret = 0;

	if ((c = control_connbyfd(fd)) == NULL) {
		log_warnx("%s: fd %d: not found", __func__, fd);
		return;
	}

	if (event & EV_READ) {
		if (imsgbuf_read(&c->iev.ibuf) != 1) {
			control_close(fd, cs);
			return;
		}
	}
	if (event & EV_WRITE) {
		if (imsgbuf_write(&c->iev.ibuf) == -1) {
			control_close(fd, cs);
			return;
		}
	}

	for (;;) {
		if ((n = imsg_get(&c->iev.ibuf, &imsg)) == -1) {
			control_close(fd, cs);
			return;
		}

		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_VMDOP_GET_INFO_VM_REQUEST:
		case IMSG_VMDOP_WAIT_VM_REQUEST:
		case IMSG_VMDOP_TERMINATE_VM_REQUEST:
		case IMSG_VMDOP_START_VM_REQUEST:
		case IMSG_VMDOP_PAUSE_VM:
		case IMSG_VMDOP_UNPAUSE_VM:
			break;
		default:
			if (c->peercred.uid != 0) {
				log_warnx("denied request %d from uid %d",
				    imsg.hdr.type, c->peercred.uid);
				ret = EPERM;
				goto fail;
			}
			break;
		}

		switch (imsg.hdr.type) {
		case IMSG_CTL_VERBOSE:
			if (IMSG_DATA_SIZE(&imsg) < sizeof(v))
				goto fail;
			memcpy(&v, imsg.data, sizeof(v));
			log_setverbose(v);

			/* FALLTHROUGH */
		case IMSG_VMDOP_RECEIVE_VM_REQUEST:
		case IMSG_VMDOP_SEND_VM_REQUEST:
		case IMSG_VMDOP_LOAD:
		case IMSG_VMDOP_RELOAD:
		case IMSG_CTL_RESET:
			if (proc_compose_imsg(ps, PROC_PARENT, -1,
			    imsg.hdr.type, fd, imsg_get_fd(&imsg),
			    imsg.data, IMSG_DATA_SIZE(&imsg)) == -1)
				goto fail;
			break;
		case IMSG_VMDOP_START_VM_REQUEST:
			if (IMSG_DATA_SIZE(&imsg) < sizeof(vmc))
				goto fail;
			memcpy(&vmc, imsg.data, sizeof(vmc));
			vmc.vmc_owner.uid = c->peercred.uid;
			vmc.vmc_owner.gid = -1;

			/* The imsg may carry a kernel image fd; pass it on. */
			if (proc_compose_imsg(ps, PROC_PARENT, -1,
			    imsg.hdr.type, fd, imsg_get_fd(&imsg), &vmc,
			    sizeof(vmc)) == -1) {
				control_close(fd, cs);
				return;
			}
			break;
		case IMSG_VMDOP_WAIT_VM_REQUEST:
			wait = 1;
			/* FALLTHROUGH */
		case IMSG_VMDOP_TERMINATE_VM_REQUEST:
			if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
				goto fail;
			memcpy(&vid, imsg.data, sizeof(vid));
			vid.vid_uid = c->peercred.uid;

			if (wait || vid.vid_flags & VMOP_WAIT) {
				vid.vid_flags |= VMOP_WAIT;
				notify = calloc(1, sizeof(struct ctl_notify));
				if (notify == NULL)
					fatal("%s: calloc", __func__);
				notify->ctl_vmid = vid.vid_id;
				notify->ctl_fd = fd;
				TAILQ_INSERT_TAIL(&ctl_notify_q, notify, entry);
				log_debug("%s: registered wait for peer %d",
				    __func__, fd);
			}

			if (proc_compose_imsg(ps, PROC_PARENT, -1,
			    imsg.hdr.type, fd, -1, &vid, sizeof(vid)) == -1) {
				log_debug("%s: proc_compose_imsg failed",
				    __func__);
				control_close(fd, cs);
				return;
			}
			break;
		case IMSG_VMDOP_GET_INFO_VM_REQUEST:
			if (IMSG_DATA_SIZE(&imsg) != 0)
				goto fail;
			if (proc_compose_imsg(ps, PROC_PARENT, -1,
			    imsg.hdr.type, fd, -1, NULL, 0) == -1) {
				control_close(fd, cs);
				return;
			}
			break;
		case IMSG_VMDOP_PAUSE_VM:
		case IMSG_VMDOP_UNPAUSE_VM:
			if (IMSG_DATA_SIZE(&imsg) < sizeof(vid))
				goto fail;
			memcpy(&vid, imsg.data, sizeof(vid));
			vid.vid_uid = c->peercred.uid;
			log_debug("%s id: %d, name: %s, uid: %d",
			    __func__, vid.vid_id, vid.vid_name,
			    vid.vid_uid);

			if (proc_compose_imsg(ps, PROC_PARENT, -1,
			    imsg.hdr.type, fd, imsg_get_fd(&imsg),
			    &vid, sizeof(vid)) == -1)
				goto fail;
			break;
		default:
			log_debug("%s: error handling imsg %d",
			    __func__, imsg.hdr.type);
			imsg_free(&imsg);
			/* control_close() frees c, so do not touch it again. */
			control_close(fd, cs);
			return;
		}
		imsg_free(&imsg);
	}

	imsg_event_add(&c->iev);
	return;

 fail:
	if (ret == 0)
		ret = EINVAL;
	imsg_compose_event(&c->iev, IMSG_CTL_FAIL,
	    0, 0, -1, &ret, sizeof(ret));
	imsgbuf_flush(&c->iev.ibuf);
	control_close(fd, cs);
}
535