xref: /netbsd-src/lib/librumpuser/sp_common.c (revision daf6c4152fcddc27c445489775ed1f66ab4ea9a9)
1 /*      $NetBSD: sp_common.c,v 1.28 2011/02/15 10:37:07 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Common client/server sysproxy routines.  #included.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/un.h>
39 #include <sys/syslimits.h>
40 
41 #include <arpa/inet.h>
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44 
45 #include <assert.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <inttypes.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <stdarg.h>
52 #include <stddef.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57 
//#define DEBUG
/*
 * Debug tracing.
 *
 * With DEBUG defined, DPRINTF(("fmt", args...)) pastes its parenthesized
 * argument list after mydprintf, which formats the message to stderr.
 * The double parentheses are what let a one-parameter macro carry a
 * variable argument list.
 *
 * Without DEBUG, DPRINTF() expands to nothing, so its arguments are
 * never evaluated.
 */
#ifdef DEBUG
#define DPRINTF(x) mydprintf x
static void
mydprintf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}
#else
#define DPRINTF(x)
#endif
73 
/*
 * Unless the including file defines HOSTOPS, map the host_*() names used
 * by the routines below straight onto the corresponding libc calls.
 * (Presumably a file defining HOSTOPS supplies its own host_*()
 * definitions -- they are not visible here.)
 */
#ifndef HOSTOPS
#define host_poll poll
#define host_read read
#define host_sendto sendto
#define host_setsockopt setsockopt
#endif
80 
81 /*
82  * Bah, I hate writing on-off-wire conversions in C
83  */
84 
/*
 * Frame classes, carried in rsp_hdr.rsp_class.  kickwaiter() checks for
 * RUMPSP_ERROR to decide whether the frame carries an error code.
 */
enum { RUMPSP_REQ, RUMPSP_RESP, RUMPSP_ERROR };
/*
 * Operation codes.  Presumably these are the values of rsp_hdr.rsp_type;
 * nothing in this part of the file reads that field -- confirm against
 * the including client/server code.
 */
enum {	RUMPSP_HANDSHAKE,
	RUMPSP_SYSCALL,
	RUMPSP_COPYIN, RUMPSP_COPYINSTR,
	RUMPSP_COPYOUT, RUMPSP_COPYOUTSTR,
	RUMPSP_ANONMMAP,
	RUMPSP_PREFORK,
	RUMPSP_RAISE };

/*
 * Handshake kinds.  Presumably carried in rsp_hdr.rsp_handshake for
 * RUMPSP_HANDSHAKE frames -- confirm against the users.
 */
enum { HANDSHAKE_GUEST, HANDSHAKE_AUTH, HANDSHAKE_FORK, HANDSHAKE_EXEC };

/*
 * Size of the fork authentication token.  Going by the comment below this
 * counts 32-bit words (4 * 32 = 128 bits), which matches the rf_auth[4]
 * array in struct handshake_fork.
 */
#define AUTHLEN 4 /* 128bit fork auth */
97 
/*
 * Header that starts every frame.  readframe() reads exactly HDRSZ bytes
 * of it straight into a struct rsp_hdr, so the in-memory layout is the
 * wire layout.
 */
struct rsp_hdr {
	/*
	 * Length of the whole frame in bytes, header included: readframe()
	 * rejects values smaller than HDRSZ and treats rsp_len - HDRSZ as
	 * the payload size.
	 */
	uint64_t rsp_len;
	/*
	 * Request number.  doputwait() assigns it for outgoing requests and
	 * kickwaiter() uses it to find the matching waiter.
	 */
	uint64_t rsp_reqno;
	/* RUMPSP_REQ / RUMPSP_RESP / RUMPSP_ERROR */
	uint16_t rsp_class;
	/* presumably one of RUMPSP_HANDSHAKE ... RUMPSP_RAISE -- confirm */
	uint16_t rsp_type;
	/*
	 * We want this structure 64bit-aligned for typecast fun,
	 * so might as well use the following for something.
	 */
	/*
	 * Only `error' is read in this part of the file: kickwaiter() picks
	 * it up when rsp_class is RUMPSP_ERROR.  Which member is valid for
	 * the other frame types is decided by code not visible here.
	 */
	union {
		uint32_t sysnum;
		uint32_t error;
		uint32_t handshake;
		uint32_t signo;
	} u;
};
/* size of the fixed header; the payload, if any, follows it */
#define HDRSZ sizeof(struct rsp_hdr)
/* shorthands for the union members */
#define rsp_sysnum u.sysnum
#define rsp_error u.error
#define rsp_handshake u.handshake
#define rsp_signo u.signo

/*
 * Not referenced in this part of the file; presumably the maximum length
 * of the handshake banner -- confirm against the users.
 */
#define MAXBANNER 96
121 
122 /*
123  * Data follows the header.  We have two types of structured data.
124  */
125 
/*
 * copyin/copyout
 *
 * Payload layout for the copy operations: a length and an address,
 * followed by the data bytes themselves.  rcp_data is a zero-length
 * trailing array, i.e. it only names the bytes that follow the fixed
 * part.  Whose address space rcp_addr refers to is not visible here.
 */
struct rsp_copydata {
	size_t rcp_len;
	void *rcp_addr;
	uint8_t rcp_data[0];
};
132 
/*
 * syscall response
 *
 * An error number plus two register-sized return values.  Presumably
 * rsys_error is an errno value with 0 meaning success -- confirm against
 * the code that fills it in.
 */
struct rsp_sysresp {
	int rsys_error;
	register_t rsys_retval[2];
};
138 
/*
 * Fork handshake data: four 32-bit words of authentication token (128
 * bits, cf. AUTHLEN) and a cancel flag.  The semantics of rf_cancel are
 * defined by the users, which are not visible here.
 */
struct handshake_fork {
	uint32_t rf_auth[4];
	int rf_cancel;
};
143 
/*
 * Bookkeeping for one thread waiting for the response to a request.
 * Initialized and queued on spclient.spc_respwait by doputwait(), filled
 * in by kickwaiter() when the matching frame arrives, and dequeued by
 * dounputwait().
 */
struct respwait {
	uint64_t rw_reqno;	/* request number this waiter is matched on */
	void *rw_data;		/* response payload buffer handed over by
				 * kickwaiter(); NULL until then */
	size_t rw_dlen;		/* payload length (frame length - HDRSZ) */
	int rw_done;		/* set to 1 once a response has arrived */
	int rw_error;		/* rsp_error from a RUMPSP_ERROR frame,
				 * otherwise 0 */

	pthread_cond_t rw_cv;	/* signalled by kickwaiter(), broadcast by
				 * kickall() */

	TAILQ_ENTRY(respwait) rw_entries; /* linkage on spc_respwait */
};
155 
struct prefork;
/*
 * Per-connection state.
 *
 * In the routines visible here spc_mtx is held while spc_ostatus,
 * spc_nextreq and the spc_respwait list are manipulated.  Fields without
 * a comment are not used in this part of the file.
 */
struct spclient {
	int spc_fd;		/* socket used by dosend() and readframe() */
	int spc_refcnt;
	int spc_state;		/* presumably one of SPCSTATE_* -- confirm */

	pthread_mutex_t spc_mtx;
	pthread_cond_t spc_cv;	/* waited on for the send lock (sendlockl()) */

	struct lwp *spc_mainlwp;
	pid_t spc_pid;

	TAILQ_HEAD(, respwait) spc_respwait; /* threads awaiting responses */

	/* rest of the fields are zeroed upon disconnect */
#define SPC_ZEROFF offsetof(struct spclient, spc_pfd)
	struct pollfd *spc_pfd;

	/* readframe() state: header, payload buffer, bytes of frame read */
	struct rsp_hdr spc_hdr;
	uint8_t *spc_buf;
	size_t spc_off;

	uint64_t spc_nextreq;	/* next request number, see doputwait() */
	uint64_t spc_syscallreq;
	uint64_t spc_generation;
	/*
	 * spc_ostatus is the send lock state (SPCSTATUS_*), see sendlockl()
	 * and sendunlockl().  spc_istatus is presumably the receive-side
	 * counterpart -- confirm against the users.
	 */
	int spc_ostatus, spc_istatus;
	int spc_reconnecting;

	LIST_HEAD(, prefork) spc_pflist;
};
/*
 * Send lock states: FREE = nobody sending, BUSY = held, WANTED = held and
 * at least one thread is sleeping on spc_cv waiting for it.
 */
#define SPCSTATUS_FREE 0
#define SPCSTATUS_BUSY 1
#define SPCSTATUS_WANTED 2

/* connection states; presumably the values of spc_state -- confirm */
#define SPCSTATE_NEW     0
#define SPCSTATE_RUNNING 1
#define SPCSTATE_DYING   2
193 
/*
 * Per-transport hooks stored in parsetab[]:
 *
 * addrparse_fn   parse the address part of a URL (everything after
 *                "://") into a malloc()ed sockaddr stored through the
 *                second argument; the int is the allow_wildcard flag.
 *                Returns 0 or an errno value.
 * connecthook_fn takes a socket descriptor; returns 0 in the
 *                implementations visible here.
 * cleanup_fn     takes the sockaddr produced by the parser.
 */
typedef int (*addrparse_fn)(const char *, struct sockaddr **, int);
typedef int (*connecthook_fn)(int);
typedef void (*cleanup_fn)(struct sockaddr *);

/*
 * Forward declarations.  handlereq() is not defined in this part of the
 * file; presumably the including client/server code provides it.
 */
static int readframe(struct spclient *);
static void handlereq(struct spclient *);
200 
201 static __inline void
202 spcresetbuf(struct spclient *spc)
203 {
204 
205 	spc->spc_buf = NULL;
206 	spc->spc_off = 0;
207 }
208 
209 static __inline void
210 spcfreebuf(struct spclient *spc)
211 {
212 
213 	free(spc->spc_buf);
214 	spcresetbuf(spc);
215 }
216 
217 static void
218 sendlockl(struct spclient *spc)
219 {
220 
221 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
222 		spc->spc_ostatus = SPCSTATUS_WANTED;
223 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
224 	}
225 	spc->spc_ostatus = SPCSTATUS_BUSY;
226 }
227 
228 static void __unused
229 sendlock(struct spclient *spc)
230 {
231 
232 	pthread_mutex_lock(&spc->spc_mtx);
233 	sendlockl(spc);
234 	pthread_mutex_unlock(&spc->spc_mtx);
235 }
236 
237 static void
238 sendunlockl(struct spclient *spc)
239 {
240 
241 	if (spc->spc_ostatus == SPCSTATUS_WANTED)
242 		pthread_cond_broadcast(&spc->spc_cv);
243 	spc->spc_ostatus = SPCSTATUS_FREE;
244 }
245 
246 static void
247 sendunlock(struct spclient *spc)
248 {
249 
250 	pthread_mutex_lock(&spc->spc_mtx);
251 	sendunlockl(spc);
252 	pthread_mutex_unlock(&spc->spc_mtx);
253 }
254 
255 static int
256 dosend(struct spclient *spc, const void *data, size_t dlen)
257 {
258 	struct pollfd pfd;
259 	const uint8_t *sdata = data;
260 	ssize_t n;
261 	size_t sent;
262 	int fd = spc->spc_fd;
263 
264 	pfd.fd = fd;
265 	pfd.events = POLLOUT;
266 
267 	for (sent = 0, n = 0; sent < dlen; ) {
268 		if (n) {
269 			if (host_poll(&pfd, 1, INFTIM) == -1) {
270 				if (errno == EINTR)
271 					continue;
272 				return errno;
273 			}
274 		}
275 
276 		n = host_sendto(fd, sdata + sent, dlen - sent,
277 		    MSG_NOSIGNAL, NULL, 0);
278 		if (n == -1)  {
279 			if (errno == EPIPE)
280 				return ENOTCONN;
281 			if (errno != EAGAIN)
282 				return errno;
283 			continue;
284 		}
285 		if (n == 0) {
286 			return ENOTCONN;
287 		}
288 		sent += n;
289 	}
290 
291 	return 0;
292 }
293 
294 static void
295 doputwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
296 {
297 
298 	rw->rw_data = NULL;
299 	rw->rw_dlen = rw->rw_done = rw->rw_error = 0;
300 	pthread_cond_init(&rw->rw_cv, NULL);
301 
302 	pthread_mutex_lock(&spc->spc_mtx);
303 	rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++;
304 	TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries);
305 }
306 
307 static void __unused
308 putwait_locked(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
309 {
310 
311 	doputwait(spc, rw, rhdr);
312 	pthread_mutex_unlock(&spc->spc_mtx);
313 }
314 
315 static void
316 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
317 {
318 
319 	doputwait(spc, rw, rhdr);
320 	sendlockl(spc);
321 	pthread_mutex_unlock(&spc->spc_mtx);
322 }
323 
324 static void
325 dounputwait(struct spclient *spc, struct respwait *rw)
326 {
327 
328 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
329 	pthread_mutex_unlock(&spc->spc_mtx);
330 	pthread_cond_destroy(&rw->rw_cv);
331 
332 }
333 
334 static void __unused
335 unputwait_locked(struct spclient *spc, struct respwait *rw)
336 {
337 
338 	pthread_mutex_lock(&spc->spc_mtx);
339 	dounputwait(spc, rw);
340 }
341 
342 static void
343 unputwait(struct spclient *spc, struct respwait *rw)
344 {
345 
346 	pthread_mutex_lock(&spc->spc_mtx);
347 	sendunlockl(spc);
348 
349 	dounputwait(spc, rw);
350 }
351 
352 static void
353 kickwaiter(struct spclient *spc)
354 {
355 	struct respwait *rw;
356 	int error = 0;
357 
358 	pthread_mutex_lock(&spc->spc_mtx);
359 	TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) {
360 		if (rw->rw_reqno == spc->spc_hdr.rsp_reqno)
361 			break;
362 	}
363 	if (rw == NULL) {
364 		DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n",
365 		    spc->spc_hdr.rsp_reqno));
366 		pthread_mutex_unlock(&spc->spc_mtx);
367 		spcfreebuf(spc);
368 		return;
369 	}
370 	DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw));
371 	rw->rw_data = spc->spc_buf;
372 	rw->rw_done = 1;
373 	rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ);
374 	if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) {
375 		error = rw->rw_error = spc->spc_hdr.rsp_error;
376 	}
377 	pthread_cond_signal(&rw->rw_cv);
378 	pthread_mutex_unlock(&spc->spc_mtx);
379 
380 	if (error)
381 		spcfreebuf(spc);
382 	else
383 		spcresetbuf(spc);
384 }
385 
386 static void
387 kickall(struct spclient *spc)
388 {
389 	struct respwait *rw;
390 
391 	/* DIAGASSERT(mutex_owned(spc_lock)) */
392 	TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries)
393 		pthread_cond_broadcast(&rw->rw_cv);
394 }
395 
396 static int
397 readframe(struct spclient *spc)
398 {
399 	int fd = spc->spc_fd;
400 	size_t left;
401 	size_t framelen;
402 	ssize_t n;
403 
404 	/* still reading header? */
405 	if (spc->spc_off < HDRSZ) {
406 		DPRINTF(("rump_sp: readframe getting header at offset %zu\n",
407 		    spc->spc_off));
408 
409 		left = HDRSZ - spc->spc_off;
410 		/*LINTED: cast ok */
411 		n = host_read(fd, (uint8_t*)&spc->spc_hdr + spc->spc_off, left);
412 		if (n == 0) {
413 			return -1;
414 		}
415 		if (n == -1) {
416 			if (errno == EAGAIN)
417 				return 0;
418 			return -1;
419 		}
420 
421 		spc->spc_off += n;
422 		if (spc->spc_off < HDRSZ)
423 			return -1;
424 
425 		/*LINTED*/
426 		framelen = spc->spc_hdr.rsp_len;
427 
428 		if (framelen < HDRSZ) {
429 			return -1;
430 		} else if (framelen == HDRSZ) {
431 			return 1;
432 		}
433 
434 		spc->spc_buf = malloc(framelen - HDRSZ);
435 		if (spc->spc_buf == NULL) {
436 			return -1;
437 		}
438 		memset(spc->spc_buf, 0, framelen - HDRSZ);
439 
440 		/* "fallthrough" */
441 	} else {
442 		/*LINTED*/
443 		framelen = spc->spc_hdr.rsp_len;
444 	}
445 
446 	left = framelen - spc->spc_off;
447 
448 	DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n",
449 	    spc->spc_off, left));
450 
451 	if (left == 0)
452 		return 1;
453 	n = host_read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left);
454 	if (n == 0) {
455 		return -1;
456 	}
457 	if (n == -1) {
458 		if (errno == EAGAIN)
459 			return 0;
460 		return -1;
461 	}
462 	spc->spc_off += n;
463 	left -= n;
464 
465 	/* got everything? */
466 	if (left == 0)
467 		return 1;
468 	else
469 		return 0;
470 }
471 
472 static int
473 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
474 {
475 	struct sockaddr_in sin;
476 	char buf[64];
477 	const char *p;
478 	size_t l;
479 	int port;
480 
481 	memset(&sin, 0, sizeof(sin));
482 	sin.sin_len = sizeof(sin);
483 	sin.sin_family = AF_INET;
484 
485 	p = strchr(addr, ':');
486 	if (!p) {
487 		fprintf(stderr, "rump_sp_tcp: missing port specifier\n");
488 		return EINVAL;
489 	}
490 
491 	l = p - addr;
492 	if (l > sizeof(buf)-1) {
493 		fprintf(stderr, "rump_sp_tcp: address too long\n");
494 		return EINVAL;
495 	}
496 	strncpy(buf, addr, l);
497 	buf[l] = '\0';
498 
499 	/* special INADDR_ANY treatment */
500 	if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) {
501 		sin.sin_addr.s_addr = INADDR_ANY;
502 	} else {
503 		switch (inet_pton(AF_INET, buf, &sin.sin_addr)) {
504 		case 1:
505 			break;
506 		case 0:
507 			fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf);
508 			return EINVAL;
509 		case -1:
510 			fprintf(stderr, "rump_sp_tcp: inet_pton failed\n");
511 			return errno;
512 		default:
513 			assert(/*CONSTCOND*/0);
514 			return EINVAL;
515 		}
516 	}
517 
518 	if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) {
519 		fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n");
520 		return EINVAL;
521 	}
522 
523 	/* advance to port number & parse */
524 	p++;
525 	l = strspn(p, "0123456789");
526 	if (l == 0) {
527 		fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p);
528 		return EINVAL;
529 	}
530 	strncpy(buf, p, l);
531 	buf[l] = '\0';
532 
533 	if (*(p+l) != '/' && *(p+l) != '\0') {
534 		fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr);
535 		return EINVAL;
536 	}
537 
538 	port = atoi(buf);
539 	if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) {
540 		fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port);
541 		return ERANGE;
542 	}
543 	sin.sin_port = htons(port);
544 
545 	*sa = malloc(sizeof(sin));
546 	if (*sa == NULL)
547 		return errno;
548 	memcpy(*sa, &sin, sizeof(sin));
549 	return 0;
550 }
551 
/*
 * Connect hook for TCP sockets: request TCP_NODELAY on socket s.
 * The result of the setsockopt call is deliberately ignored; the hook
 * always reports success.
 */
static int
tcp_connecthook(int s)
{
	int nodelay = 1;

	host_setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay));
	return 0;
}
562 
/*
 * "unix://<absolute path>" form of the first relative unix socket path
 * handed to unix_parse(); empty until then.  It is only written in this
 * part of the file -- the readers are elsewhere.
 */
static char parsedurl[256];
564 
565 /*ARGSUSED*/
566 static int
567 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
568 {
569 	struct sockaddr_un sun;
570 	size_t slen;
571 	int savepath = 0;
572 
573 	if (strlen(addr) > sizeof(sun.sun_path))
574 		return ENAMETOOLONG;
575 
576 	/*
577 	 * The pathname can be all kinds of spaghetti elementals,
578 	 * so meek and obidient we accept everything.  However, use
579 	 * full path for easy cleanup in case someone gives a relative
580 	 * one and the server does a chdir() between now than the
581 	 * cleanup.
582 	 */
583 	memset(&sun, 0, sizeof(sun));
584 	sun.sun_family = AF_LOCAL;
585 	if (*addr != '/') {
586 		char mywd[PATH_MAX];
587 
588 		if (getcwd(mywd, sizeof(mywd)) == NULL) {
589 			fprintf(stderr, "warning: cannot determine cwd, "
590 			    "omitting socket cleanup\n");
591 		} else {
592 			if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path))
593 				return ENAMETOOLONG;
594 			strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path));
595 			strlcat(sun.sun_path, "/", sizeof(sun.sun_path));
596 			savepath = 1;
597 		}
598 	}
599 	strlcat(sun.sun_path, addr, sizeof(sun.sun_path));
600 	sun.sun_len = SUN_LEN(&sun);
601 	slen = sun.sun_len+1; /* get the 0 too */
602 
603 	if (savepath && *parsedurl == '\0') {
604 		snprintf(parsedurl, sizeof(parsedurl),
605 		    "unix://%s", sun.sun_path);
606 	}
607 
608 	*sa = malloc(slen);
609 	if (*sa == NULL)
610 		return errno;
611 	memcpy(*sa, &sun, slen);
612 
613 	return 0;
614 }
615 
/*
 * Cleanup hook for local-domain sockets: remove the socket file from the
 * file system.  sa is treated as a struct sockaddr_un.  Only absolute
 * paths are removed, since a relative one cannot be trusted to still
 * name the same file (see unix_parse()).  The result of unlink() is
 * ignored.
 */
static void
unix_cleanup(struct sockaddr *sa)
{
	struct sockaddr_un *sun = (void *)sa;

	if (sun->sun_path[0] != '/')
		return;

	unlink(sun->sun_path);
}
628 
/*
 * Stand-in hook for transports that are not implemented: print a
 * diagnostic to stderr and return EOPNOTSUPP.  Installed in parsetab[]
 * through a function pointer cast, so whatever arguments the caller
 * passes are ignored.
 */
/*ARGSUSED*/
static int
notsupp(void)
{
	static const char msg[] = "rump_sp: support not yet implemented\n";

	fputs(msg, stderr);
	return EOPNOTSUPP;
}
637 
/*
 * No-op hook: does nothing and returns 0.  Installed in parsetab[]
 * (through function pointer casts) wherever a transport needs no
 * connect hook or no cleanup.
 */
static int
success(void)
{
	return 0;
}
644 
645 struct {
646 	const char *id;
647 	int domain;
648 	addrparse_fn ap;
649 	connecthook_fn connhook;
650 	cleanup_fn cleanup;
651 } parsetab[] = {
652 	{ "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success },
653 	{ "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup },
654 	{ "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success,
655 			    (cleanup_fn)success },
656 };
657 #define NPARSE (sizeof(parsetab)/sizeof(parsetab[0]))
658 
659 static int
660 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp,
661 	int allow_wildcard)
662 {
663 	char id[16];
664 	const char *p, *p2;
665 	size_t l;
666 	unsigned i;
667 	int error;
668 
669 	/*
670 	 * Parse the url
671 	 */
672 
673 	p = url;
674 	p2 = strstr(p, "://");
675 	if (!p2) {
676 		fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p);
677 		return EINVAL;
678 	}
679 	l = p2-p;
680 	if (l > sizeof(id)-1) {
681 		fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p);
682 		return EINVAL;
683 	}
684 
685 	strncpy(id, p, l);
686 	id[l] = '\0';
687 	p2 += 3; /* beginning of address */
688 
689 	for (i = 0; i < NPARSE; i++) {
690 		if (strcmp(id, parsetab[i].id) == 0) {
691 			error = parsetab[i].ap(p2, sap, allow_wildcard);
692 			if (error)
693 				return error;
694 			break;
695 		}
696 	}
697 	if (i == NPARSE) {
698 		fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p);
699 		return EINVAL;
700 	}
701 
702 	*idxp = i;
703 	return 0;
704 }
705