xref: /netbsd-src/lib/librumpuser/sp_common.c (revision 4817a0b0b8fe9612e8ebe21a9bf2d97b95038a97)
1 /*      $NetBSD: sp_common.c,v 1.17 2010/12/16 17:05:44 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Common client/server sysproxy routines.  #included.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 #include <sys/types.h>
35 #include <sys/mman.h>
36 #include <sys/queue.h>
37 #include <sys/socket.h>
38 #include <sys/un.h>
39 #include <sys/syslimits.h>
40 
41 #include <arpa/inet.h>
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44 
45 #include <assert.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <inttypes.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <stdarg.h>
52 #include <stddef.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57 
/* #define DEBUG */
#ifndef DEBUG
/* Debugging disabled: DPRINTF(("fmt", args...)) expands to nothing. */
#define DPRINTF(x)
#else
/*
 * Debugging enabled: DPRINTF(("fmt", args...)) hands its parenthesized
 * argument list to mydprintf().
 */
#define DPRINTF(x) mydprintf x

/*
 * printf-style helper which writes the formatted message to stderr.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
#endif
73 
74 /*
75  * Bah, I hate writing on-off-wire conversions in C
76  */
77 
/*
 * Frame classes.  These are the values the receive path compares
 * rsp_hdr.rsp_class against: a request, a normal response, or an
 * error response.
 */
enum { RUMPSP_REQ, RUMPSP_RESP, RUMPSP_ERROR };
/*
 * Operation identifiers.
 * NOTE(review): presumably carried in rsp_hdr.rsp_type; the users are
 * not in this file -- confirm against the client/server sources.
 */
enum {	RUMPSP_HANDSHAKE,
	RUMPSP_SYSCALL,
	RUMPSP_COPYIN, RUMPSP_COPYINSTR,
	RUMPSP_COPYOUT, RUMPSP_COPYOUTSTR,
	RUMPSP_ANONMMAP };

/*
 * Handshake variants.
 * NOTE(review): presumably stored in rsp_hdr.rsp_handshake -- confirm.
 */
enum { HANDSHAKE_GUEST, HANDSHAKE_AUTH }; /* more to come */
86 
/*
 * Header which starts every frame.  readframe() reads it directly
 * from the socket into memory, so the layout is the wire format.
 */
struct rsp_hdr {
	/* length of the whole frame, header included (see readframe()) */
	uint64_t rsp_len;
	/* request number; used to pair a response with its waiter */
	uint64_t rsp_reqno;
	/* RUMPSP_REQ, RUMPSP_RESP or RUMPSP_ERROR */
	uint16_t rsp_class;
	/* NOTE(review): presumably one of RUMPSP_HANDSHAKE etc. -- confirm */
	uint16_t rsp_type;
	/*
	 * We want this structure 64bit-aligned for typecast fun,
	 * so might as well use the following for something.
	 */
	union {
		uint32_t sysnum;
		uint32_t error;
		uint32_t handshake;
	} u;
};
/* size of the frame header in bytes */
#define HDRSZ sizeof(struct rsp_hdr)
/* shorthands for the members of the union at the end of the header */
#define rsp_sysnum u.sysnum
#define rsp_error u.error
#define rsp_handshake u.handshake

/* NOTE(review): not used in this part of the file; presumably a banner length limit -- confirm */
#define MAXBANNER 96
108 
109 /*
110  * Data follows the header.  We have two types of structured data.
111  */
112 
/*
 * copyin/copyout
 *
 * rcp_data is declared as a zero-length array (a compiler extension;
 * the C99 spelling of a flexible array member is "[]"), so payload
 * bytes follow the fixed part of the structure.
 * NOTE(review): presumably rcp_len is the payload length and rcp_addr
 * the address being copied from/to -- the users are not in this file,
 * confirm there.
 */
struct rsp_copydata {
	size_t rcp_len;
	void *rcp_addr;
	uint8_t rcp_data[0];
};
119 
/*
 * syscall response
 *
 * NOTE(review): presumably rsys_error is the errno-style result of the
 * system call and rsys_retval its return value pair -- the producers
 * and consumers are not in this file, confirm there.
 */
struct rsp_sysresp {
	int rsys_error;
	register_t rsys_retval[2];
};
125 
/*
 * Bookkeeping for one thread waiting for the response to a request.
 * Initialized and queued by putwait(), filled in by kickwaiter() and
 * dequeued by waitresp() or unputwait().
 */
struct respwait {
	/* request number assigned by putwait(); matched by kickwaiter() */
	uint64_t rw_reqno;
	/* response body handed over by kickwaiter(), NULL until then */
	void *rw_data;
	/* length of the response body in bytes */
	size_t rw_dlen;
	/* rsp_error of an RUMPSP_ERROR response, otherwise 0 */
	int rw_error;

	/* signalled by kickwaiter() and kickall(); waited on in waitresp() */
	pthread_cond_t rw_cv;

	/* linkage on the spclient's spc_respwait list */
	TAILQ_ENTRY(respwait) rw_entries;
};
136 
/*
 * State of one sysproxy connection.
 */
struct spclient {
	/* socket used by dosend() and readframe() */
	int spc_fd;
	int spc_refcnt;
	/* one of SPCSTATE_* */
	int spc_state;

	/* spc_mtx is held around the waiter list and the send/receive status */
	pthread_mutex_t spc_mtx;
	/* waited on by senders in sendlockl(), broadcast by sendunlockl() */
	pthread_cond_t spc_cv;

	struct lwp *spc_mainlwp;
	pid_t spc_pid;

	/* threads waiting for a response, see struct respwait */
	TAILQ_HEAD(, respwait) spc_respwait;

	/* rest of the fields are zeroed upon disconnect */
#define SPC_ZEROFF offsetof(struct spclient, spc_pfd)
	struct pollfd *spc_pfd;

	/* header of the frame currently being received */
	struct rsp_hdr spc_hdr;
	/* body of the frame currently being received, allocated by readframe() */
	uint8_t *spc_buf;
	/* number of bytes of the current frame read so far, header included */
	size_t spc_off;

	/* next request number to be handed out by putwait() */
	uint64_t spc_nextreq;
	/* SPCSTATUS_* of the sending (o) and receiving (i) side */
	int spc_ostatus, spc_istatus;
};
/* values for spc_ostatus and spc_istatus */
#define SPCSTATUS_FREE 0
#define SPCSTATUS_BUSY 1
#define SPCSTATUS_WANTED 2

/* values for spc_state; waitresp() sets SPCSTATE_DYING when reading fails */
#define SPCSTATE_NEW     0
#define SPCSTATE_RUNNING 1
#define SPCSTATE_DYING   2
168 
/*
 * Per-transport hooks stored in parsetab[]:
 *  addrparse_fn:   parse an address string into a malloc'd sockaddr;
 *                  the int argument is the allow_wildcard flag
 *  connecthook_fn: takes a socket descriptor (tcp_connecthook() sets
 *                  TCP_NODELAY on it)
 *  cleanup_fn:     takes the sockaddr (unix_cleanup() unlinks the
 *                  socket path)
 */
typedef int (*addrparse_fn)(const char *, struct sockaddr **, int);
typedef int (*connecthook_fn)(int);
typedef void (*cleanup_fn)(struct sockaddr *);

/* readframe() is defined further down; handlereq() is not defined in this part of the file */
static int readframe(struct spclient *);
static void handlereq(struct spclient *);
175 
176 static __inline void
177 spcresetbuf(struct spclient *spc)
178 {
179 
180 	spc->spc_buf = NULL;
181 	spc->spc_off = 0;
182 }
183 
184 static __inline void
185 spcfreebuf(struct spclient *spc)
186 {
187 
188 	free(spc->spc_buf);
189 	spcresetbuf(spc);
190 }
191 
192 static void
193 sendlockl(struct spclient *spc)
194 {
195 
196 	/* assert(pthread_mutex_owned) */
197 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
198 		spc->spc_ostatus = SPCSTATUS_WANTED;
199 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
200 	}
201 	spc->spc_ostatus = SPCSTATUS_BUSY;
202 }
203 
204 static void
205 sendlock(struct spclient *spc)
206 {
207 
208 	pthread_mutex_lock(&spc->spc_mtx);
209 	sendlockl(spc);
210 	pthread_mutex_unlock(&spc->spc_mtx);
211 }
212 
213 static void
214 sendunlockl(struct spclient *spc)
215 {
216 
217 	/* assert(pthread_mutex_owned) */
218 	if (spc->spc_ostatus == SPCSTATUS_WANTED)
219 		pthread_cond_broadcast(&spc->spc_cv);
220 	spc->spc_ostatus = SPCSTATUS_FREE;
221 }
222 
223 static void
224 sendunlock(struct spclient *spc)
225 {
226 
227 	pthread_mutex_lock(&spc->spc_mtx);
228 	sendunlockl(spc);
229 	pthread_mutex_unlock(&spc->spc_mtx);
230 }
231 
232 static int
233 dosend(struct spclient *spc, const void *data, size_t dlen)
234 {
235 	struct pollfd pfd;
236 	const uint8_t *sdata = data;
237 	ssize_t n;
238 	size_t sent;
239 	int fd = spc->spc_fd;
240 
241 	pfd.fd = fd;
242 	pfd.events = POLLOUT;
243 
244 	for (sent = 0, n = 0; sent < dlen; ) {
245 		if (n) {
246 			if (poll(&pfd, 1, INFTIM) == -1) {
247 				if (errno == EINTR)
248 					continue;
249 				return errno;
250 			}
251 		}
252 
253 		n = send(fd, sdata + sent, dlen - sent, MSG_NOSIGNAL);
254 		if (n == 0) {
255 			return EFAULT;
256 		}
257 		if (n == -1)  {
258 			if (errno != EAGAIN)
259 				return EFAULT;
260 			continue;
261 		}
262 		sent += n;
263 	}
264 
265 	return 0;
266 }
267 
268 static void
269 putwait(struct spclient *spc, struct respwait *rw, struct rsp_hdr *rhdr)
270 {
271 
272 	rw->rw_data = NULL;
273 	rw->rw_dlen = 0;
274 	pthread_cond_init(&rw->rw_cv, NULL);
275 
276 	pthread_mutex_lock(&spc->spc_mtx);
277 	rw->rw_reqno = rhdr->rsp_reqno = spc->spc_nextreq++;
278 	TAILQ_INSERT_TAIL(&spc->spc_respwait, rw, rw_entries);
279 
280 	sendlockl(spc);
281 }
282 
283 static void
284 unputwait(struct spclient *spc, struct respwait *rw)
285 {
286 
287 	sendunlockl(spc);
288 
289 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
290 	pthread_mutex_unlock(&spc->spc_mtx);
291 	pthread_cond_destroy(&rw->rw_cv);
292 }
293 
294 static void
295 kickwaiter(struct spclient *spc)
296 {
297 	struct respwait *rw;
298 	int error;
299 
300 	pthread_mutex_lock(&spc->spc_mtx);
301 	TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries) {
302 		if (rw->rw_reqno == spc->spc_hdr.rsp_reqno)
303 			break;
304 	}
305 	if (rw == NULL) {
306 		DPRINTF(("no waiter found, invalid reqno %" PRIu64 "?\n",
307 		    spc->spc_hdr.rsp_reqno));
308 		return;
309 	}
310 	DPRINTF(("rump_sp: client %p woke up waiter at %p\n", spc, rw));
311 	rw->rw_data = spc->spc_buf;
312 	rw->rw_dlen = (size_t)(spc->spc_off - HDRSZ);
313 	if (spc->spc_hdr.rsp_class == RUMPSP_ERROR) {
314 		error = rw->rw_error = spc->spc_hdr.rsp_error;
315 	} else {
316 		error = rw->rw_error = 0;
317 	}
318 	pthread_cond_signal(&rw->rw_cv);
319 	pthread_mutex_unlock(&spc->spc_mtx);
320 
321 	if (error)
322 		spcfreebuf(spc);
323 	else
324 		spcresetbuf(spc);
325 }
326 
327 static void
328 kickall(struct spclient *spc)
329 {
330 	struct respwait *rw;
331 
332 	/* DIAGASSERT(mutex_owned(spc_lock)) */
333 	TAILQ_FOREACH(rw, &spc->spc_respwait, rw_entries)
334 		pthread_cond_broadcast(&rw->rw_cv);
335 }
336 
/*
 * Wait for the response to the request registered with putwait().
 *
 * Must be entered with spc_mtx held and the send lock owned, i.e. in
 * the state putwait() leaves behind: the send lock is dropped first,
 * and the mutex is released before returning.
 *
 * There is no dedicated receiver thread here.  A waiter which finds
 * the receive side free becomes the receiver: it reads frames, hands
 * responses to their waiters with kickwaiter() and passes incoming
 * requests to handlereq() until its own response has arrived.  All
 * other waiters sleep on their own condition variable.
 *
 * Returns the errno value picked up when reading failed, ENOTCONN if
 * the connection is dying, and otherwise the waiter's rw_error (0 for
 * a normal response).
 */
static int
waitresp(struct spclient *spc, struct respwait *rw)
{
	struct pollfd pfd;
	int rv = 0;

	sendunlockl(spc);

	rw->rw_error = 0;
	while (rw->rw_data == NULL && rw->rw_error == 0
	    && spc->spc_state != SPCSTATE_DYING){
		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp;

			/* become the receiver; reading is done unlocked */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			pfd.fd = spc->spc_fd;
			pfd.events = POLLIN;

			for (gotresp = 0; !gotresp; ) {
				switch (readframe(spc)) {
				case 0:
					/* frame incomplete: wait for more input */
					poll(&pfd, 1, INFTIM);
					continue;
				case -1:
					/*
					 * NOTE(review): readframe() also returns
					 * -1 without a failing system call (EOF,
					 * bad frame length), in which case errno
					 * is stale here -- confirm.
					 */
					rv = errno;
					spc->spc_state = SPCSTATE_DYING;
					goto cleanup;
				default:
					break;
				}

				/* a complete frame is available */
				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop receiving once it was our response */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}
 cleanup:
			/* step down as receiver and let a sleeping waiter take over */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;
		} else {
			/* somebody else is receiving: sleep until kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}

	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);

	pthread_cond_destroy(&rw->rw_cv);

	if (rv)
		return rv;
	if (spc->spc_state == SPCSTATE_DYING)
		return ENOTCONN;
	return rw->rw_error;
}
408 
409 static int
410 readframe(struct spclient *spc)
411 {
412 	int fd = spc->spc_fd;
413 	size_t left;
414 	size_t framelen;
415 	ssize_t n;
416 
417 	/* still reading header? */
418 	if (spc->spc_off < HDRSZ) {
419 		DPRINTF(("rump_sp: readframe getting header at offset %zu\n",
420 		    spc->spc_off));
421 
422 		left = HDRSZ - spc->spc_off;
423 		/*LINTED: cast ok */
424 		n = read(fd, (uint8_t *)&spc->spc_hdr + spc->spc_off, left);
425 		if (n == 0) {
426 			return -1;
427 		}
428 		if (n == -1) {
429 			if (errno == EAGAIN)
430 				return 0;
431 			return -1;
432 		}
433 
434 		spc->spc_off += n;
435 		if (spc->spc_off < HDRSZ)
436 			return -1;
437 
438 		/*LINTED*/
439 		framelen = spc->spc_hdr.rsp_len;
440 
441 		if (framelen < HDRSZ) {
442 			return -1;
443 		} else if (framelen == HDRSZ) {
444 			return 1;
445 		}
446 
447 		spc->spc_buf = malloc(framelen - HDRSZ);
448 		if (spc->spc_buf == NULL) {
449 			return -1;
450 		}
451 		memset(spc->spc_buf, 0, framelen - HDRSZ);
452 
453 		/* "fallthrough" */
454 	} else {
455 		/*LINTED*/
456 		framelen = spc->spc_hdr.rsp_len;
457 	}
458 
459 	left = framelen - spc->spc_off;
460 
461 	DPRINTF(("rump_sp: readframe getting body at offset %zu, left %zu\n",
462 	    spc->spc_off, left));
463 
464 	if (left == 0)
465 		return 1;
466 	n = read(fd, spc->spc_buf + (spc->spc_off - HDRSZ), left);
467 	if (n == 0) {
468 		return -1;
469 	}
470 	if (n == -1) {
471 		if (errno == EAGAIN)
472 			return 0;
473 		return -1;
474 	}
475 	spc->spc_off += n;
476 	left -= n;
477 
478 	/* got everything? */
479 	if (left == 0)
480 		return 1;
481 	else
482 		return 0;
483 }
484 
485 static int
486 tcp_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
487 {
488 	struct sockaddr_in sin;
489 	char buf[64];
490 	const char *p;
491 	size_t l;
492 	int port;
493 
494 	memset(&sin, 0, sizeof(sin));
495 	sin.sin_len = sizeof(sin);
496 	sin.sin_family = AF_INET;
497 
498 	p = strchr(addr, ':');
499 	if (!p) {
500 		fprintf(stderr, "rump_sp_tcp: missing port specifier\n");
501 		return EINVAL;
502 	}
503 
504 	l = p - addr;
505 	if (l > sizeof(buf)-1) {
506 		fprintf(stderr, "rump_sp_tcp: address too long\n");
507 		return EINVAL;
508 	}
509 	strncpy(buf, addr, l);
510 	buf[l] = '\0';
511 
512 	/* special INADDR_ANY treatment */
513 	if (strcmp(buf, "*") == 0 || strcmp(buf, "0") == 0) {
514 		sin.sin_addr.s_addr = INADDR_ANY;
515 	} else {
516 		switch (inet_pton(AF_INET, buf, &sin.sin_addr)) {
517 		case 1:
518 			break;
519 		case 0:
520 			fprintf(stderr, "rump_sp_tcp: cannot parse %s\n", buf);
521 			return EINVAL;
522 		case -1:
523 			fprintf(stderr, "rump_sp_tcp: inet_pton failed\n");
524 			return errno;
525 		default:
526 			assert(/*CONSTCOND*/0);
527 			return EINVAL;
528 		}
529 	}
530 
531 	if (!allow_wildcard && sin.sin_addr.s_addr == INADDR_ANY) {
532 		fprintf(stderr, "rump_sp_tcp: client needs !INADDR_ANY\n");
533 		return EINVAL;
534 	}
535 
536 	/* advance to port number & parse */
537 	p++;
538 	l = strspn(p, "0123456789");
539 	if (l == 0) {
540 		fprintf(stderr, "rump_sp_tcp: port now found: %s\n", p);
541 		return EINVAL;
542 	}
543 	strncpy(buf, p, l);
544 	buf[l] = '\0';
545 
546 	if (*(p+l) != '/' && *(p+l) != '\0') {
547 		fprintf(stderr, "rump_sp_tcp: junk at end of port: %s\n", addr);
548 		return EINVAL;
549 	}
550 
551 	port = atoi(buf);
552 	if (port < 0 || port >= (1<<(8*sizeof(in_port_t)))) {
553 		fprintf(stderr, "rump_sp_tcp: port %d out of range\n", port);
554 		return ERANGE;
555 	}
556 	sin.sin_port = htons(port);
557 
558 	*sa = malloc(sizeof(sin));
559 	if (*sa == NULL)
560 		return errno;
561 	memcpy(*sa, &sin, sizeof(sin));
562 	return 0;
563 }
564 
/*
 * Connect hook for TCP sockets: request TCP_NODELAY on socket s.
 * The result of setsockopt() is not looked at; 0 is always returned.
 */
static int
tcp_connecthook(int s)
{
	int nodelay = 1;

	setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay));
	return 0;
}
575 
576 /*ARGSUSED*/
577 static int
578 unix_parse(const char *addr, struct sockaddr **sa, int allow_wildcard)
579 {
580 	struct sockaddr_un sun;
581 	size_t slen;
582 
583 	if (strlen(addr) > sizeof(sun.sun_path))
584 		return ENAMETOOLONG;
585 
586 	/*
587 	 * The pathname can be all kinds of spaghetti elementals,
588 	 * so meek and obidient we accept everything.  However, use
589 	 * full path for easy cleanup in case someone gives a relative
590 	 * one and the server does a chdir() between now than the
591 	 * cleanup.
592 	 */
593 	memset(&sun, 0, sizeof(sun));
594 	sun.sun_family = AF_LOCAL;
595 	if (*addr != '/') {
596 		char mywd[PATH_MAX];
597 
598 		if (getcwd(mywd, sizeof(mywd)) == NULL) {
599 			fprintf(stderr, "warning: cannot determine cwd, "
600 			    "omitting socket cleanup\n");
601 		} else {
602 			if (strlen(addr) + strlen(mywd) > sizeof(sun.sun_path))
603 				return ENAMETOOLONG;
604 			strlcpy(sun.sun_path, mywd, sizeof(sun.sun_path));
605 			strlcat(sun.sun_path, "/", sizeof(sun.sun_path));
606 		}
607 	}
608 	strlcat(sun.sun_path, addr, sizeof(sun.sun_path));
609 	sun.sun_len = SUN_LEN(&sun);
610 	slen = sun.sun_len+1; /* get the 0 too */
611 
612 	*sa = malloc(slen);
613 	if (*sa == NULL)
614 		return errno;
615 	memcpy(*sa, &sun, slen);
616 
617 	return 0;
618 }
619 
/*
 * Cleanup hook for local domain sockets: remove the socket from the
 * file system.  Only absolute paths are removed, see unix_parse().
 */
static void
unix_cleanup(struct sockaddr *sa)
{
	struct sockaddr_un *s_un = (void *)sa;

	if (s_un->sun_path[0] != '/')
		return;

	unlink(s_un->sun_path);
}
632 
/*
 * Placeholder for functionality which is not available yet: print a
 * diagnostic to stderr and return EOPNOTSUPP.
 */
/*ARGSUSED*/
static int
notsupp(void)
{
	static const char msg[] = "rump_sp: support not yet implemented\n";

	fputs(msg, stderr);
	return EOPNOTSUPP;
}
641 
/*
 * Placeholder for hooks which have nothing to do: always returns 0.
 */
static int
success(void)
{
	const int rv = 0;

	return rv;
}
648 
/*
 * Table of known transports.  parseurl() looks up the part of the url
 * before "://" in the id column and calls the matching ap routine on
 * the rest; the index of the matching entry is handed back to the
 * caller.
 *
 *  id:       url scheme
 *  domain:   protocol family of the transport
 *  ap:       address parser
 *  connhook: hook taking a socket descriptor
 *  cleanup:  hook taking the parsed address
 *
 * NOTE(review): success() and notsupp() take no arguments and return
 * int, yet they are cast to hook types with other signatures.  The C
 * standard leaves a call through a function pointer of incompatible
 * type undefined; dedicated stubs with matching signatures would be
 * cleaner.
 */
struct {
	const char *id;
	int domain;
	addrparse_fn ap;
	connecthook_fn connhook;
	cleanup_fn cleanup;
} parsetab[] = {
	{ "tcp", PF_INET, tcp_parse, tcp_connecthook, (cleanup_fn)success },
	{ "unix", PF_LOCAL, unix_parse, (connecthook_fn)success, unix_cleanup },
	{ "tcp6", PF_INET6, (addrparse_fn)notsupp, (connecthook_fn)success,
			    (cleanup_fn)success },
};
/* number of entries in parsetab[] */
#define NPARSE (sizeof(parsetab)/sizeof(parsetab[0]))
662 
663 static int
664 parseurl(const char *url, struct sockaddr **sap, unsigned *idxp,
665 	int allow_wildcard)
666 {
667 	char id[16];
668 	const char *p, *p2;
669 	size_t l;
670 	unsigned i;
671 	int error;
672 
673 	/*
674 	 * Parse the url
675 	 */
676 
677 	p = url;
678 	p2 = strstr(p, "://");
679 	if (!p2) {
680 		fprintf(stderr, "rump_sp: invalid locator ``%s''\n", p);
681 		return EINVAL;
682 	}
683 	l = p2-p;
684 	if (l > sizeof(id)-1) {
685 		fprintf(stderr, "rump_sp: identifier too long in ``%s''\n", p);
686 		return EINVAL;
687 	}
688 
689 	strncpy(id, p, l);
690 	id[l] = '\0';
691 	p2 += 3; /* beginning of address */
692 
693 	for (i = 0; i < NPARSE; i++) {
694 		if (strcmp(id, parsetab[i].id) == 0) {
695 			error = parsetab[i].ap(p2, sap, allow_wildcard);
696 			if (error)
697 				return error;
698 			break;
699 		}
700 	}
701 	if (i == NPARSE) {
702 		fprintf(stderr, "rump_sp: invalid identifier ``%s''\n", p);
703 		return EINVAL;
704 	}
705 
706 	*idxp = i;
707 	return 0;
708 }
709