xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 56bb44cae5b13a6b74792381ba1e6d930b26aa67)
1 /*      $NetBSD: rumpclient.c,v 1.38 2011/02/27 12:58:29 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: rumpclient.c,v 1.38 2011/02/27 12:58:29 pooka Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/event.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39 
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <link.h>
50 #include <poll.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 
60 #include <rump/rumpclient.h>
61 
/*
 * Pointers to the host's versions of the system calls used for talking
 * to the server.  They are resolved in rumpclient_init() via the FINDSYM
 * macros.  Not all of them are called from this file; presumably the
 * rest are used by sp_common.c, which is included right after
 * (TODO confirm).
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendto)(int, const void *, size_t, int,
		       const struct sockaddr *, socklen_t);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue is used by the receive loop to wait for socket data and signals */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);

80 #include "sp_common.c"
81 
/*
 * The one and only connection to the server.  spc_fd is -1 whenever
 * no socket is open.
 */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor set up by doconnect(); -1 when there is none */
static int kq = -1;
/* signal set with every signal in it, filled in by rumpclient_init() */
static sigset_t fullset;

/* defined further down, but needed by the reconnect code below */
static int doconnect(bool);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * The value is changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/*
 * Send dlen bytes starting at data to the server with dosend() and,
 * if the connection turns out to be gone (dosend() returns ENOTCONN
 * or EBADF), try to reconnect according to the retrytimo policy.
 *
 * Return values:
 *   - whatever dosend() returned, if that was neither ENOTCONN nor EBADF
 *   - ENOTCONN if retries are disabled (retrytimo == 0), if the single
 *     retry of RUMPCLIENT_RETRYCONN_ONCE failed, or if a positive
 *     retrytimo (seconds) ran out
 *   - EAGAIN if the reconnect and the guest handshake succeeded; the
 *     caller must then resend the whole PDU
 * With RUMPCLIENT_RETRYCONN_DIE the process exits instead of returning.
 */
static int
send_with_recon(struct spclient *spc, const void *data, size_t dlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, data, dlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				exit(1);

			/* first failure: record when the outage started */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			/*
			 * reconretries == 1 means one reconnect attempt has
			 * already been made and failed.  The very first
			 * attempt is silent.
			 */
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* remind the user every two minutes */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				struct timeval tmp;

				gettimeofday(&curtime, NULL);
				timersub(&curtime, &starttime, &tmp);
				if (tmp.tv_sec >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/*
			 * adhoc backoff timer: 0, 0.1, ..., 0.9 seconds for
			 * the first ten attempts, then 1, 2, ... seconds
			 * capped at 10 seconds.
			 */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			/* on failure go around again; dosend() will fail anew */
			if ((rv = doconnect(false)) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
178 
/*
 * Wait until the response for the request registered in rw has arrived.
 *
 * Only one thread at a time reads from the socket (the one which found
 * spc_istatus to be SPCSTATUS_FREE); the others sleep on their rw_cv.
 * The reading thread processes every frame it receives, not just its
 * own, and services requests coming from the server via handlereq().
 *
 * spc      - the connection
 * rw       - wait structure of our request; it is removed from
 *            spc_respwait and its condvar destroyed before returning
 * mask     - signal mask to switch to momentarily when the kqueue
 *            reports a signal
 * keeplock - if false, sendunlockl() is called first (presumably this
 *            drops the send lock taken when the request was queued --
 *            TODO confirm against sp_common.c)
 *
 * Returns ENOTCONN if the connection generation changed or reading
 * failed, otherwise rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation; a change means "reconnected" */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			struct kevent kev[8];
			int gotresp, dosig, rv, i;

			/* we are the reader now; read without the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
				switch (readframe(spc)) {
				case 0:
					/*
					 * no complete frame yet: block in
					 * kevent until the socket is readable
					 * or a signal arrives
					 */
					rv = host_kevent(kq, NULL, 0,
					    kev, __arraycount(kev), NULL);

					if (__predict_false(rv == -1)) {
						goto cleanup;
					}

					/*
					 * XXX: don't know how this can
					 * happen (timeout cannot expire
					 * since there isn't one), but
					 * it does happen
					 */
					if (__predict_false(rv == 0))
						continue;

					for (i = 0; i < rv; i++) {
						if (kev[i].filter
						    == EVFILT_SIGNAL)
							dosig++;
					}
					if (dosig)
						goto cleanup;

					continue;
				case -1:
					/* read failed; reported as ENOTCONN */
					imalive = false;
					goto cleanup;
				default:
					/* got a full frame, handle it below */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* stop reading once it was our reply */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the reader role, wake those who want it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/*
			 * take one for the team: switch to the caller's
			 * signal mask for a moment so that signals it does
			 * not block can be delivered, then block everything
			 * again.
			 */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* someone else is reading; ask to be woken up */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
283 
284 static int
285 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
286 	const void *data, size_t dlen, void **resp)
287 {
288 	struct rsp_hdr rhdr;
289 	struct respwait rw;
290 	int rv;
291 
292 	rhdr.rsp_len = sizeof(rhdr) + dlen;
293 	rhdr.rsp_class = RUMPSP_REQ;
294 	rhdr.rsp_type = RUMPSP_SYSCALL;
295 	rhdr.rsp_sysnum = sysnum;
296 
297 	do {
298 		putwait(spc, &rw, &rhdr);
299 		if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) {
300 			unputwait(spc, &rw);
301 			continue;
302 		}
303 		if ((rv = send_with_recon(spc, data, dlen)) != 0) {
304 			unputwait(spc, &rw);
305 			continue;
306 		}
307 
308 		rv = cliwaitresp(spc, &rw, omask, false);
309 		if (rv == ENOTCONN)
310 			rv = EAGAIN;
311 	} while (rv == EAGAIN);
312 
313 	*resp = rw.rw_data;
314 	return rv;
315 }
316 
317 static int
318 handshake_req(struct spclient *spc, int type, void *data,
319 	int cancel, bool haslock)
320 {
321 	struct handshake_fork rf;
322 	struct rsp_hdr rhdr;
323 	struct respwait rw;
324 	sigset_t omask;
325 	size_t bonus;
326 	int rv;
327 
328 	if (type == HANDSHAKE_FORK) {
329 		bonus = sizeof(rf);
330 	} else {
331 		bonus = strlen(getprogname())+1;
332 	}
333 
334 	/* performs server handshake */
335 	rhdr.rsp_len = sizeof(rhdr) + bonus;
336 	rhdr.rsp_class = RUMPSP_REQ;
337 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
338 	rhdr.rsp_handshake = type;
339 
340 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
341 	if (haslock)
342 		putwait_locked(spc, &rw, &rhdr);
343 	else
344 		putwait(spc, &rw, &rhdr);
345 	rv = dosend(spc, &rhdr, sizeof(rhdr));
346 	if (type == HANDSHAKE_FORK) {
347 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
348 		rf.rf_cancel = cancel;
349 		rv = send_with_recon(spc, &rf, sizeof(rf));
350 	} else {
351 		rv = dosend(spc, getprogname(), strlen(getprogname())+1);
352 	}
353 	if (rv || cancel) {
354 		if (haslock)
355 			unputwait_locked(spc, &rw);
356 		else
357 			unputwait(spc, &rw);
358 		if (cancel) {
359 			goto out;
360 		}
361 	} else {
362 		rv = cliwaitresp(spc, &rw, &omask, haslock);
363 	}
364 	if (rv)
365 		goto out;
366 
367 	rv = *(int *)rw.rw_data;
368 	free(rw.rw_data);
369 
370  out:
371 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
372 	return rv;
373 }
374 
375 static int
376 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
377 {
378 	struct rsp_hdr rhdr;
379 	struct respwait rw;
380 	int rv;
381 
382 	rhdr.rsp_len = sizeof(rhdr);
383 	rhdr.rsp_class = RUMPSP_REQ;
384 	rhdr.rsp_type = RUMPSP_PREFORK;
385 	rhdr.rsp_error = 0;
386 
387 	do {
388 		putwait(spc, &rw, &rhdr);
389 		rv = send_with_recon(spc, &rhdr, sizeof(rhdr));
390 		if (rv != 0) {
391 			unputwait(spc, &rw);
392 			continue;
393 		}
394 
395 		rv = cliwaitresp(spc, &rw, omask, false);
396 		if (rv == ENOTCONN)
397 			rv = EAGAIN;
398 	} while (rv == EAGAIN);
399 
400 	*resp = rw.rw_data;
401 	return rv;
402 }
403 
404 /*
405  * prevent response code from deadlocking with reconnect code
406  */
407 static int
408 resp_sendlock(struct spclient *spc)
409 {
410 	int rv = 0;
411 
412 	pthread_mutex_lock(&spc->spc_mtx);
413 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
414 		if (__predict_false(spc->spc_reconnecting)) {
415 			rv = EBUSY;
416 			goto out;
417 		}
418 		spc->spc_ostatus = SPCSTATUS_WANTED;
419 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
420 	}
421 	spc->spc_ostatus = SPCSTATUS_BUSY;
422 
423  out:
424 	pthread_mutex_unlock(&spc->spc_mtx);
425 	return rv;
426 }
427 
428 static void
429 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
430 	int wantstr)
431 {
432 	struct rsp_hdr rhdr;
433 
434 	if (wantstr)
435 		dlen = MIN(dlen, strlen(data)+1);
436 
437 	rhdr.rsp_len = sizeof(rhdr) + dlen;
438 	rhdr.rsp_reqno = reqno;
439 	rhdr.rsp_class = RUMPSP_RESP;
440 	rhdr.rsp_type = RUMPSP_COPYIN;
441 	rhdr.rsp_sysnum = 0;
442 
443 	if (resp_sendlock(spc) != 0)
444 		return;
445 	(void)dosend(spc, &rhdr, sizeof(rhdr));
446 	(void)dosend(spc, data, dlen);
447 	sendunlock(spc);
448 }
449 
450 static void
451 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
452 {
453 	struct rsp_hdr rhdr;
454 
455 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
456 	rhdr.rsp_reqno = reqno;
457 	rhdr.rsp_class = RUMPSP_RESP;
458 	rhdr.rsp_type = RUMPSP_ANONMMAP;
459 	rhdr.rsp_sysnum = 0;
460 
461 	if (resp_sendlock(spc) != 0)
462 		return;
463 	(void)dosend(spc, &rhdr, sizeof(rhdr));
464 	(void)dosend(spc, &addr, sizeof(addr));
465 	sendunlock(spc);
466 }
467 
468 int
469 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
470 	register_t *retval)
471 {
472 	struct rsp_sysresp *resp;
473 	sigset_t omask;
474 	void *rdata;
475 	int rv;
476 
477 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
478 
479 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
480 	    sysnum, data, dlen));
481 
482 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
483 	if (rv)
484 		goto out;
485 
486 	resp = rdata;
487 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
488 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
489 
490 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
491 	rv = resp->rsys_error;
492 	free(rdata);
493 
494  out:
495 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
496 	return rv;
497 }
498 
499 static void
500 handlereq(struct spclient *spc)
501 {
502 	struct rsp_copydata *copydata;
503 	struct rsp_hdr *rhdr = &spc->spc_hdr;
504 	void *mapaddr;
505 	size_t maplen;
506 	int reqtype = spc->spc_hdr.rsp_type;
507 
508 	switch (reqtype) {
509 	case RUMPSP_COPYIN:
510 	case RUMPSP_COPYINSTR:
511 		/*LINTED*/
512 		copydata = (struct rsp_copydata *)spc->spc_buf;
513 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
514 		    copydata->rcp_addr, copydata->rcp_len));
515 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
516 		    copydata->rcp_addr, copydata->rcp_len,
517 		    reqtype == RUMPSP_COPYINSTR);
518 		break;
519 	case RUMPSP_COPYOUT:
520 	case RUMPSP_COPYOUTSTR:
521 		/*LINTED*/
522 		copydata = (struct rsp_copydata *)spc->spc_buf;
523 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
524 		    copydata->rcp_addr, copydata->rcp_len));
525 		/*LINTED*/
526 		memcpy(copydata->rcp_addr, copydata->rcp_data,
527 		    copydata->rcp_len);
528 		break;
529 	case RUMPSP_ANONMMAP:
530 		/*LINTED*/
531 		maplen = *(size_t *)spc->spc_buf;
532 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
533 		    MAP_ANON, -1, 0);
534 		if (mapaddr == MAP_FAILED)
535 			mapaddr = NULL;
536 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
537 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
538 		break;
539 	case RUMPSP_RAISE:
540 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
541 		raise((int)rhdr->rsp_signo);
542 		/*
543 		 * We most likely have signals blocked, but the signal
544 		 * will be handled soon enough when we return.
545 		 */
546 		break;
547 	default:
548 		printf("PANIC: INVALID TYPE %d\n", reqtype);
549 		abort();
550 		break;
551 	}
552 
553 	spcfreebuf(spc);
554 }
555 
/*
 * Server address and the index of the matching parsetab[] entry.
 * Both are filled in by parseurl() in rumpclient_init() and used by
 * doconnect().
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
558 
/*
 * Duplicate myfd until the result does not collide with stdio, i.e. is
 * greater than 2.  If mustchange is set, at least one dup is done even
 * if myfd is fine already, and the descriptor passed in is left open.
 * All other intermediate descriptors are closed.
 *
 * Returns the new descriptor, or -1 if myfd was -1 or a dup failed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	int nstale = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* not counted, hence not closed below */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	/* get rid of the low-numbered descriptors we went through */
	while (nstale > 0) {
		nstale--;
		host_close(stale[nstale]);
	}

	return myfd;
}
582 
583 static int
584 doconnect(bool noisy)
585 {
586 	struct respwait rw;
587 	struct rsp_hdr rhdr;
588 	struct kevent kev[NSIG+1];
589 	char banner[MAXBANNER];
590 	struct pollfd pfd;
591 	int s, error, flags, i;
592 	ssize_t n;
593 
594 	if (kq != -1)
595 		host_close(kq);
596 	kq = -1;
597 	s = -1;
598 
599 	if (clispc.spc_fd != -1)
600 		host_close(clispc.spc_fd);
601 	clispc.spc_fd = -1;
602 
603 	/*
604 	 * for reconnect, gate everyone out of the receiver code
605 	 */
606 	putwait_locked(&clispc, &rw, &rhdr);
607 
608 	pthread_mutex_lock(&clispc.spc_mtx);
609 	clispc.spc_reconnecting = 1;
610 	pthread_cond_broadcast(&clispc.spc_cv);
611 	clispc.spc_generation++;
612 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
613 		clispc.spc_istatus = SPCSTATUS_WANTED;
614 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
615 	}
616 	kickall(&clispc);
617 
618 	/*
619 	 * we can release it already since we hold the
620 	 * send lock during reconnect
621 	 * XXX: assert it
622 	 */
623 	clispc.spc_istatus = SPCSTATUS_FREE;
624 	pthread_mutex_unlock(&clispc.spc_mtx);
625 	unputwait_locked(&clispc, &rw);
626 
627 	free(clispc.spc_buf);
628 	clispc.spc_off = 0;
629 
630 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
631 	if (s == -1)
632 		return -1;
633 
634 	pfd.fd = s;
635 	pfd.events = POLLIN;
636 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
637 		if (errno == EINTR)
638 			continue;
639 		error = errno;
640 		if (noisy)
641 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
642 			    strerror(errno));
643 		errno = error;
644 		return -1;
645 	}
646 
647 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
648 		error = errno;
649 		if (noisy)
650 			fprintf(stderr, "rump_sp: connect hook failed\n");
651 		errno = error;
652 		return -1;
653 	}
654 
655 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
656 		error = errno;
657 		if (noisy)
658 			fprintf(stderr, "rump_sp: failed to read banner\n");
659 		errno = error;
660 		return -1;
661 	}
662 
663 	if (banner[n-1] != '\n') {
664 		if (noisy)
665 			fprintf(stderr, "rump_sp: invalid banner\n");
666 		errno = EINVAL;
667 		return -1;
668 	}
669 	banner[n] = '\0';
670 	/* parse the banner some day */
671 
672 	flags = host_fcntl(s, F_GETFL, 0);
673 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
674 		if (noisy)
675 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
676 			    strerror(errno));
677 		errno = EINVAL;
678 		return -1;
679 	}
680 	clispc.spc_fd = s;
681 	clispc.spc_state = SPCSTATE_RUNNING;
682 	clispc.spc_reconnecting = 0;
683 
684 	/* setup kqueue, we want all signals and the fd */
685 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
686 		error = errno;
687 		if (noisy)
688 			fprintf(stderr, "rump_sp: cannot setup kqueue");
689 		errno = error;
690 		return -1;
691 	}
692 
693 	for (i = 0; i < NSIG; i++) {
694 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
695 	}
696 	EV_SET(&kev[NSIG], clispc.spc_fd,
697 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
698 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
699 		error = errno;
700 		if (noisy)
701 			fprintf(stderr, "rump_sp: kevent() failed");
702 		errno = error;
703 		return -1;
704 	}
705 
706 	return 0;
707 }
708 
/*
 * Initialize the synchronization state of clispc: the list of
 * response waiters, the mutex and the condition variable.
 * Always returns 0.
 */
static int
doinit(void)
{

	TAILQ_INIT(&clispc.spc_respwait);
	pthread_mutex_init(&clispc.spc_mtx, NULL);
	pthread_cond_init(&clispc.spc_cv, NULL);

	return 0;
}
719 
/*
 * Default symbol lookup routine, a plain dlsym().  rumphijack_dlsym is
 * a weak alias for it, so another definition of rumphijack_dlsym can
 * take its place at link time; rumpclient_init() checks which of the
 * two it is dealing with.
 */
void *rumpclient__dlsym(void *, const char *);
void *rumphijack_dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
729 
730 static pid_t init_done = 0;
731 
732 int
733 rumpclient_init()
734 {
735 	char *p;
736 	int error;
737 	int rv = -1;
738 	int hstype;
739 	pid_t mypid;
740 
741 	/*
742 	 * Make sure we're not riding the context of a previous
743 	 * host fork.  Note: it's *possible* that after n>1 forks
744 	 * we have the same pid as one of our exited parents, but
745 	 * I'm pretty sure there are 0 practical implications, since
746 	 * it means generations would have to skip rumpclient init.
747 	 */
748 	if (init_done == (mypid = getpid()))
749 		return 0;
750 
751 	/* kq does not traverse fork() */
752 	if (init_done != 0)
753 		kq = -1;
754 	init_done = mypid;
755 
756 	sigfillset(&fullset);
757 
758 	/*
759 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
760 	 * wann wird man je verstehen?  wann wird man je verstehen?
761 	 */
762 #define FINDSYM2(_name_,_syscall_)					\
763 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
764 	    #_syscall_)) == NULL) {					\
765 		if (rumphijack_dlsym == rumpclient__dlsym)		\
766 			host_##_name_ = _name_; /* static fallback */	\
767 		if (host_##_name_ == NULL)				\
768 			errx(1, "cannot find %s: %s", #_syscall_,	\
769 			    dlerror());					\
770 	}
771 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
772 	FINDSYM2(socket,__socket30)
773 	FINDSYM(close)
774 	FINDSYM(connect)
775 	FINDSYM(fcntl)
776 	FINDSYM(poll)
777 	FINDSYM(read)
778 	FINDSYM(sendto)
779 	FINDSYM(setsockopt)
780 	FINDSYM(dup)
781 	FINDSYM(kqueue)
782 	FINDSYM(execve)
783 #if !__NetBSD_Prereq__(5,99,7)
784 	FINDSYM(kevent)
785 #else
786 	FINDSYM2(kevent,_sys___kevent50)
787 #endif
788 #undef	FINDSYM
789 #undef	FINDSY2
790 
791 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
792 		if ((p = getenv("RUMP_SERVER")) == NULL) {
793 			errno = ENOENT;
794 			goto out;
795 		}
796 	}
797 
798 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
799 		errno = error;
800 		goto out;
801 	}
802 
803 	if (doinit() == -1)
804 		goto out;
805 
806 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
807 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
808 		unsetenv("RUMPCLIENT__EXECFD");
809 		hstype = HANDSHAKE_EXEC;
810 	} else {
811 		if (doconnect(true) == -1)
812 			goto out;
813 		hstype = HANDSHAKE_GUEST;
814 	}
815 
816 	error = handshake_req(&clispc, hstype, NULL, 0, false);
817 	if (error) {
818 		pthread_mutex_destroy(&clispc.spc_mtx);
819 		pthread_cond_destroy(&clispc.spc_cv);
820 		if (clispc.spc_fd != -1)
821 			host_close(clispc.spc_fd);
822 		errno = error;
823 		goto out;
824 	}
825 	rv = 0;
826 
827  out:
828 	if (rv == -1)
829 		init_done = 0;
830 	return rv;
831 }
832 
/*
 * Context created by rumpclient_prefork() and consumed after the fork.
 */
struct rumpclient_fork {
	/* reply of the prefork request, sent back in the fork handshake */
	uint32_t fork_auth[AUTHLEN];
	/* copy of clispc at prefork time, see rumpclient_fork_vparent() */
	struct spclient fork_spc;
	/* value of kq at prefork time */
	int fork_kq;
};
838 
839 struct rumpclient_fork *
840 rumpclient_prefork(void)
841 {
842 	struct rumpclient_fork *rpf;
843 	sigset_t omask;
844 	void *resp;
845 	int rv;
846 
847 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
848 	rpf = malloc(sizeof(*rpf));
849 	if (rpf == NULL)
850 		goto out;
851 
852 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
853 		free(rpf);
854 		errno = rv;
855 		rpf = NULL;
856 		goto out;
857 	}
858 
859 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
860 	free(resp);
861 
862 	rpf->fork_spc = clispc;
863 	rpf->fork_kq = kq;
864 
865  out:
866 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
867 	return rpf;
868 }
869 
870 int
871 rumpclient_fork_init(struct rumpclient_fork *rpf)
872 {
873 	int error;
874 	int osock;
875 
876 	osock = clispc.spc_fd;
877 	memset(&clispc, 0, sizeof(clispc));
878 	clispc.spc_fd = osock;
879 
880 	kq = -1; /* kqueue descriptor is not copied over fork() */
881 
882 	if (doinit() == -1)
883 		return -1;
884 	if (doconnect(false) == -1)
885 		return -1;
886 
887 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
888 	    0, false);
889 	if (error) {
890 		pthread_mutex_destroy(&clispc.spc_mtx);
891 		pthread_cond_destroy(&clispc.spc_cv);
892 		errno = error;
893 		return -1;
894 	}
895 
896 	return 0;
897 }
898 
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * currently does nothing, and in particular does not free rpf.
 */
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
905 
/*
 * Restore the connection state and kqueue descriptor which
 * rumpclient_prefork() saved in rpf.  Judging by the name this is for
 * the parent side of a vfork -- TODO confirm against the callers.
 */
void
rumpclient_fork_vparent(struct rumpclient_fork *rpf)
{

	clispc = rpf->fork_spc;
	kq = rpf->fork_kq;
}
913 
914 void
915 rumpclient_setconnretry(time_t timeout)
916 {
917 
918 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
919 		return; /* gigo */
920 
921 	retrytimo = timeout;
922 }
923 
924 int
925 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
926 {
927 	int fd = *fdp;
928 	int untilfd, rv;
929 	int newfd;
930 
931 	switch (variant) {
932 	case RUMPCLIENT_CLOSE_FCLOSEM:
933 		untilfd = MAX(clispc.spc_fd, kq);
934 		for (; fd <= untilfd; fd++) {
935 			if (fd == clispc.spc_fd || fd == kq)
936 				continue;
937 			rv = host_close(fd);
938 			if (rv == -1)
939 				return -1;
940 		}
941 		*fdp = fd;
942 		break;
943 
944 	case RUMPCLIENT_CLOSE_CLOSE:
945 	case RUMPCLIENT_CLOSE_DUP2:
946 		if (fd == clispc.spc_fd) {
947 			struct kevent kev[2];
948 
949 			newfd = dupgood(clispc.spc_fd, 1);
950 			if (newfd == -1)
951 				return -1;
952 			/*
953 			 * now, we have a new socket number, so change
954 			 * the file descriptor that kqueue is
955 			 * monitoring.  remove old and add new.
956 			 */
957 			EV_SET(&kev[0], clispc.spc_fd,
958 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
959 			EV_SET(&kev[1], newfd,
960 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
961 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
962 				int sverrno = errno;
963 				host_close(newfd);
964 				errno = sverrno;
965 				return -1;
966 			}
967 			clispc.spc_fd = newfd;
968 		}
969 		if (fd == kq) {
970 			newfd = dupgood(kq, 1);
971 			if (newfd == -1)
972 				return -1;
973 			kq = newfd;
974 		}
975 		break;
976 	}
977 
978 	return 0;
979 }
980 
/*
 * fork() for rump clients.  The work is done by rumpclient__dofork,
 * which is not defined in this file (presumably it wraps the fork in
 * rumpclient_prefork() and rumpclient_fork_init() -- TODO confirm
 * against rumpclient.h).
 */
pid_t
rumpclient_fork()
{

	return rumpclient__dofork(fork);
}
987 
988 /*
989  * Process is about to exec.  Save info about our existing connection
990  * in the env.  rumpclient will check for this info in init().
991  * This is mostly for the benefit of rumphijack, but regular applications
992  * may use it as well.
993  */
994 int
995 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
996 {
997 	char buf[4096];
998 	char **newenv;
999 	char *envstr, *envstr2;
1000 	size_t nelem;
1001 	int rv, sverrno;
1002 
1003 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1004 	    clispc.spc_fd, kq);
1005 	envstr = malloc(strlen(buf)+1);
1006 	if (envstr == NULL) {
1007 		return ENOMEM;
1008 	}
1009 	strcpy(envstr, buf);
1010 
1011 	/* do we have a fully parsed url we want to forward in the env? */
1012 	if (*parsedurl != '\0') {
1013 		snprintf(buf, sizeof(buf),
1014 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1015 		envstr2 = malloc(strlen(buf)+1);
1016 		if (envstr2 == NULL) {
1017 			free(envstr);
1018 			return ENOMEM;
1019 		}
1020 		strcpy(envstr2, buf);
1021 	} else {
1022 		envstr2 = NULL;
1023 	}
1024 
1025 	for (nelem = 0; envp && envp[nelem]; nelem++)
1026 		continue;
1027 
1028 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1029 	if (newenv == NULL) {
1030 		free(envstr2);
1031 		free(envstr);
1032 		return ENOMEM;
1033 	}
1034 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1035 
1036 	newenv[nelem] = envstr;
1037 	newenv[nelem+1] = envstr2;
1038 	newenv[nelem+2] = NULL;
1039 
1040 	rv = host_execve(path, argv, newenv);
1041 
1042 	_DIAGASSERT(rv != 0);
1043 	sverrno = errno;
1044 	free(envstr2);
1045 	free(envstr);
1046 	free(newenv);
1047 	errno = sverrno;
1048 	return rv;
1049 }
1050 
/*
 * daemon(3) for rump clients: prepare for the fork, daemonize and set
 * up the connection again in the process that is left.
 *
 * nochdir and noclose are passed to daemon() as such.
 *
 * Returns 0 on success and -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		/* cancel does not release the context, so we do */
		free(rf);
		errno = sverrno;
		return -1;
	}

	/*
	 * The context was allocated by rumpclient_prefork() and nobody
	 * holds on to it after fork_init, so release it here.
	 */
	rv = rumpclient_fork_init(rf);
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;
}
1072