xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*      $NetBSD: rumpclient.c,v 1.47 2011/12/12 16:53:53 joerg Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: rumpclient.c,v 1.47 2011/12/12 16:53:53 joerg Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/event.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39 
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <link.h>
50 #include <poll.h>
51 #include <pthread.h>
52 #include <signal.h>
53 #include <stdarg.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 
60 #include <rump/rumpclient.h>
61 
/*
 * Pointers to the host's versions of the calls this library needs.
 * They are looked up at runtime by rumpclient_init() (see FINDSYM there)
 * so that the library keeps talking to the host even when the same
 * symbols are interposed by someone else.
 *
 * HOSTOPS is defined ahead of the sp_common.c include that follows these
 * declarations; presumably sp_common.c keys off it -- confirm there.
 */
#define HOSTOPS

/* socket and descriptor handling */
int	(*host_socket)(int domain, int type, int protocol);
int	(*host_close)(int fd);
int	(*host_connect)(int fd, const struct sockaddr *name, socklen_t namelen);
int	(*host_fcntl)(int fd, int cmd, ...);
int	(*host_poll)(struct pollfd *fds, nfds_t nfds, int timeout);
ssize_t	(*host_read)(int fd, void *buf, size_t nbytes);
ssize_t (*host_sendmsg)(int fd, const struct msghdr *msg, int flags);
int	(*host_setsockopt)(int fd, int level, int optname,
			   const void *optval, socklen_t optlen);
int	(*host_dup)(int fd);

/* event notification */
int	(*host_kqueue)(void);
int	(*host_kevent)(int kqfd, const struct kevent *changelist,
		       size_t nchanges, struct kevent *eventlist,
		       size_t nevents, const struct timespec *timeout);

/* program execution */
int	(*host_execve)(const char *path, char *const argv[],
		       char *const envp[]);
78 
79 #include "sp_common.c"
80 
81 static struct spclient clispc = {
82 	.spc_fd = -1,
83 };
84 
85 static int kq = -1;
86 static sigset_t fullset;
87 
88 static int doconnect(void);
89 static int handshake_req(struct spclient *, int, void *, int, bool);
90 
91 /*
92  * Default: don't retry.  Most clients can't handle it
93  * (consider e.g. fds suddenly going missing).
94  */
95 static time_t retrytimo = 0;
96 
97 /* always defined to nothingness for now */
98 #define ERRLOG(a)
99 
100 static int
101 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
102 {
103 	struct timeval starttime, curtime;
104 	time_t prevreconmsg;
105 	unsigned reconretries;
106 	int rv;
107 
108 	for (prevreconmsg = 0, reconretries = 0;;) {
109 		rv = dosend(spc, iov, iovlen);
110 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
111 			/* no persistent connections */
112 			if (retrytimo == 0) {
113 				rv = ENOTCONN;
114 				break;
115 			}
116 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
117 				_exit(1);
118 
119 			if (!prevreconmsg) {
120 				prevreconmsg = time(NULL);
121 				gettimeofday(&starttime, NULL);
122 			}
123 			if (reconretries == 1) {
124 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
125 					rv = ENOTCONN;
126 					break;
127 				}
128 				fprintf(stderr, "rump_sp: connection to "
129 				    "kernel lost, trying to reconnect ...\n");
130 			} else if (time(NULL) - prevreconmsg > 120) {
131 				fprintf(stderr, "rump_sp: still trying to "
132 				    "reconnect ...\n");
133 				prevreconmsg = time(NULL);
134 			}
135 
136 			/* check that we aren't over the limit */
137 			if (retrytimo > 0) {
138 				struct timeval tmp;
139 
140 				gettimeofday(&curtime, NULL);
141 				timersub(&curtime, &starttime, &tmp);
142 				if (tmp.tv_sec >= retrytimo) {
143 					fprintf(stderr, "rump_sp: reconnect "
144 					    "failed, %lld second timeout\n",
145 					    (long long)retrytimo);
146 					return ENOTCONN;
147 				}
148 			}
149 
150 			/* adhoc backoff timer */
151 			if (reconretries < 10) {
152 				usleep(100000 * reconretries);
153 			} else {
154 				sleep(MIN(10, reconretries-9));
155 			}
156 			reconretries++;
157 
158 			if ((rv = doconnect()) != 0)
159 				continue;
160 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
161 			    NULL, 0, true)) != 0)
162 				continue;
163 
164 			/*
165 			 * ok, reconnect succesful.  we need to return to
166 			 * the upper layer to get the entire PDU resent.
167 			 */
168 			if (reconretries != 1)
169 				fprintf(stderr, "rump_sp: reconnected!\n");
170 			rv = EAGAIN;
171 			break;
172 		} else {
173 			_DIAGASSERT(errno != EAGAIN);
174 			break;
175 		}
176 	}
177 
178 	return rv;
179 }
180 
181 static int
182 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
183 	bool keeplock)
184 {
185 	uint64_t mygen;
186 	bool imalive = true;
187 
188 	pthread_mutex_lock(&spc->spc_mtx);
189 	if (!keeplock)
190 		sendunlockl(spc);
191 	mygen = spc->spc_generation;
192 
193 	rw->rw_error = 0;
194 	while (!rw->rw_done && rw->rw_error == 0) {
195 		if (__predict_false(spc->spc_generation != mygen || !imalive))
196 			break;
197 
198 		/* are we free to receive? */
199 		if (spc->spc_istatus == SPCSTATUS_FREE) {
200 			struct kevent kev[8];
201 			int gotresp, dosig, rv, i;
202 
203 			spc->spc_istatus = SPCSTATUS_BUSY;
204 			pthread_mutex_unlock(&spc->spc_mtx);
205 
206 			dosig = 0;
207 			for (gotresp = 0; !gotresp; ) {
208 				/*
209 				 * typically we don't have a frame waiting
210 				 * when we come in here, so call kevent now
211 				 */
212 				rv = host_kevent(kq, NULL, 0,
213 				    kev, __arraycount(kev), NULL);
214 
215 				if (__predict_false(rv == -1)) {
216 					goto activity;
217 				}
218 
219 				/*
220 				 * XXX: don't know how this can happen
221 				 * (timeout cannot expire since there
222 				 * isn't one), but it does happen.
223 				 * treat it as an expectional condition
224 				 * and go through tryread to determine
225 				 * alive status.
226 				 */
227 				if (__predict_false(rv == 0))
228 					goto activity;
229 
230 				for (i = 0; i < rv; i++) {
231 					if (kev[i].filter == EVFILT_SIGNAL)
232 						dosig++;
233 				}
234 				if (dosig)
235 					goto cleanup;
236 
237 				/*
238 				 * ok, activity.  try to read a frame to
239 				 * determine what happens next.
240 				 */
241  activity:
242 				switch (readframe(spc)) {
243 				case 0:
244 					continue;
245 				case -1:
246 					imalive = false;
247 					goto cleanup;
248 				default:
249 					/* case 1 */
250 					break;
251 				}
252 
253 				switch (spc->spc_hdr.rsp_class) {
254 				case RUMPSP_RESP:
255 				case RUMPSP_ERROR:
256 					kickwaiter(spc);
257 					gotresp = spc->spc_hdr.rsp_reqno ==
258 					    rw->rw_reqno;
259 					break;
260 				case RUMPSP_REQ:
261 					handlereq(spc);
262 					break;
263 				default:
264 					/* panic */
265 					break;
266 				}
267 			}
268 
269  cleanup:
270 			pthread_mutex_lock(&spc->spc_mtx);
271 			if (spc->spc_istatus == SPCSTATUS_WANTED)
272 				kickall(spc);
273 			spc->spc_istatus = SPCSTATUS_FREE;
274 
275 			/* take one for the team */
276 			if (dosig) {
277 				pthread_mutex_unlock(&spc->spc_mtx);
278 				pthread_sigmask(SIG_SETMASK, mask, NULL);
279 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
280 				pthread_mutex_lock(&spc->spc_mtx);
281 			}
282 		} else {
283 			spc->spc_istatus = SPCSTATUS_WANTED;
284 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
285 		}
286 	}
287 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
288 	pthread_mutex_unlock(&spc->spc_mtx);
289 	pthread_cond_destroy(&rw->rw_cv);
290 
291 	if (spc->spc_generation != mygen || !imalive) {
292 		return ENOTCONN;
293 	}
294 	return rw->rw_error;
295 }
296 
297 static int
298 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
299 	const void *data, size_t dlen, void **resp)
300 {
301 	struct rsp_hdr rhdr;
302 	struct respwait rw;
303 	struct iovec iov[2];
304 	int rv;
305 
306 	rhdr.rsp_len = sizeof(rhdr) + dlen;
307 	rhdr.rsp_class = RUMPSP_REQ;
308 	rhdr.rsp_type = RUMPSP_SYSCALL;
309 	rhdr.rsp_sysnum = sysnum;
310 
311 	IOVPUT(iov[0], rhdr);
312 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
313 
314 	do {
315 		putwait(spc, &rw, &rhdr);
316 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
317 			unputwait(spc, &rw);
318 			continue;
319 		}
320 
321 		rv = cliwaitresp(spc, &rw, omask, false);
322 		if (rv == ENOTCONN)
323 			rv = EAGAIN;
324 	} while (rv == EAGAIN);
325 
326 	*resp = rw.rw_data;
327 	return rv;
328 }
329 
330 static int
331 handshake_req(struct spclient *spc, int type, void *data,
332 	int cancel, bool haslock)
333 {
334 	struct handshake_fork rf;
335 	const char *myprogname = NULL; /* XXXgcc */
336 	struct rsp_hdr rhdr;
337 	struct respwait rw;
338 	sigset_t omask;
339 	size_t bonus;
340 	struct iovec iov[2];
341 	int rv;
342 
343 	if (type == HANDSHAKE_FORK) {
344 		bonus = sizeof(rf);
345 	} else {
346 		myprogname = getprogname();
347 		bonus = strlen(myprogname)+1;
348 	}
349 
350 	/* performs server handshake */
351 	rhdr.rsp_len = sizeof(rhdr) + bonus;
352 	rhdr.rsp_class = RUMPSP_REQ;
353 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
354 	rhdr.rsp_handshake = type;
355 
356 	IOVPUT(iov[0], rhdr);
357 
358 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
359 	if (haslock)
360 		putwait_locked(spc, &rw, &rhdr);
361 	else
362 		putwait(spc, &rw, &rhdr);
363 	if (type == HANDSHAKE_FORK) {
364 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
365 		rf.rf_cancel = cancel;
366 		IOVPUT(iov[1], rf);
367 	} else {
368 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
369 	}
370 	rv = send_with_recon(spc, iov, __arraycount(iov));
371 	if (rv || cancel) {
372 		if (haslock)
373 			unputwait_locked(spc, &rw);
374 		else
375 			unputwait(spc, &rw);
376 		if (cancel) {
377 			goto out;
378 		}
379 	} else {
380 		rv = cliwaitresp(spc, &rw, &omask, haslock);
381 	}
382 	if (rv)
383 		goto out;
384 
385 	rv = *(int *)rw.rw_data;
386 	free(rw.rw_data);
387 
388  out:
389 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
390 	return rv;
391 }
392 
393 static int
394 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
395 {
396 	struct rsp_hdr rhdr;
397 	struct respwait rw;
398 	struct iovec iov[1];
399 	int rv;
400 
401 	rhdr.rsp_len = sizeof(rhdr);
402 	rhdr.rsp_class = RUMPSP_REQ;
403 	rhdr.rsp_type = RUMPSP_PREFORK;
404 	rhdr.rsp_error = 0;
405 
406 	IOVPUT(iov[0], rhdr);
407 
408 	do {
409 		putwait(spc, &rw, &rhdr);
410 		rv = send_with_recon(spc, iov, __arraycount(iov));
411 		if (rv != 0) {
412 			unputwait(spc, &rw);
413 			continue;
414 		}
415 
416 		rv = cliwaitresp(spc, &rw, omask, false);
417 		if (rv == ENOTCONN)
418 			rv = EAGAIN;
419 	} while (rv == EAGAIN);
420 
421 	*resp = rw.rw_data;
422 	return rv;
423 }
424 
425 /*
426  * prevent response code from deadlocking with reconnect code
427  */
428 static int
429 resp_sendlock(struct spclient *spc)
430 {
431 	int rv = 0;
432 
433 	pthread_mutex_lock(&spc->spc_mtx);
434 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
435 		if (__predict_false(spc->spc_reconnecting)) {
436 			rv = EBUSY;
437 			goto out;
438 		}
439 		spc->spc_ostatus = SPCSTATUS_WANTED;
440 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
441 	}
442 	spc->spc_ostatus = SPCSTATUS_BUSY;
443 
444  out:
445 	pthread_mutex_unlock(&spc->spc_mtx);
446 	return rv;
447 }
448 
449 static void
450 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
451 	int wantstr)
452 {
453 	struct rsp_hdr rhdr;
454 	struct iovec iov[2];
455 
456 	if (wantstr)
457 		dlen = MIN(dlen, strlen(data)+1);
458 
459 	rhdr.rsp_len = sizeof(rhdr) + dlen;
460 	rhdr.rsp_reqno = reqno;
461 	rhdr.rsp_class = RUMPSP_RESP;
462 	rhdr.rsp_type = RUMPSP_COPYIN;
463 	rhdr.rsp_sysnum = 0;
464 
465 	IOVPUT(iov[0], rhdr);
466 	IOVPUT_WITHSIZE(iov[1], data, dlen);
467 
468 	if (resp_sendlock(spc) != 0)
469 		return;
470 	(void)SENDIOV(spc, iov);
471 	sendunlock(spc);
472 }
473 
474 static void
475 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
476 {
477 	struct rsp_hdr rhdr;
478 	struct iovec iov[2];
479 
480 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
481 	rhdr.rsp_reqno = reqno;
482 	rhdr.rsp_class = RUMPSP_RESP;
483 	rhdr.rsp_type = RUMPSP_ANONMMAP;
484 	rhdr.rsp_sysnum = 0;
485 
486 	IOVPUT(iov[0], rhdr);
487 	IOVPUT(iov[1], addr);
488 
489 	if (resp_sendlock(spc) != 0)
490 		return;
491 	(void)SENDIOV(spc, iov);
492 	sendunlock(spc);
493 }
494 
495 int
496 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
497 	register_t *retval)
498 {
499 	struct rsp_sysresp *resp;
500 	sigset_t omask;
501 	void *rdata;
502 	int rv;
503 
504 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
505 
506 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
507 	    sysnum, data, dlen));
508 
509 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
510 	if (rv)
511 		goto out;
512 
513 	resp = rdata;
514 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
515 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
516 
517 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
518 	rv = resp->rsys_error;
519 	free(rdata);
520 
521  out:
522 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
523 	return rv;
524 }
525 
526 static void
527 handlereq(struct spclient *spc)
528 {
529 	struct rsp_copydata *copydata;
530 	struct rsp_hdr *rhdr = &spc->spc_hdr;
531 	void *mapaddr;
532 	size_t maplen;
533 	int reqtype = spc->spc_hdr.rsp_type;
534 
535 	switch (reqtype) {
536 	case RUMPSP_COPYIN:
537 	case RUMPSP_COPYINSTR:
538 		/*LINTED*/
539 		copydata = (struct rsp_copydata *)spc->spc_buf;
540 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
541 		    copydata->rcp_addr, copydata->rcp_len));
542 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
543 		    copydata->rcp_addr, copydata->rcp_len,
544 		    reqtype == RUMPSP_COPYINSTR);
545 		break;
546 	case RUMPSP_COPYOUT:
547 	case RUMPSP_COPYOUTSTR:
548 		/*LINTED*/
549 		copydata = (struct rsp_copydata *)spc->spc_buf;
550 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
551 		    copydata->rcp_addr, copydata->rcp_len));
552 		/*LINTED*/
553 		memcpy(copydata->rcp_addr, copydata->rcp_data,
554 		    copydata->rcp_len);
555 		break;
556 	case RUMPSP_ANONMMAP:
557 		/*LINTED*/
558 		maplen = *(size_t *)spc->spc_buf;
559 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
560 		    MAP_ANON, -1, 0);
561 		if (mapaddr == MAP_FAILED)
562 			mapaddr = NULL;
563 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
564 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
565 		break;
566 	case RUMPSP_RAISE:
567 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
568 		raise((int)rhdr->rsp_signo);
569 		/*
570 		 * We most likely have signals blocked, but the signal
571 		 * will be handled soon enough when we return.
572 		 */
573 		break;
574 	default:
575 		printf("PANIC: INVALID TYPE %d\n", reqtype);
576 		abort();
577 		break;
578 	}
579 
580 	spcfreebuf(spc);
581 }
582 
/* index into parsetab[] for the server url, set by parseurl() */
static unsigned int ptab_idx;
/* address of the server, set by parseurl() and used by doconnect() */
static struct sockaddr *serv_sa;
585 
/*
 * dup until we get a "good" fd which does not collide with stdio
 *
 * myfd        descriptor to start from; -1 is passed through untouched
 * mustchange  if non-zero, at least one dup is done even if myfd is
 *             already good, and the original myfd is left open
 *
 * Every intermediate descriptor (and the original one, unless mustchange
 * was set) is closed.  Returns the new descriptor, or -1 if a dup
 * failed; in that case errno is the one from the failed dup whenever
 * descriptors had to be closed afterwards.
 */
static int
dupgood(int myfd, int mustchange)
{
	int saved[4];
	int nsaved = 0;
	int sverrno = 0;

	while ((myfd <= 2 || mustchange) && myfd != -1) {
		assert(nsaved < __arraycount(saved));
		saved[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* prevent closing old fd: do not keep its slot */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	/* the closes below may clobber errno of a failed dup */
	if (myfd == -1 && nsaved > 0)
		sverrno = errno;

	/* close the stepping stones, most recent one first */
	while (nsaved-- > 0) {
		host_close(saved[nsaved]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
617 
618 static int
619 doconnect(void)
620 {
621 	struct respwait rw;
622 	struct rsp_hdr rhdr;
623 	struct kevent kev[NSIG+1];
624 	char banner[MAXBANNER];
625 	struct pollfd pfd;
626 	int s, error, flags, i;
627 	ssize_t n;
628 
629 	if (kq != -1)
630 		host_close(kq);
631 	kq = -1;
632 	s = -1;
633 
634 	if (clispc.spc_fd != -1)
635 		host_close(clispc.spc_fd);
636 	clispc.spc_fd = -1;
637 
638 	/*
639 	 * for reconnect, gate everyone out of the receiver code
640 	 */
641 	putwait_locked(&clispc, &rw, &rhdr);
642 
643 	pthread_mutex_lock(&clispc.spc_mtx);
644 	clispc.spc_reconnecting = 1;
645 	pthread_cond_broadcast(&clispc.spc_cv);
646 	clispc.spc_generation++;
647 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
648 		clispc.spc_istatus = SPCSTATUS_WANTED;
649 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
650 	}
651 	kickall(&clispc);
652 
653 	/*
654 	 * we can release it already since we hold the
655 	 * send lock during reconnect
656 	 * XXX: assert it
657 	 */
658 	clispc.spc_istatus = SPCSTATUS_FREE;
659 	pthread_mutex_unlock(&clispc.spc_mtx);
660 	unputwait_locked(&clispc, &rw);
661 
662 	free(clispc.spc_buf);
663 	clispc.spc_off = 0;
664 
665 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
666 	if (s == -1)
667 		return -1;
668 
669 	pfd.fd = s;
670 	pfd.events = POLLIN;
671 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
672 		if (errno == EINTR)
673 			continue;
674 		ERRLOG(("rump_sp: client connect failed: %s\n",
675 		    strerror(errno)));
676 		return -1;
677 	}
678 
679 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
680 		ERRLOG(("rump_sp: connect hook failed\n"));
681 		return -1;
682 	}
683 
684 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
685 		ERRLOG(("rump_sp: failed to read banner\n"));
686 		return -1;
687 	}
688 
689 	if (banner[n-1] != '\n') {
690 		ERRLOG(("rump_sp: invalid banner\n"));
691 		return -1;
692 	}
693 	banner[n] = '\0';
694 	/* XXX parse the banner some day */
695 
696 	flags = host_fcntl(s, F_GETFL, 0);
697 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
698 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
699 		return -1;
700 	}
701 	clispc.spc_fd = s;
702 	clispc.spc_state = SPCSTATE_RUNNING;
703 	clispc.spc_reconnecting = 0;
704 
705 	/* setup kqueue, we want all signals and the fd */
706 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
707 		ERRLOG(("rump_sp: cannot setup kqueue"));
708 		return -1;
709 	}
710 
711 	for (i = 0; i < NSIG; i++) {
712 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
713 	}
714 	EV_SET(&kev[NSIG], clispc.spc_fd,
715 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
716 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
717 		ERRLOG(("rump_sp: kevent() failed"));
718 		return -1;
719 	}
720 
721 	return 0;
722 }
723 
724 static int
725 doinit(void)
726 {
727 
728 	TAILQ_INIT(&clispc.spc_respwait);
729 	pthread_mutex_init(&clispc.spc_mtx, NULL);
730 	pthread_cond_init(&clispc.spc_cv, NULL);
731 
732 	return 0;
733 }
734 
/*
 * Default symbol lookup: plain dlsym().  rumphijack_dlsym is a weak
 * alias of this function, so rumpclient_init() can tell by comparing
 * the two addresses whether somebody else provides rumphijack_dlsym.
 */
void *rumpclient__dlsym(void *, const char *);
void *rumphijack_dlsym(void *, const char *) __attribute__((__weak__));
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *sym;

	sym = dlsym(handle, symbol);
	return sym;
}
__weak_alias(rumphijack_dlsym,rumpclient__dlsym);
744 
745 static pid_t init_done = 0;
746 
747 int
748 rumpclient_init(void)
749 {
750 	char *p;
751 	int error;
752 	int rv = -1;
753 	int hstype;
754 	pid_t mypid;
755 
756 	/*
757 	 * Make sure we're not riding the context of a previous
758 	 * host fork.  Note: it's *possible* that after n>1 forks
759 	 * we have the same pid as one of our exited parents, but
760 	 * I'm pretty sure there are 0 practical implications, since
761 	 * it means generations would have to skip rumpclient init.
762 	 */
763 	if (init_done == (mypid = getpid()))
764 		return 0;
765 
766 	/* kq does not traverse fork() */
767 	if (init_done != 0)
768 		kq = -1;
769 	init_done = mypid;
770 
771 	sigfillset(&fullset);
772 
773 	/*
774 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
775 	 * wann wird man je verstehen?  wann wird man je verstehen?
776 	 */
777 #define FINDSYM2(_name_,_syscall_)					\
778 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
779 	    #_syscall_)) == NULL) {					\
780 		if (rumphijack_dlsym == rumpclient__dlsym)		\
781 			host_##_name_ = _name_; /* static fallback */	\
782 		if (host_##_name_ == NULL)				\
783 			errx(1, "cannot find %s: %s", #_syscall_,	\
784 			    dlerror());					\
785 	}
786 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
787 	FINDSYM2(socket,__socket30)
788 	FINDSYM(close)
789 	FINDSYM(connect)
790 	FINDSYM(fcntl)
791 	FINDSYM(poll)
792 	FINDSYM(read)
793 	FINDSYM(sendmsg)
794 	FINDSYM(setsockopt)
795 	FINDSYM(dup)
796 	FINDSYM(kqueue)
797 	FINDSYM(execve)
798 #if !__NetBSD_Prereq__(5,99,7)
799 	FINDSYM(kevent)
800 #else
801 	FINDSYM2(kevent,_sys___kevent50)
802 #endif
803 #undef	FINDSYM
804 #undef	FINDSY2
805 
806 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
807 		if ((p = getenv("RUMP_SERVER")) == NULL) {
808 			errno = ENOENT;
809 			goto out;
810 		}
811 	}
812 
813 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
814 		errno = error;
815 		goto out;
816 	}
817 
818 	if (doinit() == -1)
819 		goto out;
820 
821 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
822 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
823 		unsetenv("RUMPCLIENT__EXECFD");
824 		hstype = HANDSHAKE_EXEC;
825 	} else {
826 		if (doconnect() == -1)
827 			goto out;
828 		hstype = HANDSHAKE_GUEST;
829 	}
830 
831 	error = handshake_req(&clispc, hstype, NULL, 0, false);
832 	if (error) {
833 		pthread_mutex_destroy(&clispc.spc_mtx);
834 		pthread_cond_destroy(&clispc.spc_cv);
835 		if (clispc.spc_fd != -1)
836 			host_close(clispc.spc_fd);
837 		errno = error;
838 		goto out;
839 	}
840 	rv = 0;
841 
842  out:
843 	if (rv == -1)
844 		init_done = 0;
845 	return rv;
846 }
847 
848 struct rumpclient_fork {
849 	uint32_t fork_auth[AUTHLEN];
850 	struct spclient fork_spc;
851 	int fork_kq;
852 };
853 
854 struct rumpclient_fork *
855 rumpclient_prefork(void)
856 {
857 	struct rumpclient_fork *rpf;
858 	sigset_t omask;
859 	void *resp;
860 	int rv;
861 
862 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
863 	rpf = malloc(sizeof(*rpf));
864 	if (rpf == NULL)
865 		goto out;
866 
867 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
868 		free(rpf);
869 		errno = rv;
870 		rpf = NULL;
871 		goto out;
872 	}
873 
874 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
875 	free(resp);
876 
877 	rpf->fork_spc = clispc;
878 	rpf->fork_kq = kq;
879 
880  out:
881 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
882 	return rpf;
883 }
884 
885 int
886 rumpclient_fork_init(struct rumpclient_fork *rpf)
887 {
888 	int error;
889 	int osock;
890 
891 	osock = clispc.spc_fd;
892 	memset(&clispc, 0, sizeof(clispc));
893 	clispc.spc_fd = osock;
894 
895 	kq = -1; /* kqueue descriptor is not copied over fork() */
896 
897 	if (doinit() == -1)
898 		return -1;
899 	if (doconnect() == -1)
900 		return -1;
901 
902 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
903 	    0, false);
904 	if (error) {
905 		pthread_mutex_destroy(&clispc.spc_mtx);
906 		pthread_cond_destroy(&clispc.spc_cv);
907 		errno = error;
908 		return -1;
909 	}
910 
911 	return 0;
912 }
913 
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * the context is left untouched.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	return;
}
921 
922 void
923 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
924 {
925 
926 	clispc = rpf->fork_spc;
927 	kq = rpf->fork_kq;
928 }
929 
930 void
931 rumpclient_setconnretry(time_t timeout)
932 {
933 
934 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
935 		return; /* gigo */
936 
937 	retrytimo = timeout;
938 }
939 
940 int
941 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
942 {
943 	int fd = *fdp;
944 	int untilfd, rv;
945 	int newfd;
946 
947 	switch (variant) {
948 	case RUMPCLIENT_CLOSE_FCLOSEM:
949 		untilfd = MAX(clispc.spc_fd, kq);
950 		for (; fd <= untilfd; fd++) {
951 			if (fd == clispc.spc_fd || fd == kq)
952 				continue;
953 			rv = host_close(fd);
954 			if (rv == -1)
955 				return -1;
956 		}
957 		*fdp = fd;
958 		break;
959 
960 	case RUMPCLIENT_CLOSE_CLOSE:
961 	case RUMPCLIENT_CLOSE_DUP2:
962 		if (fd == clispc.spc_fd) {
963 			struct kevent kev[2];
964 
965 			newfd = dupgood(clispc.spc_fd, 1);
966 			if (newfd == -1)
967 				return -1;
968 			/*
969 			 * now, we have a new socket number, so change
970 			 * the file descriptor that kqueue is
971 			 * monitoring.  remove old and add new.
972 			 */
973 			EV_SET(&kev[0], clispc.spc_fd,
974 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
975 			EV_SET(&kev[1], newfd,
976 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
977 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
978 				int sverrno = errno;
979 				host_close(newfd);
980 				errno = sverrno;
981 				return -1;
982 			}
983 			clispc.spc_fd = newfd;
984 		}
985 		if (fd == kq) {
986 			newfd = dupgood(kq, 1);
987 			if (newfd == -1)
988 				return -1;
989 			kq = newfd;
990 		}
991 		break;
992 	}
993 
994 	return 0;
995 }
996 
/*
 * fork() for rump clients: hands the host's fork() to
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
1003 
1004 /*
1005  * Process is about to exec.  Save info about our existing connection
1006  * in the env.  rumpclient will check for this info in init().
1007  * This is mostly for the benefit of rumphijack, but regular applications
1008  * may use it as well.
1009  */
1010 int
1011 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1012 {
1013 	char buf[4096];
1014 	char **newenv;
1015 	char *envstr, *envstr2;
1016 	size_t nelem;
1017 	int rv, sverrno;
1018 
1019 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1020 	    clispc.spc_fd, kq);
1021 	envstr = malloc(strlen(buf)+1);
1022 	if (envstr == NULL) {
1023 		return ENOMEM;
1024 	}
1025 	strcpy(envstr, buf);
1026 
1027 	/* do we have a fully parsed url we want to forward in the env? */
1028 	if (*parsedurl != '\0') {
1029 		snprintf(buf, sizeof(buf),
1030 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1031 		envstr2 = malloc(strlen(buf)+1);
1032 		if (envstr2 == NULL) {
1033 			free(envstr);
1034 			return ENOMEM;
1035 		}
1036 		strcpy(envstr2, buf);
1037 	} else {
1038 		envstr2 = NULL;
1039 	}
1040 
1041 	for (nelem = 0; envp && envp[nelem]; nelem++)
1042 		continue;
1043 
1044 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1045 	if (newenv == NULL) {
1046 		free(envstr2);
1047 		free(envstr);
1048 		return ENOMEM;
1049 	}
1050 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1051 
1052 	newenv[nelem] = envstr;
1053 	newenv[nelem+1] = envstr2;
1054 	newenv[nelem+2] = NULL;
1055 
1056 	rv = host_execve(path, argv, newenv);
1057 
1058 	_DIAGASSERT(rv != 0);
1059 	sverrno = errno;
1060 	free(envstr2);
1061 	free(envstr);
1062 	free(newenv);
1063 	errno = sverrno;
1064 	return rv;
1065 }
1066 
/*
 * daemon(3) for rump clients: prepares the fork with the server, lets
 * the host daemonize the process and then sets up the connection of the
 * surviving child.
 *
 * Returns 0 on success and -1 on failure with errno set by the step
 * that failed.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/* the context is left to rumpclient_fork_cancel() */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/*
	 * rumpclient_fork_init() only reads the context during the call,
	 * so it can be released now instead of being leaked.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;
}
1088