xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 213144e1de7024d4193d04aa51005ba3a5ad95e7)
1 /*      $NetBSD: rumpclient.c,v 1.33 2011/02/18 16:22:10 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD");
34 
35 #include <sys/param.h>
36 #include <sys/event.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39 
40 #include <arpa/inet.h>
41 #include <netinet/in.h>
42 #include <netinet/tcp.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <errno.h>
47 #include <fcntl.h>
48 #include <link.h>
49 #include <poll.h>
50 #include <pthread.h>
51 #include <signal.h>
52 #include <stdarg.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 
59 #include <rump/rumpclient.h>
60 
/*
 * Pointers to the host's own implementations of the calls this file
 * uses to talk to the server.  They are filled in by rumpclient_init()
 * through rumpclient_dlsym(RTLD_NEXT, ...).
 *
 * NOTE(review): HOSTOPS is presumably consumed by sp_common.c, which
 * is included right after these declarations -- confirm there.
 */
#define HOSTOPS

/* descriptor creation and management */
int	(*host_socket)(int, int, int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_dup)(int);
int	(*host_close)(int);

/* data transfer and waiting */
ssize_t	(*host_read)(int, void *, size_t);
ssize_t	(*host_sendto)(int, const void *, size_t, int,
		       const struct sockaddr *, socklen_t);
int	(*host_poll)(struct pollfd *, nfds_t, int);

/* kqueue, used to wait for both signals and socket data */
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);

/* used by rumpclient_exec() */
int	(*host_execve)(const char *, char *const[], char *const[]);
78 
79 #include "sp_common.c"
80 
81 static struct spclient clispc = {
82 	.spc_fd = -1,
83 };
84 
85 static int kq = -1;
86 static sigset_t fullset;
87 
88 static int doconnect(bool);
89 static int handshake_req(struct spclient *, int, void *, int, bool);
90 
91 /*
92  * Default: don't retry.  Most clients can't handle it
93  * (consider e.g. fds suddenly going missing).
94  */
95 static time_t retrytimo = 0;
96 
97 static int
98 send_with_recon(struct spclient *spc, const void *data, size_t dlen)
99 {
100 	struct timeval starttime, curtime;
101 	time_t prevreconmsg;
102 	unsigned reconretries;
103 	int rv;
104 
105 	for (prevreconmsg = 0, reconretries = 0;;) {
106 		rv = dosend(spc, data, dlen);
107 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
108 			/* no persistent connections */
109 			if (retrytimo == 0) {
110 				rv = ENOTCONN;
111 				break;
112 			}
113 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
114 				exit(1);
115 
116 			if (!prevreconmsg) {
117 				prevreconmsg = time(NULL);
118 				gettimeofday(&starttime, NULL);
119 			}
120 			if (reconretries == 1) {
121 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
122 					rv = ENOTCONN;
123 					break;
124 				}
125 				fprintf(stderr, "rump_sp: connection to "
126 				    "kernel lost, trying to reconnect ...\n");
127 			} else if (time(NULL) - prevreconmsg > 120) {
128 				fprintf(stderr, "rump_sp: still trying to "
129 				    "reconnect ...\n");
130 				prevreconmsg = time(NULL);
131 			}
132 
133 			/* check that we aren't over the limit */
134 			if (retrytimo > 0) {
135 				struct timeval tmp;
136 
137 				gettimeofday(&curtime, NULL);
138 				timersub(&curtime, &starttime, &tmp);
139 				if (tmp.tv_sec >= retrytimo) {
140 					fprintf(stderr, "rump_sp: reconnect "
141 					    "failed, %lld second timeout\n",
142 					    (long long)retrytimo);
143 					return ENOTCONN;
144 				}
145 			}
146 
147 			/* adhoc backoff timer */
148 			if (reconretries < 10) {
149 				usleep(100000 * reconretries);
150 			} else {
151 				sleep(MIN(10, reconretries-9));
152 			}
153 			reconretries++;
154 
155 			if ((rv = doconnect(false)) != 0)
156 				continue;
157 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
158 			    NULL, 0, true)) != 0)
159 				continue;
160 
161 			/*
162 			 * ok, reconnect succesful.  we need to return to
163 			 * the upper layer to get the entire PDU resent.
164 			 */
165 			if (reconretries != 1)
166 				fprintf(stderr, "rump_sp: reconnected!\n");
167 			rv = EAGAIN;
168 			break;
169 		} else {
170 			_DIAGASSERT(errno != EAGAIN);
171 			break;
172 		}
173 	}
174 
175 	return rv;
176 }
177 
178 static int
179 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
180 	bool keeplock)
181 {
182 	uint64_t mygen;
183 	bool imalive = true;
184 
185 	pthread_mutex_lock(&spc->spc_mtx);
186 	if (!keeplock)
187 		sendunlockl(spc);
188 	mygen = spc->spc_generation;
189 
190 	rw->rw_error = 0;
191 	while (!rw->rw_done && rw->rw_error == 0) {
192 		if (__predict_false(spc->spc_generation != mygen || !imalive))
193 			break;
194 
195 		/* are we free to receive? */
196 		if (spc->spc_istatus == SPCSTATUS_FREE) {
197 			struct kevent kev[8];
198 			int gotresp, dosig, rv, i;
199 
200 			spc->spc_istatus = SPCSTATUS_BUSY;
201 			pthread_mutex_unlock(&spc->spc_mtx);
202 
203 			dosig = 0;
204 			for (gotresp = 0; !gotresp; ) {
205 				switch (readframe(spc)) {
206 				case 0:
207 					rv = host_kevent(kq, NULL, 0,
208 					    kev, __arraycount(kev), NULL);
209 
210 					if (__predict_false(rv == -1)) {
211 						goto cleanup;
212 					}
213 
214 					/*
215 					 * XXX: don't know how this can
216 					 * happen (timeout cannot expire
217 					 * since there isn't one), but
218 					 * it does happen
219 					 */
220 					if (__predict_false(rv == 0))
221 						continue;
222 
223 					for (i = 0; i < rv; i++) {
224 						if (kev[i].filter
225 						    == EVFILT_SIGNAL)
226 							dosig++;
227 					}
228 					if (dosig)
229 						goto cleanup;
230 
231 					continue;
232 				case -1:
233 					imalive = false;
234 					goto cleanup;
235 				default:
236 					break;
237 				}
238 
239 				switch (spc->spc_hdr.rsp_class) {
240 				case RUMPSP_RESP:
241 				case RUMPSP_ERROR:
242 					kickwaiter(spc);
243 					gotresp = spc->spc_hdr.rsp_reqno ==
244 					    rw->rw_reqno;
245 					break;
246 				case RUMPSP_REQ:
247 					handlereq(spc);
248 					break;
249 				default:
250 					/* panic */
251 					break;
252 				}
253 			}
254 
255  cleanup:
256 			pthread_mutex_lock(&spc->spc_mtx);
257 			if (spc->spc_istatus == SPCSTATUS_WANTED)
258 				kickall(spc);
259 			spc->spc_istatus = SPCSTATUS_FREE;
260 
261 			/* take one for the team */
262 			if (dosig) {
263 				pthread_mutex_unlock(&spc->spc_mtx);
264 				pthread_sigmask(SIG_SETMASK, mask, NULL);
265 				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
266 				pthread_mutex_lock(&spc->spc_mtx);
267 			}
268 		} else {
269 			spc->spc_istatus = SPCSTATUS_WANTED;
270 			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
271 		}
272 	}
273 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
274 	pthread_mutex_unlock(&spc->spc_mtx);
275 	pthread_cond_destroy(&rw->rw_cv);
276 
277 	if (spc->spc_generation != mygen || !imalive) {
278 		return ENOTCONN;
279 	}
280 	return rw->rw_error;
281 }
282 
283 static int
284 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
285 	const void *data, size_t dlen, void **resp)
286 {
287 	struct rsp_hdr rhdr;
288 	struct respwait rw;
289 	int rv;
290 
291 	rhdr.rsp_len = sizeof(rhdr) + dlen;
292 	rhdr.rsp_class = RUMPSP_REQ;
293 	rhdr.rsp_type = RUMPSP_SYSCALL;
294 	rhdr.rsp_sysnum = sysnum;
295 
296 	do {
297 		putwait(spc, &rw, &rhdr);
298 		if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) {
299 			unputwait(spc, &rw);
300 			continue;
301 		}
302 		if ((rv = send_with_recon(spc, data, dlen)) != 0) {
303 			unputwait(spc, &rw);
304 			continue;
305 		}
306 
307 		rv = cliwaitresp(spc, &rw, omask, false);
308 		if (rv == ENOTCONN)
309 			rv = EAGAIN;
310 	} while (rv == EAGAIN);
311 
312 	*resp = rw.rw_data;
313 	return rv;
314 }
315 
316 static int
317 handshake_req(struct spclient *spc, int type, void *data,
318 	int cancel, bool haslock)
319 {
320 	struct handshake_fork rf;
321 	struct rsp_hdr rhdr;
322 	struct respwait rw;
323 	sigset_t omask;
324 	size_t bonus;
325 	int rv;
326 
327 	if (type == HANDSHAKE_FORK) {
328 		bonus = sizeof(rf);
329 	} else {
330 		bonus = strlen(getprogname())+1;
331 	}
332 
333 	/* performs server handshake */
334 	rhdr.rsp_len = sizeof(rhdr) + bonus;
335 	rhdr.rsp_class = RUMPSP_REQ;
336 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
337 	rhdr.rsp_handshake = type;
338 
339 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
340 	if (haslock)
341 		putwait_locked(spc, &rw, &rhdr);
342 	else
343 		putwait(spc, &rw, &rhdr);
344 	rv = dosend(spc, &rhdr, sizeof(rhdr));
345 	if (type == HANDSHAKE_FORK) {
346 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
347 		rf.rf_cancel = cancel;
348 		rv = send_with_recon(spc, &rf, sizeof(rf));
349 	} else {
350 		rv = dosend(spc, getprogname(), strlen(getprogname())+1);
351 	}
352 	if (rv || cancel) {
353 		if (haslock)
354 			unputwait_locked(spc, &rw);
355 		else
356 			unputwait(spc, &rw);
357 		if (cancel) {
358 			goto out;
359 		}
360 	} else {
361 		rv = cliwaitresp(spc, &rw, &omask, haslock);
362 	}
363 	if (rv)
364 		goto out;
365 
366 	rv = *(int *)rw.rw_data;
367 	free(rw.rw_data);
368 
369  out:
370 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
371 	return rv;
372 }
373 
374 static int
375 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
376 {
377 	struct rsp_hdr rhdr;
378 	struct respwait rw;
379 	int rv;
380 
381 	rhdr.rsp_len = sizeof(rhdr);
382 	rhdr.rsp_class = RUMPSP_REQ;
383 	rhdr.rsp_type = RUMPSP_PREFORK;
384 	rhdr.rsp_error = 0;
385 
386 	do {
387 		putwait(spc, &rw, &rhdr);
388 		rv = send_with_recon(spc, &rhdr, sizeof(rhdr));
389 		if (rv != 0) {
390 			unputwait(spc, &rw);
391 			continue;
392 		}
393 
394 		rv = cliwaitresp(spc, &rw, omask, false);
395 		if (rv == ENOTCONN)
396 			rv = EAGAIN;
397 	} while (rv == EAGAIN);
398 
399 	*resp = rw.rw_data;
400 	return rv;
401 }
402 
403 /*
404  * prevent response code from deadlocking with reconnect code
405  */
406 static int
407 resp_sendlock(struct spclient *spc)
408 {
409 	int rv = 0;
410 
411 	pthread_mutex_lock(&spc->spc_mtx);
412 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
413 		if (__predict_false(spc->spc_reconnecting)) {
414 			rv = EBUSY;
415 			goto out;
416 		}
417 		spc->spc_ostatus = SPCSTATUS_WANTED;
418 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
419 	}
420 	spc->spc_ostatus = SPCSTATUS_BUSY;
421 
422  out:
423 	pthread_mutex_unlock(&spc->spc_mtx);
424 	return rv;
425 }
426 
427 static void
428 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
429 	int wantstr)
430 {
431 	struct rsp_hdr rhdr;
432 
433 	if (wantstr)
434 		dlen = MIN(dlen, strlen(data)+1);
435 
436 	rhdr.rsp_len = sizeof(rhdr) + dlen;
437 	rhdr.rsp_reqno = reqno;
438 	rhdr.rsp_class = RUMPSP_RESP;
439 	rhdr.rsp_type = RUMPSP_COPYIN;
440 	rhdr.rsp_sysnum = 0;
441 
442 	if (resp_sendlock(spc) != 0)
443 		return;
444 	(void)dosend(spc, &rhdr, sizeof(rhdr));
445 	(void)dosend(spc, data, dlen);
446 	sendunlock(spc);
447 }
448 
449 static void
450 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
451 {
452 	struct rsp_hdr rhdr;
453 
454 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
455 	rhdr.rsp_reqno = reqno;
456 	rhdr.rsp_class = RUMPSP_RESP;
457 	rhdr.rsp_type = RUMPSP_ANONMMAP;
458 	rhdr.rsp_sysnum = 0;
459 
460 	if (resp_sendlock(spc) != 0)
461 		return;
462 	(void)dosend(spc, &rhdr, sizeof(rhdr));
463 	(void)dosend(spc, &addr, sizeof(addr));
464 	sendunlock(spc);
465 }
466 
467 int
468 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
469 	register_t *retval)
470 {
471 	struct rsp_sysresp *resp;
472 	sigset_t omask;
473 	void *rdata;
474 	int rv;
475 
476 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
477 
478 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
479 	    sysnum, data, dlen));
480 
481 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
482 	if (rv)
483 		goto out;
484 
485 	resp = rdata;
486 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
487 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
488 
489 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
490 	rv = resp->rsys_error;
491 	free(rdata);
492 
493  out:
494 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
495 	return rv;
496 }
497 
498 static void
499 handlereq(struct spclient *spc)
500 {
501 	struct rsp_copydata *copydata;
502 	struct rsp_hdr *rhdr = &spc->spc_hdr;
503 	void *mapaddr;
504 	size_t maplen;
505 	int reqtype = spc->spc_hdr.rsp_type;
506 
507 	switch (reqtype) {
508 	case RUMPSP_COPYIN:
509 	case RUMPSP_COPYINSTR:
510 		/*LINTED*/
511 		copydata = (struct rsp_copydata *)spc->spc_buf;
512 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
513 		    copydata->rcp_addr, copydata->rcp_len));
514 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
515 		    copydata->rcp_addr, copydata->rcp_len,
516 		    reqtype == RUMPSP_COPYINSTR);
517 		break;
518 	case RUMPSP_COPYOUT:
519 	case RUMPSP_COPYOUTSTR:
520 		/*LINTED*/
521 		copydata = (struct rsp_copydata *)spc->spc_buf;
522 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
523 		    copydata->rcp_addr, copydata->rcp_len));
524 		/*LINTED*/
525 		memcpy(copydata->rcp_addr, copydata->rcp_data,
526 		    copydata->rcp_len);
527 		break;
528 	case RUMPSP_ANONMMAP:
529 		/*LINTED*/
530 		maplen = *(size_t *)spc->spc_buf;
531 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
532 		    MAP_ANON, -1, 0);
533 		if (mapaddr == MAP_FAILED)
534 			mapaddr = NULL;
535 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
536 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
537 		break;
538 	case RUMPSP_RAISE:
539 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
540 		raise((int)rhdr->rsp_signo);
541 		/*
542 		 * We most likely have signals blocked, but the signal
543 		 * will be handled soon enough when we return.
544 		 */
545 		break;
546 	default:
547 		printf("PANIC: INVALID TYPE %d\n", reqtype);
548 		abort();
549 		break;
550 	}
551 
552 	spcfreebuf(spc);
553 }
554 
/* index into parsetab and server address, both set by parseurl() */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;

/*
 * dup until we get a "good" fd which does not collide with stdio.
 *
 * myfd:       descriptor to relocate; -1 is passed straight through
 * mustchange: if non-zero, a new descriptor number is produced even
 *             if myfd is already above 2, and myfd itself is left open
 *
 * Every intermediate descriptor (and myfd itself, unless mustchange
 * was set) is closed.  Returns the new descriptor, or -1 if myfd was
 * -1 or host_dup() failed; errno is left as it was before the
 * cleanup closes, so the reason for a failure stays visible.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int i, sverrno;

	for (i = 0; (myfd <= 2 || mustchange) && myfd != -1; i++) {
		assert((size_t)i < __arraycount(ofds));
		ofds[i] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			i--; /* prevent closing old fd */
			mustchange = 0;
		}
	}

	/* closing the leftovers must not hide why the dup failed */
	sverrno = errno;
	for (i--; i >= 0; i--) {
		host_close(ofds[i]);
	}
	errno = sverrno;

	return myfd;
}
581 
582 static int
583 doconnect(bool noisy)
584 {
585 	struct respwait rw;
586 	struct rsp_hdr rhdr;
587 	struct kevent kev[NSIG+1];
588 	char banner[MAXBANNER];
589 	struct pollfd pfd;
590 	int s, error, flags, i;
591 	ssize_t n;
592 
593 	if (kq != -1)
594 		host_close(kq);
595 	kq = -1;
596 	s = -1;
597 
598 	if (clispc.spc_fd != -1)
599 		host_close(clispc.spc_fd);
600 	clispc.spc_fd = -1;
601 
602 	/*
603 	 * for reconnect, gate everyone out of the receiver code
604 	 */
605 	putwait_locked(&clispc, &rw, &rhdr);
606 
607 	pthread_mutex_lock(&clispc.spc_mtx);
608 	clispc.spc_reconnecting = 1;
609 	pthread_cond_broadcast(&clispc.spc_cv);
610 	clispc.spc_generation++;
611 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
612 		clispc.spc_istatus = SPCSTATUS_WANTED;
613 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
614 	}
615 	kickall(&clispc);
616 
617 	/*
618 	 * we can release it already since we hold the
619 	 * send lock during reconnect
620 	 * XXX: assert it
621 	 */
622 	clispc.spc_istatus = SPCSTATUS_FREE;
623 	pthread_mutex_unlock(&clispc.spc_mtx);
624 	unputwait_locked(&clispc, &rw);
625 
626 	free(clispc.spc_buf);
627 	clispc.spc_off = 0;
628 
629 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
630 	if (s == -1)
631 		return -1;
632 
633 	pfd.fd = s;
634 	pfd.events = POLLIN;
635 	while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
636 		if (errno == EINTR)
637 			continue;
638 		error = errno;
639 		if (noisy)
640 			fprintf(stderr, "rump_sp: client connect failed: %s\n",
641 			    strerror(errno));
642 		errno = error;
643 		return -1;
644 	}
645 
646 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
647 		error = errno;
648 		if (noisy)
649 			fprintf(stderr, "rump_sp: connect hook failed\n");
650 		errno = error;
651 		return -1;
652 	}
653 
654 	if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
655 		error = errno;
656 		if (noisy)
657 			fprintf(stderr, "rump_sp: failed to read banner\n");
658 		errno = error;
659 		return -1;
660 	}
661 
662 	if (banner[n-1] != '\n') {
663 		if (noisy)
664 			fprintf(stderr, "rump_sp: invalid banner\n");
665 		errno = EINVAL;
666 		return -1;
667 	}
668 	banner[n] = '\0';
669 	/* parse the banner some day */
670 
671 	flags = host_fcntl(s, F_GETFL, 0);
672 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
673 		if (noisy)
674 			fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
675 			    strerror(errno));
676 		errno = EINVAL;
677 		return -1;
678 	}
679 	clispc.spc_fd = s;
680 	clispc.spc_state = SPCSTATE_RUNNING;
681 	clispc.spc_reconnecting = 0;
682 
683 	/* setup kqueue, we want all signals and the fd */
684 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
685 		error = errno;
686 		if (noisy)
687 			fprintf(stderr, "rump_sp: cannot setup kqueue");
688 		errno = error;
689 		return -1;
690 	}
691 
692 	for (i = 0; i < NSIG; i++) {
693 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
694 	}
695 	EV_SET(&kev[NSIG], clispc.spc_fd,
696 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
697 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
698 		error = errno;
699 		if (noisy)
700 			fprintf(stderr, "rump_sp: kevent() failed");
701 		errno = error;
702 		return -1;
703 	}
704 
705 	return 0;
706 }
707 
708 static int
709 doinit(void)
710 {
711 
712 	TAILQ_INIT(&clispc.spc_respwait);
713 	pthread_mutex_init(&clispc.spc_mtx, NULL);
714 	pthread_cond_init(&clispc.spc_cv, NULL);
715 
716 	return 0;
717 }
718 
719 void *(*rumpclient_dlsym)(void *, const char *);
720 static int init_done = 0;
721 
722 int
723 rumpclient_init()
724 {
725 	char *p;
726 	int error;
727 	int rv = -1;
728 	int hstype;
729 
730 	if (init_done)
731 		return 0;
732 	init_done = 1;
733 
734 	sigfillset(&fullset);
735 
736 	/* dlsym overrided by rumphijack? */
737 	if (!rumpclient_dlsym)
738 		rumpclient_dlsym = dlsym;
739 
740 	/*
741 	 * sag mir, wo die symbol sind.  zogen fort, der krieg beginnt.
742 	 * wann wird man je verstehen?  wann wird man je verstehen?
743 	 */
744 #define FINDSYM2(_name_,_syscall_)					\
745 	if ((host_##_name_ = rumpclient_dlsym(RTLD_NEXT,		\
746 	    #_syscall_)) == NULL)					\
747 		/* host_##_name_ = _syscall_ */;
748 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
749 	FINDSYM2(socket,__socket30);
750 	FINDSYM(close);
751 	FINDSYM(connect);
752 	FINDSYM(fcntl);
753 	FINDSYM(poll);
754 	FINDSYM(read);
755 	FINDSYM(sendto);
756 	FINDSYM(setsockopt);
757 	FINDSYM(dup);
758 	FINDSYM(kqueue);
759 	FINDSYM(execve);
760 #if !__NetBSD_Prereq__(5,99,7)
761 	FINDSYM(kevent);
762 #else
763 	FINDSYM2(kevent,_sys___kevent50);
764 #endif
765 #undef	FINDSYM
766 #undef	FINDSY2
767 
768 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
769 		if ((p = getenv("RUMP_SERVER")) == NULL) {
770 			errno = ENOENT;
771 			goto out;
772 		}
773 	}
774 
775 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
776 		errno = error;
777 		goto out;
778 	}
779 
780 	if (doinit() == -1)
781 		goto out;
782 
783 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
784 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
785 		unsetenv("RUMPCLIENT__EXECFD");
786 		hstype = HANDSHAKE_EXEC;
787 	} else {
788 		if (doconnect(true) == -1)
789 			goto out;
790 		hstype = HANDSHAKE_GUEST;
791 	}
792 
793 	error = handshake_req(&clispc, hstype, NULL, 0, false);
794 	if (error) {
795 		pthread_mutex_destroy(&clispc.spc_mtx);
796 		pthread_cond_destroy(&clispc.spc_cv);
797 		if (clispc.spc_fd != -1)
798 			host_close(clispc.spc_fd);
799 		errno = error;
800 		goto out;
801 	}
802 	rv = 0;
803 
804  out:
805 	if (rv == -1)
806 		init_done = 0;
807 	return rv;
808 }
809 
810 struct rumpclient_fork {
811 	uint32_t fork_auth[AUTHLEN];
812 	struct spclient fork_spc;
813 	int fork_kq;
814 };
815 
816 struct rumpclient_fork *
817 rumpclient_prefork(void)
818 {
819 	struct rumpclient_fork *rpf;
820 	sigset_t omask;
821 	void *resp;
822 	int rv;
823 
824 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
825 	rpf = malloc(sizeof(*rpf));
826 	if (rpf == NULL)
827 		goto out;
828 
829 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
830 		free(rpf);
831 		errno = rv;
832 		rpf = NULL;
833 		goto out;
834 	}
835 
836 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
837 	free(resp);
838 
839 	rpf->fork_spc = clispc;
840 	rpf->fork_kq = kq;
841 
842  out:
843 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
844 	return rpf;
845 }
846 
847 int
848 rumpclient_fork_init(struct rumpclient_fork *rpf)
849 {
850 	int error;
851 	int osock;
852 
853 	osock = clispc.spc_fd;
854 	memset(&clispc, 0, sizeof(clispc));
855 	clispc.spc_fd = osock;
856 
857 	kq = -1; /* kqueue descriptor is not copied over fork() */
858 
859 	if (doinit() == -1)
860 		return -1;
861 	if (doconnect(false) == -1)
862 		return -1;
863 
864 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
865 	    0, false);
866 	if (error) {
867 		pthread_mutex_destroy(&clispc.spc_mtx);
868 		pthread_cond_destroy(&clispc.spc_cv);
869 		errno = error;
870 		return -1;
871 	}
872 
873 	return 0;
874 }
875 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the context is left untouched.
 */
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
	(void)rpf;
}
882 
883 void
884 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
885 {
886 
887 	clispc = rpf->fork_spc;
888 	kq = rpf->fork_kq;
889 }
890 
891 void
892 rumpclient_setconnretry(time_t timeout)
893 {
894 
895 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
896 		return; /* gigo */
897 
898 	retrytimo = timeout;
899 }
900 
901 int
902 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
903 {
904 	int fd = *fdp;
905 	int untilfd, rv;
906 	int newfd;
907 
908 	switch (variant) {
909 	case RUMPCLIENT_CLOSE_FCLOSEM:
910 		untilfd = MAX(clispc.spc_fd, kq);
911 		for (; fd <= untilfd; fd++) {
912 			if (fd == clispc.spc_fd || fd == kq)
913 				continue;
914 			rv = host_close(fd);
915 			if (rv == -1)
916 				return -1;
917 		}
918 		*fdp = fd;
919 		break;
920 
921 	case RUMPCLIENT_CLOSE_CLOSE:
922 	case RUMPCLIENT_CLOSE_DUP2:
923 		if (fd == clispc.spc_fd) {
924 			struct kevent kev[2];
925 
926 			newfd = dupgood(clispc.spc_fd, 1);
927 			if (newfd == -1)
928 				return -1;
929 			/*
930 			 * now, we have a new socket number, so change
931 			 * the file descriptor that kqueue is
932 			 * monitoring.  remove old and add new.
933 			 */
934 			EV_SET(&kev[0], clispc.spc_fd,
935 			    EVFILT_READ, EV_DELETE, 0, 0, 0);
936 			EV_SET(&kev[1], newfd,
937 			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
938 			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
939 				int sverrno = errno;
940 				host_close(newfd);
941 				errno = sverrno;
942 				return -1;
943 			}
944 			clispc.spc_fd = newfd;
945 		}
946 		if (fd == kq) {
947 			newfd = dupgood(kq, 1);
948 			if (newfd == -1)
949 				return -1;
950 			kq = newfd;
951 		}
952 		break;
953 	}
954 
955 	return 0;
956 }
957 
/*
 * fork() wrapper: runs the host fork() through rumpclient__dofork()
 * and returns whatever that returns.
 */
pid_t
rumpclient_fork()
{
	pid_t pid;

	pid = rumpclient__dofork(fork);
	return pid;
}
964 
965 /*
966  * Process is about to exec.  Save info about our existing connection
967  * in the env.  rumpclient will check for this info in init().
968  * This is mostly for the benefit of rumphijack, but regular applications
969  * may use it as well.
970  */
971 int
972 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
973 {
974 	char buf[4096];
975 	char **newenv;
976 	char *envstr, *envstr2;
977 	size_t nelem;
978 	int rv, sverrno;
979 
980 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
981 	    clispc.spc_fd, kq);
982 	envstr = malloc(strlen(buf)+1);
983 	if (envstr == NULL) {
984 		return ENOMEM;
985 	}
986 	strcpy(envstr, buf);
987 
988 	/* do we have a fully parsed url we want to forward in the env? */
989 	if (*parsedurl != '\0') {
990 		snprintf(buf, sizeof(buf),
991 		    "RUMP__PARSEDSERVER=%s", parsedurl);
992 		envstr2 = malloc(strlen(buf)+1);
993 		if (envstr2 == NULL) {
994 			free(envstr);
995 			return ENOMEM;
996 		}
997 		strcpy(envstr2, buf);
998 	} else {
999 		envstr2 = NULL;
1000 	}
1001 
1002 	for (nelem = 0; envp && envp[nelem]; nelem++)
1003 		continue;
1004 
1005 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1006 	if (newenv == NULL) {
1007 		free(envstr2);
1008 		free(envstr);
1009 		return ENOMEM;
1010 	}
1011 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1012 
1013 	newenv[nelem] = envstr;
1014 	newenv[nelem+1] = envstr2;
1015 	newenv[nelem+2] = NULL;
1016 
1017 	rv = host_execve(path, argv, newenv);
1018 
1019 	_DIAGASSERT(rv != 0);
1020 	sverrno = errno;
1021 	free(envstr2);
1022 	free(envstr);
1023 	free(newenv);
1024 	errno = sverrno;
1025 	return rv;
1026 }
1027 
/*
 * daemon(3) wrapper which keeps the server connection usable: the
 * fork is announced to the server first, and the connection is set
 * up again afterwards in the process that continues.
 *
 * nochdir/noclose are passed through to daemon().  Returns 0 on
 * success, -1 with errno set on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno, rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		sverrno = errno;
		/*
		 * The context is handed to the cancel routine and not
		 * freed here.
		 * NOTE(review): rumpclient_fork_cancel() is currently
		 * unimplemented, so the context leaks on this path --
		 * decide who owns it when cancel gets implemented.
		 */
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	rv = rumpclient_fork_init(rf);

	/* the context is no longer needed once the handshake was tried */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	return rv == -1 ? -1 : 0;
}
1049