xref: /netbsd-src/lib/librumpclient/rumpclient.c (revision 5bbd2a12505d72a8177929a37b5cee489d0a1cfd)
1 /*      $NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include "rumpuser_port.h"
33 
34 /*
35  * We use kqueue on NetBSD, poll elsewhere.  Theoretically we could
36  * use kqueue on other BSD's too, but I haven't tested those.  We
37  * want to use kqueue because it will give us the ability to get signal
38  * notifications but defer their handling to a stage where we do not
39  * hold the communication lock.  Taking a signal while holding on to
40  * that lock may cause a deadlock.  Therefore, block signals throughout
41  * the RPC when using poll.  This unfortunately means that the normal
42  * SIGINT way of stopping a process while it is undergoing rump kernel
 * RPC will not work.  If anyone knows which Linux system call handles
44  * the above scenario correctly, I'm all ears.
45  */
46 
47 #ifdef __NetBSD__
48 #define USE_KQUEUE
49 #endif
50 
51 #include <sys/cdefs.h>
52 __RCSID("$NetBSD: rumpclient.c,v 1.51 2012/08/29 10:38:53 msaitoh Exp $");
53 
54 #include <sys/param.h>
55 #include <sys/mman.h>
56 #include <sys/socket.h>
57 #include <sys/time.h>
58 
59 #ifdef USE_KQUEUE
60 #include <sys/event.h>
61 #endif
62 
63 #include <arpa/inet.h>
64 #include <netinet/in.h>
65 #include <netinet/tcp.h>
66 
67 #include <assert.h>
68 #include <dlfcn.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <link.h>
73 #include <poll.h>
74 #include <pthread.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdbool.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 
83 #include <rump/rumpclient.h>
84 
/*
 * Host system call entry points.  rumpclient_init() fills in each of
 * these pointers by symbol lookup (see the FINDSYM macros there), and
 * the rest of this file performs its host I/O exclusively through them.
 * HOSTOPS is defined before sp_common.c is included below; presumably
 * that file keys on it to route its own I/O through these pointers --
 * TODO confirm against sp_common.c.
 */
#define HOSTOPS
int	(*host_socket)(int, int, int);
int	(*host_close)(int);
int	(*host_connect)(int, const struct sockaddr *, socklen_t);
int	(*host_fcntl)(int, int, ...);
int	(*host_poll)(struct pollfd *, nfds_t, int);
ssize_t	(*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int	(*host_setsockopt)(int, int, int, const void *, socklen_t);
int	(*host_dup)(int);

/* kqueue entry points exist only when the kqueue-based receiver is built */
#ifdef USE_KQUEUE
int	(*host_kqueue)(void);
int	(*host_kevent)(int, const struct kevent *, size_t,
		       struct kevent *, size_t, const struct timespec *);
#endif

int	(*host_execve)(const char *, char *const[], char *const[]);
103 
104 #include "sp_common.c"
105 
/* the single client connection used by this process; fd -1 == not connected */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* kqueue descriptor watching the socket and signals, -1 when not in use */
static int kq = -1;
/* signal set with every signal included; filled in by rumpclient_init() */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry.  Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * The value is changed with rumpclient_setconnretry().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
124 
125 static int
126 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
127 {
128 	struct timeval starttime, curtime;
129 	time_t prevreconmsg;
130 	unsigned reconretries;
131 	int rv;
132 
133 	for (prevreconmsg = 0, reconretries = 0;;) {
134 		rv = dosend(spc, iov, iovlen);
135 		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
136 			/* no persistent connections */
137 			if (retrytimo == 0) {
138 				rv = ENOTCONN;
139 				break;
140 			}
141 			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
142 				_exit(1);
143 
144 			if (!prevreconmsg) {
145 				prevreconmsg = time(NULL);
146 				gettimeofday(&starttime, NULL);
147 			}
148 			if (reconretries == 1) {
149 				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
150 					rv = ENOTCONN;
151 					break;
152 				}
153 				fprintf(stderr, "rump_sp: connection to "
154 				    "kernel lost, trying to reconnect ...\n");
155 			} else if (time(NULL) - prevreconmsg > 120) {
156 				fprintf(stderr, "rump_sp: still trying to "
157 				    "reconnect ...\n");
158 				prevreconmsg = time(NULL);
159 			}
160 
161 			/* check that we aren't over the limit */
162 			if (retrytimo > 0) {
163 				struct timeval tmp;
164 
165 				gettimeofday(&curtime, NULL);
166 				timersub(&curtime, &starttime, &tmp);
167 				if (tmp.tv_sec >= retrytimo) {
168 					fprintf(stderr, "rump_sp: reconnect "
169 					    "failed, %lld second timeout\n",
170 					    (long long)retrytimo);
171 					return ENOTCONN;
172 				}
173 			}
174 
175 			/* adhoc backoff timer */
176 			if (reconretries < 10) {
177 				usleep(100000 * reconretries);
178 			} else {
179 				sleep(MIN(10, reconretries-9));
180 			}
181 			reconretries++;
182 
183 			if ((rv = doconnect()) != 0)
184 				continue;
185 			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
186 			    NULL, 0, true)) != 0)
187 				continue;
188 
189 			/*
190 			 * ok, reconnect succesful.  we need to return to
191 			 * the upper layer to get the entire PDU resent.
192 			 */
193 			if (reconretries != 1)
194 				fprintf(stderr, "rump_sp: reconnected!\n");
195 			rv = EAGAIN;
196 			break;
197 		} else {
198 			_DIAGASSERT(errno != EAGAIN);
199 			break;
200 		}
201 	}
202 
203 	return rv;
204 }
205 
/*
 * Wait until the response for the request registered in rw has arrived.
 *
 * Only one thread at a time reads from the socket: whoever finds
 * spc_istatus == SPCSTATUS_FREE marks it BUSY and becomes the receiver,
 * the others sleep on their own rw_cv until they are kicked.
 *
 *  spc      - connection to wait on
 *  rw       - wait record of our request; removed from spc_respwait and
 *             its condvar destroyed before returning
 *  mask     - signal mask installed for a moment when a signal event
 *             was seen, so that the signal can be delivered while the
 *             connection mutex is not held
 *  keeplock - if false, the send lock is released (sendunlockl) before
 *             waiting
 *
 * Returns ENOTCONN if the connection generation changed or reading a
 * frame failed, otherwise the value of rw->rw_error.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
	bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* remember which connection generation our request belongs to */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		/* connection was replaced or found dead: stop waiting */
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			/* become the receiver; read without holding the mutex */
			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(kq, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error let readframe() sort out the state */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				/* count signal events among the returned ones */
				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				/* give up the receiver role to handle the signal */
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* USE_KQUEUE */
				struct pollfd pfd;

				pfd.fd = clispc.spc_fd;
				pfd.events = POLLIN;

				/* block until the socket is readable */
				rv = host_poll(&pfd, 1, -1);
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet, wait for more */
					continue;
				case -1:
					/* read failed: connection is dead */
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					/* hand the frame to its waiter; done if ours */
					kickwaiter(spc);
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					/* the server wants something from us */
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* hand the receiver role back, waking anyone who wants it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/* take one for the team */
			if (dosig) {
				/*
				 * briefly install the caller's mask with the
				 * mutex released so that pending signals get
				 * delivered, then block everything again
				 */
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* someone else is receiving; ask for a wakeup and sleep */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	/* NOTE(review): spc_generation is read here without spc_mtx held */
	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
333 
334 static int
335 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
336 	const void *data, size_t dlen, void **resp)
337 {
338 	struct rsp_hdr rhdr;
339 	struct respwait rw;
340 	struct iovec iov[2];
341 	int rv;
342 
343 	rhdr.rsp_len = sizeof(rhdr) + dlen;
344 	rhdr.rsp_class = RUMPSP_REQ;
345 	rhdr.rsp_type = RUMPSP_SYSCALL;
346 	rhdr.rsp_sysnum = sysnum;
347 
348 	IOVPUT(iov[0], rhdr);
349 	IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
350 
351 	do {
352 		putwait(spc, &rw, &rhdr);
353 		if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
354 			unputwait(spc, &rw);
355 			continue;
356 		}
357 
358 		rv = cliwaitresp(spc, &rw, omask, false);
359 		if (rv == ENOTCONN)
360 			rv = EAGAIN;
361 	} while (rv == EAGAIN);
362 
363 	*resp = rw.rw_data;
364 	return rv;
365 }
366 
367 static int
368 handshake_req(struct spclient *spc, int type, void *data,
369 	int cancel, bool haslock)
370 {
371 	struct handshake_fork rf;
372 	const char *myprogname = NULL; /* XXXgcc */
373 	struct rsp_hdr rhdr;
374 	struct respwait rw;
375 	sigset_t omask;
376 	size_t bonus;
377 	struct iovec iov[2];
378 	int rv;
379 
380 	if (type == HANDSHAKE_FORK) {
381 		bonus = sizeof(rf);
382 	} else {
383 #ifdef __NetBSD__
384 		/* would procfs work on NetBSD too? */
385 		myprogname = getprogname();
386 #else
387 		int fd = open("/proc/self/comm", O_RDONLY);
388 		if (fd == -1) {
389 			myprogname = "???";
390 		} else {
391 			static char commname[128];
392 
393 			memset(commname, 0, sizeof(commname));
394 			if (read(fd, commname, sizeof(commname)) > 0) {
395 				char *n;
396 
397 				n = strrchr(commname, '\n');
398 				if (n)
399 					*n = '\0';
400 				myprogname = commname;
401 			} else {
402 				myprogname = "???";
403 			}
404 			close(fd);
405 		}
406 #endif
407 		bonus = strlen(myprogname)+1;
408 	}
409 
410 	/* performs server handshake */
411 	rhdr.rsp_len = sizeof(rhdr) + bonus;
412 	rhdr.rsp_class = RUMPSP_REQ;
413 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
414 	rhdr.rsp_handshake = type;
415 
416 	IOVPUT(iov[0], rhdr);
417 
418 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
419 	if (haslock)
420 		putwait_locked(spc, &rw, &rhdr);
421 	else
422 		putwait(spc, &rw, &rhdr);
423 	if (type == HANDSHAKE_FORK) {
424 		memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
425 		rf.rf_cancel = cancel;
426 		IOVPUT(iov[1], rf);
427 	} else {
428 		IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
429 	}
430 	rv = send_with_recon(spc, iov, __arraycount(iov));
431 	if (rv || cancel) {
432 		if (haslock)
433 			unputwait_locked(spc, &rw);
434 		else
435 			unputwait(spc, &rw);
436 		if (cancel) {
437 			goto out;
438 		}
439 	} else {
440 		rv = cliwaitresp(spc, &rw, &omask, haslock);
441 	}
442 	if (rv)
443 		goto out;
444 
445 	rv = *(int *)rw.rw_data;
446 	free(rw.rw_data);
447 
448  out:
449 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
450 	return rv;
451 }
452 
453 static int
454 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
455 {
456 	struct rsp_hdr rhdr;
457 	struct respwait rw;
458 	struct iovec iov[1];
459 	int rv;
460 
461 	rhdr.rsp_len = sizeof(rhdr);
462 	rhdr.rsp_class = RUMPSP_REQ;
463 	rhdr.rsp_type = RUMPSP_PREFORK;
464 	rhdr.rsp_error = 0;
465 
466 	IOVPUT(iov[0], rhdr);
467 
468 	do {
469 		putwait(spc, &rw, &rhdr);
470 		rv = send_with_recon(spc, iov, __arraycount(iov));
471 		if (rv != 0) {
472 			unputwait(spc, &rw);
473 			continue;
474 		}
475 
476 		rv = cliwaitresp(spc, &rw, omask, false);
477 		if (rv == ENOTCONN)
478 			rv = EAGAIN;
479 	} while (rv == EAGAIN);
480 
481 	*resp = rw.rw_data;
482 	return rv;
483 }
484 
485 /*
486  * prevent response code from deadlocking with reconnect code
487  */
488 static int
489 resp_sendlock(struct spclient *spc)
490 {
491 	int rv = 0;
492 
493 	pthread_mutex_lock(&spc->spc_mtx);
494 	while (spc->spc_ostatus != SPCSTATUS_FREE) {
495 		if (__predict_false(spc->spc_reconnecting)) {
496 			rv = EBUSY;
497 			goto out;
498 		}
499 		spc->spc_ostatus = SPCSTATUS_WANTED;
500 		pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
501 	}
502 	spc->spc_ostatus = SPCSTATUS_BUSY;
503 
504  out:
505 	pthread_mutex_unlock(&spc->spc_mtx);
506 	return rv;
507 }
508 
509 static void
510 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
511 	int wantstr)
512 {
513 	struct rsp_hdr rhdr;
514 	struct iovec iov[2];
515 
516 	if (wantstr)
517 		dlen = MIN(dlen, strlen(data)+1);
518 
519 	rhdr.rsp_len = sizeof(rhdr) + dlen;
520 	rhdr.rsp_reqno = reqno;
521 	rhdr.rsp_class = RUMPSP_RESP;
522 	rhdr.rsp_type = RUMPSP_COPYIN;
523 	rhdr.rsp_sysnum = 0;
524 
525 	IOVPUT(iov[0], rhdr);
526 	IOVPUT_WITHSIZE(iov[1], data, dlen);
527 
528 	if (resp_sendlock(spc) != 0)
529 		return;
530 	(void)SENDIOV(spc, iov);
531 	sendunlock(spc);
532 }
533 
534 static void
535 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
536 {
537 	struct rsp_hdr rhdr;
538 	struct iovec iov[2];
539 
540 	rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
541 	rhdr.rsp_reqno = reqno;
542 	rhdr.rsp_class = RUMPSP_RESP;
543 	rhdr.rsp_type = RUMPSP_ANONMMAP;
544 	rhdr.rsp_sysnum = 0;
545 
546 	IOVPUT(iov[0], rhdr);
547 	IOVPUT(iov[1], addr);
548 
549 	if (resp_sendlock(spc) != 0)
550 		return;
551 	(void)SENDIOV(spc, iov);
552 	sendunlock(spc);
553 }
554 
555 int
556 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
557 	register_t *retval)
558 {
559 	struct rsp_sysresp *resp;
560 	sigset_t omask;
561 	void *rdata;
562 	int rv;
563 
564 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
565 
566 	DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
567 	    sysnum, data, dlen));
568 
569 	rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
570 	if (rv)
571 		goto out;
572 
573 	resp = rdata;
574 	DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %d/%d\n",
575 	    sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
576 
577 	memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
578 	rv = resp->rsys_error;
579 	free(rdata);
580 
581  out:
582 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
583 	return rv;
584 }
585 
586 static void
587 handlereq(struct spclient *spc)
588 {
589 	struct rsp_copydata *copydata;
590 	struct rsp_hdr *rhdr = &spc->spc_hdr;
591 	void *mapaddr;
592 	size_t maplen;
593 	int reqtype = spc->spc_hdr.rsp_type;
594 
595 	switch (reqtype) {
596 	case RUMPSP_COPYIN:
597 	case RUMPSP_COPYINSTR:
598 		/*LINTED*/
599 		copydata = (struct rsp_copydata *)spc->spc_buf;
600 		DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
601 		    copydata->rcp_addr, copydata->rcp_len));
602 		send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
603 		    copydata->rcp_addr, copydata->rcp_len,
604 		    reqtype == RUMPSP_COPYINSTR);
605 		break;
606 	case RUMPSP_COPYOUT:
607 	case RUMPSP_COPYOUTSTR:
608 		/*LINTED*/
609 		copydata = (struct rsp_copydata *)spc->spc_buf;
610 		DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
611 		    copydata->rcp_addr, copydata->rcp_len));
612 		/*LINTED*/
613 		memcpy(copydata->rcp_addr, copydata->rcp_data,
614 		    copydata->rcp_len);
615 		break;
616 	case RUMPSP_ANONMMAP:
617 		/*LINTED*/
618 		maplen = *(size_t *)spc->spc_buf;
619 		mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
620 		    MAP_ANON, -1, 0);
621 		if (mapaddr == MAP_FAILED)
622 			mapaddr = NULL;
623 		DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
624 		send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
625 		break;
626 	case RUMPSP_RAISE:
627 		DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo));
628 		raise((int)rhdr->rsp_signo);
629 		/*
630 		 * We most likely have signals blocked, but the signal
631 		 * will be handled soon enough when we return.
632 		 */
633 		break;
634 	default:
635 		printf("PANIC: INVALID TYPE %d\n", reqtype);
636 		abort();
637 		break;
638 	}
639 
640 	spcfreebuf(spc);
641 }
642 
/*
 * Server address and the index of the matching parsetab[] entry.
 * Both are filled in by parseurl() in rumpclient_init() and used by
 * doconnect() to create and connect the socket.
 */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;
645 
/*
 * dup until we get a "good" fd which does not collide with stdio.
 *
 * The descriptor is duplicated while it is <= 2.  With mustchange set
 * it is duplicated at least once, and the descriptor passed in is left
 * open in that case.  All other intermediate descriptors are closed.
 *
 * Returns the resulting descriptor, or -1 if myfd was -1 to begin with
 * or a dup failed; errno from a failed dup is preserved across the
 * closes when any descriptors had to be closed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int stale[4];
	unsigned int nstale = 0;
	int saved_errno = 0;

	while (myfd != -1 && (myfd <= 2 || mustchange)) {
		assert(nstale < __arraycount(stale));
		stale[nstale] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* do not count the slot: prevent closing old fd */
			mustchange = 0;
		} else {
			nstale++;
		}
	}

	/* the closes below may clobber errno from the failed dup */
	if (nstale > 0 && myfd == -1)
		saved_errno = errno;

	for (; nstale > 0; nstale--)
		host_close(stale[nstale - 1]);

	if (saved_errno != 0)
		errno = saved_errno;

	return myfd;
}
677 
678 static int
679 doconnect(void)
680 {
681 	struct respwait rw;
682 	struct rsp_hdr rhdr;
683 	char banner[MAXBANNER];
684 	int s, error, flags;
685 	ssize_t n;
686 
687 	if (kq != -1)
688 		host_close(kq);
689 	kq = -1;
690 	s = -1;
691 
692 	if (clispc.spc_fd != -1)
693 		host_close(clispc.spc_fd);
694 	clispc.spc_fd = -1;
695 
696 	/*
697 	 * for reconnect, gate everyone out of the receiver code
698 	 */
699 	putwait_locked(&clispc, &rw, &rhdr);
700 
701 	pthread_mutex_lock(&clispc.spc_mtx);
702 	clispc.spc_reconnecting = 1;
703 	pthread_cond_broadcast(&clispc.spc_cv);
704 	clispc.spc_generation++;
705 	while (clispc.spc_istatus != SPCSTATUS_FREE) {
706 		clispc.spc_istatus = SPCSTATUS_WANTED;
707 		pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
708 	}
709 	kickall(&clispc);
710 
711 	/*
712 	 * we can release it already since we hold the
713 	 * send lock during reconnect
714 	 * XXX: assert it
715 	 */
716 	clispc.spc_istatus = SPCSTATUS_FREE;
717 	pthread_mutex_unlock(&clispc.spc_mtx);
718 	unputwait_locked(&clispc, &rw);
719 
720 	free(clispc.spc_buf);
721 	clispc.spc_off = 0;
722 
723 	s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
724 	if (s == -1)
725 		return -1;
726 
727 	while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
728 		if (errno == EINTR)
729 			continue;
730 		ERRLOG(("rump_sp: client connect failed: %s\n",
731 		    strerror(errno)));
732 		return -1;
733 	}
734 
735 	if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
736 		ERRLOG(("rump_sp: connect hook failed\n"));
737 		return -1;
738 	}
739 
740 	if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
741 		ERRLOG(("rump_sp: failed to read banner\n"));
742 		return -1;
743 	}
744 
745 	if (banner[n-1] != '\n') {
746 		ERRLOG(("rump_sp: invalid banner\n"));
747 		return -1;
748 	}
749 	banner[n] = '\0';
750 	/* XXX parse the banner some day */
751 
752 	flags = host_fcntl(s, F_GETFL, 0);
753 	if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
754 		ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
755 		return -1;
756 	}
757 	clispc.spc_fd = s;
758 	clispc.spc_state = SPCSTATE_RUNNING;
759 	clispc.spc_reconnecting = 0;
760 
761 #ifdef USE_KQUEUE
762 {
763 	struct kevent kev[NSIG+1];
764 	int i;
765 
766 	/* setup kqueue, we want all signals and the fd */
767 	if ((kq = dupgood(host_kqueue(), 0)) == -1) {
768 		ERRLOG(("rump_sp: cannot setup kqueue"));
769 		return -1;
770 	}
771 
772 	for (i = 0; i < NSIG; i++) {
773 		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
774 	}
775 	EV_SET(&kev[NSIG], clispc.spc_fd,
776 	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
777 	if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
778 		ERRLOG(("rump_sp: kevent() failed"));
779 		return -1;
780 	}
781 }
782 #endif /* USE_KQUEUE */
783 
784 	return 0;
785 }
786 
787 static int
788 doinit(void)
789 {
790 
791 	TAILQ_INIT(&clispc.spc_respwait);
792 	pthread_mutex_init(&clispc.spc_mtx, NULL);
793 	pthread_cond_init(&clispc.spc_cv, NULL);
794 
795 	return 0;
796 }
797 
/*
 * Default symbol lookup routine: a plain wrapper around dlsym().
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
/*
 * rumphijack_dlsym is a weak alias for the wrapper above, so another
 * object can supply its own strong definition.  rumpclient_init()
 * compares the two addresses to tell whether that has happened.
 */
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
807 
808 static pid_t init_done = 0;
809 
810 int
811 rumpclient_init(void)
812 {
813 	char *p;
814 	int error;
815 	int rv = -1;
816 	int hstype;
817 	pid_t mypid;
818 
819 	/*
820 	 * Make sure we're not riding the context of a previous
821 	 * host fork.  Note: it's *possible* that after n>1 forks
822 	 * we have the same pid as one of our exited parents, but
823 	 * I'm pretty sure there are 0 practical implications, since
824 	 * it means generations would have to skip rumpclient init.
825 	 */
826 	if (init_done == (mypid = getpid()))
827 		return 0;
828 
829 	/* kq does not traverse fork() */
830 	if (init_done != 0)
831 		kq = -1;
832 	init_done = mypid;
833 
834 	sigfillset(&fullset);
835 
836 	/*
837 	 * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
838 	 * wann wird man je verstehen?  wann wird man je verstehen?
839 	 */
840 #define FINDSYM2(_name_,_syscall_)					\
841 	if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,		\
842 	    #_syscall_)) == NULL) {					\
843 		if (rumphijack_dlsym == rumpclient__dlsym)		\
844 			host_##_name_ = _name_; /* static fallback */	\
845 		if (host_##_name_ == NULL)				\
846 			errx(1, "cannot find %s: %s", #_syscall_,	\
847 			    dlerror());					\
848 	}
849 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
850 #ifdef __NetBSD__
851 	FINDSYM2(socket,__socket30)
852 #else
853 	FINDSYM(socket)
854 #endif
855 
856 	FINDSYM(close)
857 	FINDSYM(connect)
858 	FINDSYM(fcntl)
859 	FINDSYM(poll)
860 	FINDSYM(read)
861 	FINDSYM(sendmsg)
862 	FINDSYM(setsockopt)
863 	FINDSYM(dup)
864 	FINDSYM(execve)
865 
866 #ifdef USE_KQUEUE
867 	FINDSYM(kqueue)
868 #if !__NetBSD_Prereq__(5,99,7)
869 	FINDSYM(kevent)
870 #else
871 	FINDSYM2(kevent,_sys___kevent50)
872 #endif
873 #endif /* USE_KQUEUE */
874 
875 #undef	FINDSYM
876 #undef	FINDSY2
877 
878 	if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
879 		if ((p = getenv("RUMP_SERVER")) == NULL) {
880 			errno = ENOENT;
881 			goto out;
882 		}
883 	}
884 
885 	if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
886 		errno = error;
887 		goto out;
888 	}
889 
890 	if (doinit() == -1)
891 		goto out;
892 
893 	if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
894 		sscanf(p, "%d,%d", &clispc.spc_fd, &kq);
895 		unsetenv("RUMPCLIENT__EXECFD");
896 		hstype = HANDSHAKE_EXEC;
897 	} else {
898 		if (doconnect() == -1)
899 			goto out;
900 		hstype = HANDSHAKE_GUEST;
901 	}
902 
903 	error = handshake_req(&clispc, hstype, NULL, 0, false);
904 	if (error) {
905 		pthread_mutex_destroy(&clispc.spc_mtx);
906 		pthread_cond_destroy(&clispc.spc_cv);
907 		if (clispc.spc_fd != -1)
908 			host_close(clispc.spc_fd);
909 		errno = error;
910 		goto out;
911 	}
912 	rv = 0;
913 
914  out:
915 	if (rv == -1)
916 		init_done = 0;
917 	return rv;
918 }
919 
/*
 * State carried across a fork.  Allocated and filled in by
 * rumpclient_prefork().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* auth data from the prefork response */
	struct spclient fork_spc;	/* copy of clispc taken at prefork time */
	int fork_kq;			/* value of kq taken at prefork time */
};
925 
926 struct rumpclient_fork *
927 rumpclient_prefork(void)
928 {
929 	struct rumpclient_fork *rpf;
930 	sigset_t omask;
931 	void *resp;
932 	int rv;
933 
934 	pthread_sigmask(SIG_SETMASK, &fullset, &omask);
935 	rpf = malloc(sizeof(*rpf));
936 	if (rpf == NULL)
937 		goto out;
938 
939 	if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
940 		free(rpf);
941 		errno = rv;
942 		rpf = NULL;
943 		goto out;
944 	}
945 
946 	memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
947 	free(resp);
948 
949 	rpf->fork_spc = clispc;
950 	rpf->fork_kq = kq;
951 
952  out:
953 	pthread_sigmask(SIG_SETMASK, &omask, NULL);
954 	return rpf;
955 }
956 
957 int
958 rumpclient_fork_init(struct rumpclient_fork *rpf)
959 {
960 	int error;
961 	int osock;
962 
963 	osock = clispc.spc_fd;
964 	memset(&clispc, 0, sizeof(clispc));
965 	clispc.spc_fd = osock;
966 
967 	kq = -1; /* kqueue descriptor is not copied over fork() */
968 
969 	if (doinit() == -1)
970 		return -1;
971 	if (doconnect() == -1)
972 		return -1;
973 
974 	error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
975 	    0, false);
976 	if (error) {
977 		pthread_mutex_destroy(&clispc.spc_mtx);
978 		pthread_cond_destroy(&clispc.spc_cv);
979 		errno = error;
980 		return -1;
981 	}
982 
983 	return 0;
984 }
985 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the function does nothing, in particular it does
 * not free rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
993 
994 void
995 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
996 {
997 
998 	clispc = rpf->fork_spc;
999 	kq = rpf->fork_kq;
1000 }
1001 
1002 void
1003 rumpclient_setconnretry(time_t timeout)
1004 {
1005 
1006 	if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1007 		return; /* gigo */
1008 
1009 	retrytimo = timeout;
1010 }
1011 
/*
 * Notification that the caller is about to close descriptor(s), so
 * that the descriptors used internally by rumpclient (the server
 * socket and the kqueue) can be protected.
 *
 *  fdp     - in: descriptor in question; for FCLOSEM it is updated to
 *            the first descriptor number not processed here
 *  variant - RUMPCLIENT_CLOSE_FCLOSEM: close every descriptor from
 *            *fdp up to the larger of our two descriptors, skipping
 *            our own.
 *            RUMPCLIENT_CLOSE_CLOSE / _DUP2: if the descriptor is one
 *            of ours, move ours out of the way with dupgood() and
 *            start using the new number.  The old number is not
 *            closed here.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Note that the braces below interleave with the preprocessor
 * conditionals: without USE_KQUEUE only the socket is handled and the
 * "fd == kq" part is not compiled.
 */
int
rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
{
	int fd = *fdp;
	int untilfd, rv;
	int newfd;

	switch (variant) {
	case RUMPCLIENT_CLOSE_FCLOSEM:
		untilfd = MAX(clispc.spc_fd, kq);
		for (; fd <= untilfd; fd++) {
			/* never close our own descriptors */
			if (fd == clispc.spc_fd || fd == kq)
				continue;
			rv = host_close(fd);
			if (rv == -1)
				return -1;
		}
		/* tell the caller where to continue from */
		*fdp = fd;
		break;

	case RUMPCLIENT_CLOSE_CLOSE:
	case RUMPCLIENT_CLOSE_DUP2:
		if (fd == clispc.spc_fd) {
			/* mustchange=1: we need a different number */
			newfd = dupgood(clispc.spc_fd, 1);
			if (newfd == -1)
				return -1;

#ifdef USE_KQUEUE
			{
			struct kevent kev[2];

			/*
			 * now, we have a new socket number, so change
			 * the file descriptor that kqueue is
			 * monitoring.  remove old and add new.
			 */
			EV_SET(&kev[0], clispc.spc_fd,
			    EVFILT_READ, EV_DELETE, 0, 0, 0);
			EV_SET(&kev[1], newfd,
			    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
			if (host_kevent(kq, kev, 2, NULL, 0, NULL) == -1) {
				/* back out the dup, keeping kevent's errno */
				int sverrno = errno;
				host_close(newfd);
				errno = sverrno;
				return -1;
			}
			clispc.spc_fd = newfd;
			}
		}
		if (fd == kq) {
			newfd = dupgood(kq, 1);
			if (newfd == -1)
				return -1;
			kq = newfd;
#else /* USE_KQUEUE */
			clispc.spc_fd = newfd;
#endif /* !USE_KQUEUE */
		}
		break;
	}

	return 0;
}
1075 
/*
 * fork() for rump clients: runs the host's fork() through
 * rumpclient__dofork() and returns its result.
 */
pid_t
rumpclient_fork(void)
{
	pid_t child;

	child = rumpclient__dofork(fork);
	return child;
}
1082 
1083 /*
1084  * Process is about to exec.  Save info about our existing connection
1085  * in the env.  rumpclient will check for this info in init().
1086  * This is mostly for the benefit of rumphijack, but regular applications
1087  * may use it as well.
1088  */
1089 int
1090 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1091 {
1092 	char buf[4096];
1093 	char **newenv;
1094 	char *envstr, *envstr2;
1095 	size_t nelem;
1096 	int rv, sverrno;
1097 
1098 	snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1099 	    clispc.spc_fd, kq);
1100 	envstr = malloc(strlen(buf)+1);
1101 	if (envstr == NULL) {
1102 		return ENOMEM;
1103 	}
1104 	strcpy(envstr, buf);
1105 
1106 	/* do we have a fully parsed url we want to forward in the env? */
1107 	if (*parsedurl != '\0') {
1108 		snprintf(buf, sizeof(buf),
1109 		    "RUMP__PARSEDSERVER=%s", parsedurl);
1110 		envstr2 = malloc(strlen(buf)+1);
1111 		if (envstr2 == NULL) {
1112 			free(envstr);
1113 			return ENOMEM;
1114 		}
1115 		strcpy(envstr2, buf);
1116 	} else {
1117 		envstr2 = NULL;
1118 	}
1119 
1120 	for (nelem = 0; envp && envp[nelem]; nelem++)
1121 		continue;
1122 
1123 	newenv = malloc(sizeof(*newenv) * (nelem+3));
1124 	if (newenv == NULL) {
1125 		free(envstr2);
1126 		free(envstr);
1127 		return ENOMEM;
1128 	}
1129 	memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1130 
1131 	newenv[nelem] = envstr;
1132 	newenv[nelem+1] = envstr2;
1133 	newenv[nelem+2] = NULL;
1134 
1135 	rv = host_execve(path, argv, newenv);
1136 
1137 	_DIAGASSERT(rv != 0);
1138 	sverrno = errno;
1139 	free(envstr2);
1140 	free(envstr);
1141 	free(newenv);
1142 	errno = sverrno;
1143 	return rv;
1144 }
1145 
/*
 * daemon(3) for rump clients: prepare for the fork with
 * rumpclient_prefork(), call the host's daemon() and then set up the
 * connection again with rumpclient_fork_init().
 *
 * Returns 0 on success and -1 on failure.  If daemon() itself fails,
 * its errno is preserved.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	if (daemon(nochdir, noclose) == -1) {
		/* keep daemon()'s errno across the cancel */
		sverrno = errno;
		rumpclient_fork_cancel(rf);
		errno = sverrno;
		return -1;
	}

	return (rumpclient_fork_init(rf) == -1) ? -1 : 0;
}
1167