xref: /netbsd-src/lib/librumphijack/hijack.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*      $NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __RCSID("$NetBSD: hijack.c,v 1.16 2011/01/19 11:27:01 pooka Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/event.h>
34 #include <sys/ioctl.h>
35 #include <sys/socket.h>
36 #include <sys/poll.h>
37 
38 #include <rump/rumpclient.h>
39 #include <rump/rump_syscalls.h>
40 
41 #include <assert.h>
42 #include <dlfcn.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <poll.h>
47 #include <pthread.h>
48 #include <signal.h>
49 #include <stdarg.h>
50 #include <stdbool.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55 
/*
 * Indices of the rump kernel system calls resolved at startup.
 * The same index is used for rumpcalls[] and sysnames[], so the
 * order here must match the order of sysnames[].  RUMPCALL__NUM is
 * the number of entries and must remain last.
 */
enum {
	RUMPCALL_SOCKET,
	RUMPCALL_ACCEPT,
	RUMPCALL_BIND,
	RUMPCALL_CONNECT,
	RUMPCALL_GETPEERNAME,
	RUMPCALL_GETSOCKNAME,
	RUMPCALL_LISTEN,
	RUMPCALL_RECVFROM,
	RUMPCALL_RECVMSG,
	RUMPCALL_SENDTO,
	RUMPCALL_SENDMSG,
	RUMPCALL_GETSOCKOPT,
	RUMPCALL_SETSOCKOPT,
	RUMPCALL_SHUTDOWN,
	RUMPCALL_READ,
	RUMPCALL_READV,
	RUMPCALL_WRITE,
	RUMPCALL_WRITEV,
	RUMPCALL_IOCTL,
	RUMPCALL_FCNTL,
	RUMPCALL_CLOSE,
	RUMPCALL_POLLTS,
	RUMPCALL__NUM
};
69 
70 #define RSYS_STRING(a) __STRING(a)
71 #define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))
72 
73 const char *sysnames[] = {
74 	RSYS_NAME(SOCKET),
75 	RSYS_NAME(ACCEPT),
76 	RSYS_NAME(BIND),
77 	RSYS_NAME(CONNECT),
78 	RSYS_NAME(GETPEERNAME),
79 	RSYS_NAME(GETSOCKNAME),
80 	RSYS_NAME(LISTEN),
81 	RSYS_NAME(RECVFROM),
82 	RSYS_NAME(RECVMSG),
83 	RSYS_NAME(SENDTO),
84 	RSYS_NAME(SENDMSG),
85 	RSYS_NAME(GETSOCKOPT),
86 	RSYS_NAME(SETSOCKOPT),
87 	RSYS_NAME(SHUTDOWN),
88 	RSYS_NAME(READ),
89 	RSYS_NAME(READV),
90 	RSYS_NAME(WRITE),
91 	RSYS_NAME(WRITEV),
92 	RSYS_NAME(IOCTL),
93 	RSYS_NAME(FCNTL),
94 	RSYS_NAME(CLOSE),
95 	RSYS_NAME(POLLTS),
96 };
97 
/*
 * Pointers to the host's own implementations of the calls this library
 * overrides.  They are filled in by rcinit() with dlsym(RTLD_NEXT, ...)
 * and are used whenever a descriptor is not a rump kernel descriptor.
 */
static int	(*host_socket)(int, int, int);
static int	(*host_connect)(int, const struct sockaddr *, socklen_t);
static int	(*host_bind)(int, const struct sockaddr *, socklen_t);
static int	(*host_listen)(int, int);
static int	(*host_accept)(int, struct sockaddr *, socklen_t *);
static int	(*host_getpeername)(int, struct sockaddr *, socklen_t *);
static int	(*host_getsockname)(int, struct sockaddr *, socklen_t *);
static int	(*host_setsockopt)(int, int, int, const void *, socklen_t);

static ssize_t	(*host_read)(int, void *, size_t);
static ssize_t	(*host_readv)(int, const struct iovec *, int);
static ssize_t	(*host_write)(int, const void *, size_t);
static ssize_t	(*host_writev)(int, const struct iovec *, int);
static int	(*host_ioctl)(int, unsigned long, ...);
static int	(*host_fcntl)(int, int, ...);
static int	(*host_close)(int);
static int	(*host_pollts)(struct pollfd *, nfds_t,
			       const struct timespec *, const sigset_t *);
static pid_t	(*host_fork)(void);
static int	(*host_dup2)(int, int);
static int	(*host_shutdown)(int, int);
/*
 * XXX: these two are kept as untyped pointers; sendto() and recvfrom()
 * assign them to a typed function pointer before calling.
 */
static void	*host_sendto;
static void	*host_recvfrom;

/*
 * Rump kernel system call entry points, indexed by RUMPCALL_*.
 * Filled in by rcinit() from sysnames[].
 */
static void *rumpcalls[RUMPCALL__NUM];
124 
/*
 * Names under which this library defines its select/pollts/poll
 * replacements.
 *
 * Would be nice to get this automatically in sync with libc.
 * Also, this does not work for compat-using binaries!
 */

/* Before NetBSD 5.99.7 the plain names are used. */
#if !__NetBSD_Prereq__(5,99,7)
#define SELECT select
#define POLLTS pollts
#define POLL poll
#else
/*
 * From 5.99.7 on the versioned names are used (presumably these are
 * the symbols libc's headers rename the calls to -- confirm against
 * the system headers).  They are prototyped here because the
 * definitions below would otherwise have no visible declaration.
 */
#define SELECT __select50
#define POLLTS __pollts50
#define POLL __poll50

int SELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
int POLLTS(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *);
int POLL(struct pollfd *, nfds_t, int);
#endif
143 
/*
 * dlsym() trampoline handed to librumpclient by rcinit().
 * librumpclient calls it in the LD_PRELOAD case; because the lookup
 * is then issued from this library, RTLD_NEXT is resolved relative
 * to this library.
 *
 * Returns whatever dlsym() returns for (handle, symbol).
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
154 
/*
 * low calorie sockets?
 * When true, __socket30() creates AF_LOCAL sockets in the host kernel
 * instead of the rump kernel.
 */
static bool hostlocalsockets = true;
157 
158 static void __attribute__((constructor))
159 rcinit(void)
160 {
161 	int (*rumpcinit)(void);
162 	void **rumpcdlsym;
163 	void *hand;
164 	int i;
165 
166 	hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
167 	if (!hand)
168 		err(1, "cannot open librumpclient.so");
169 	rumpcinit = dlsym(hand, "rumpclient_init");
170 	_DIAGASSERT(rumpcinit);
171 
172 	rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
173 	*rumpcdlsym = hijackdlsym;
174 
175 	host_socket = dlsym(RTLD_NEXT, "__socket30");
176 	host_listen = dlsym(RTLD_NEXT, "listen");
177 	host_connect = dlsym(RTLD_NEXT, "connect");
178 	host_bind = dlsym(RTLD_NEXT, "bind");
179 	host_accept = dlsym(RTLD_NEXT, "accept");
180 	host_getpeername = dlsym(RTLD_NEXT, "getpeername");
181 	host_getsockname = dlsym(RTLD_NEXT, "getsockname");
182 	host_setsockopt = dlsym(RTLD_NEXT, "setsockopt");
183 
184 	host_read = dlsym(RTLD_NEXT, "read");
185 	host_readv = dlsym(RTLD_NEXT, "readv");
186 	host_write = dlsym(RTLD_NEXT, "write");
187 	host_writev = dlsym(RTLD_NEXT, "writev");
188 	host_ioctl = dlsym(RTLD_NEXT, "ioctl");
189 	host_fcntl = dlsym(RTLD_NEXT, "fcntl");
190 	host_close = dlsym(RTLD_NEXT, "close");
191 	host_pollts = dlsym(RTLD_NEXT, "pollts");
192 	host_fork = dlsym(RTLD_NEXT, "fork");
193 	host_dup2 = dlsym(RTLD_NEXT, "dup2");
194 	host_shutdown = dlsym(RTLD_NEXT, "shutdown");
195 	host_sendto = dlsym(RTLD_NEXT, "sendto");
196 	host_recvfrom = dlsym(RTLD_NEXT, "recvfrom");
197 
198 	for (i = 0; i < RUMPCALL__NUM; i++) {
199 		rumpcalls[i] = dlsym(hand, sysnames[i]);
200 		if (!rumpcalls[i]) {
201 			fprintf(stderr, "rumphijack: cannot find symbol: %s\n",
202 			    sysnames[i]);
203 			exit(1);
204 		}
205 	}
206 
207 	if (rumpcinit() == -1)
208 		err(1, "rumpclient init");
209 }
210 
/*
 * Bitmask of low-numbered descriptors that were dup2()'d from a rump
 * kernel descriptor; bit n set means descriptor n refers to the rump
 * kernel even though it is below the hijack offset.
 */
static unsigned dup2mask;

/*
 * True iff fd is recorded in dup2mask.  The range check is required:
 * shifting by a negative count or by 32 or more is undefined, and on
 * common hardware the count wraps, which would make e.g. fd 128 alias
 * bit 0.  Note that fd is evaluated more than once.
 */
#define ISDUP2D(fd) \
	((fd) >= 0 && (fd) < 32 && (dup2mask & (1U << (fd))) != 0)
213 
/*
 * Debug tracing.  Define DEBUGJACK to make DPRINTF((fmt, ...)) print to
 * stderr; otherwise DPRINTF expands to nothing and its arguments are
 * not evaluated.  Note the double parentheses at the call site.
 */
/* #define DEBUGJACK */
#ifdef DEBUGJACK
#define DPRINTF(x) mydprintf x

/*
 * printf-style output to stderr.  Stays silent once stderr has been
 * dup2()'d onto a rump kernel descriptor.
 */
static void
mydprintf(const char *fmt, ...)
{
	va_list args;

	if (!ISDUP2D(STDERR_FILENO)) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}

#else
#define DPRINTF(x)
#endif
233 
/*
 * Descriptor number space split: descriptors handed to the application
 * for rump kernel files are the rump kernel's number plus HIJACK_FDOFF.
 * HIJACK_ASSERT is the lower bound used by assertfd().
 *
 * XXX: need runtime selection.  low for now due to FD_SETSIZE
 */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */
238 static int
239 fd_rump2host(int fd)
240 {
241 
242 	if (fd == -1)
243 		return fd;
244 
245 	if (!ISDUP2D(fd))
246 		fd += HIJACK_FDOFF;
247 
248 	return fd;
249 }
250 
251 static int
252 fd_host2rump(int fd)
253 {
254 
255 	if (!ISDUP2D(fd))
256 		fd -= HIJACK_FDOFF;
257 	return fd;
258 }
259 
260 static bool
261 fd_isrump(int fd)
262 {
263 
264 	return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
265 }
266 
/*
 * Assert that _fd_ is a rump kernel descriptor (dup2()'d, or at least
 * HIJACK_ASSERT).  Used by the calls that have no host fallback.
 * Compiles to nothing when NDEBUG is defined, as with any assert().
 */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* the raw offset must not be used below this point; use the helpers */
#undef HIJACK_FDOFF
269 
270 int __socket30(int, int, int);
271 int
272 __socket30(int domain, int type, int protocol)
273 {
274 	int (*rc_socket)(int, int, int);
275 	int fd;
276 	bool dohost;
277 
278 	dohost = hostlocalsockets && (domain == AF_LOCAL);
279 
280 	if (dohost)
281 		rc_socket = host_socket;
282 	else
283 		rc_socket = rumpcalls[RUMPCALL_SOCKET];
284 	fd = rc_socket(domain, type, protocol);
285 
286 	if (!dohost)
287 		fd = fd_rump2host(fd);
288 	DPRINTF(("socket <- %d\n", fd));
289 
290 	return fd;
291 }
292 
293 int
294 accept(int s, struct sockaddr *addr, socklen_t *addrlen)
295 {
296 	int (*rc_accept)(int, struct sockaddr *, socklen_t *);
297 	int fd;
298 	bool isrump;
299 
300 	isrump = fd_isrump(s);
301 
302 	DPRINTF(("accept -> %d", s));
303 	if (isrump) {
304 		rc_accept = rumpcalls[RUMPCALL_ACCEPT];
305 		s = fd_host2rump(s);
306 	} else {
307 		rc_accept = host_accept;
308 	}
309 	fd = rc_accept(s, addr, addrlen);
310 	if (fd != -1 && isrump)
311 		fd = fd_rump2host(fd);
312 
313 	DPRINTF((" <- %d\n", fd));
314 
315 	return fd;
316 }
317 
318 int
319 bind(int s, const struct sockaddr *name, socklen_t namelen)
320 {
321 	int (*rc_bind)(int, const struct sockaddr *, socklen_t);
322 
323 	DPRINTF(("bind -> %d\n", s));
324 	if (fd_isrump(s)) {
325 		rc_bind = rumpcalls[RUMPCALL_BIND];
326 		s = fd_host2rump(s);
327 	} else {
328 		rc_bind = host_bind;
329 	}
330 	return rc_bind(s, name, namelen);
331 }
332 
333 int
334 connect(int s, const struct sockaddr *name, socklen_t namelen)
335 {
336 	int (*rc_connect)(int, const struct sockaddr *, socklen_t);
337 
338 	DPRINTF(("connect -> %d\n", s));
339 	if (fd_isrump(s)) {
340 		rc_connect = rumpcalls[RUMPCALL_CONNECT];
341 		s = fd_host2rump(s);
342 	} else {
343 		rc_connect = host_connect;
344 	}
345 
346 	return rc_connect(s, name, namelen);
347 }
348 
349 int
350 getpeername(int s, struct sockaddr *name, socklen_t *namelen)
351 {
352 	int (*rc_getpeername)(int, struct sockaddr *, socklen_t *);
353 
354 	DPRINTF(("getpeername -> %d\n", s));
355 	if (fd_isrump(s)) {
356 		rc_getpeername = rumpcalls[RUMPCALL_GETPEERNAME];
357 		s = fd_host2rump(s);
358 	} else {
359 		rc_getpeername = host_getpeername;
360 	}
361 	return rc_getpeername(s, name, namelen);
362 }
363 
364 int
365 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
366 {
367 	int (*rc_getsockname)(int, struct sockaddr *, socklen_t *);
368 
369 	DPRINTF(("getsockname -> %d\n", s));
370 	if (fd_isrump(s)) {
371 		rc_getsockname = rumpcalls[RUMPCALL_GETSOCKNAME];
372 		s = fd_host2rump(s);
373 	} else {
374 		rc_getsockname = host_getsockname;
375 	}
376 	return rc_getsockname(s, name, namelen);
377 }
378 
379 int
380 listen(int s, int backlog)
381 {
382 	int (*rc_listen)(int, int);
383 
384 	DPRINTF(("listen -> %d\n", s));
385 	if (fd_isrump(s)) {
386 		rc_listen = rumpcalls[RUMPCALL_LISTEN];
387 		s = fd_host2rump(s);
388 	} else {
389 		rc_listen = host_listen;
390 	}
391 	return rc_listen(s, backlog);
392 }
393 
/*
 * recv(2) replacement, expressed as recvfrom() without a source
 * address so that the descriptor routing lives in one place.
 */
ssize_t
recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nread;

	nread = recvfrom(s, buf, len, flags, NULL, NULL);
	return nread;
}
400 
401 ssize_t
402 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
403 	socklen_t *fromlen)
404 {
405 	int (*rc_recvfrom)(int, void *, size_t, int,
406 	    struct sockaddr *, socklen_t *);
407 
408 	DPRINTF(("recvfrom\n"));
409 	if (fd_isrump(s)) {
410 		rc_recvfrom = rumpcalls[RUMPCALL_RECVFROM];
411 		s = fd_host2rump(s);
412 	} else {
413 		rc_recvfrom = host_recvfrom;
414 	}
415 
416 	return rc_recvfrom(s, buf, len, flags, from, fromlen);
417 }
418 
419 ssize_t
420 recvmsg(int s, struct msghdr *msg, int flags)
421 {
422 	int (*rc_recvmsg)(int, struct msghdr *, int);
423 
424 	DPRINTF(("recvmsg\n"));
425 	assertfd(s);
426 	rc_recvmsg = rumpcalls[RUMPCALL_RECVMSG];
427 	return rc_recvmsg(fd_host2rump(s), msg, flags);
428 }
429 
/*
 * send(2) replacement, expressed as sendto() without a destination
 * address so that the descriptor routing lives in one place.
 */
ssize_t
send(int s, const void *buf, size_t len, int flags)
{
	ssize_t nsent;

	nsent = sendto(s, buf, len, flags, NULL, 0);
	return nsent;
}
436 
437 ssize_t
438 sendto(int s, const void *buf, size_t len, int flags,
439 	const struct sockaddr *to, socklen_t tolen)
440 {
441 	int (*rc_sendto)(int, const void *, size_t, int,
442 	    const struct sockaddr *, socklen_t);
443 
444 	if (s == -1)
445 		return len;
446 	DPRINTF(("sendto\n"));
447 
448 	if (fd_isrump(s)) {
449 		rc_sendto = rumpcalls[RUMPCALL_SENDTO];
450 		s = fd_host2rump(s);
451 	} else {
452 		rc_sendto = host_sendto;
453 	}
454 	return rc_sendto(s, buf, len, flags, to, tolen);
455 }
456 
457 ssize_t
458 sendmsg(int s, const struct msghdr *msg, int flags)
459 {
460 	int (*rc_sendmsg)(int, const struct msghdr *, int);
461 
462 	DPRINTF(("sendmsg\n"));
463 	assertfd(s);
464 	rc_sendmsg = rumpcalls[RUMPCALL_SENDTO];
465 	return rc_sendmsg(fd_host2rump(s), msg, flags);
466 }
467 
468 int
469 getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
470 {
471 	int (*rc_getsockopt)(int, int, int, void *, socklen_t *);
472 
473 	DPRINTF(("getsockopt -> %d\n", s));
474 	assertfd(s);
475 	rc_getsockopt = rumpcalls[RUMPCALL_GETSOCKOPT];
476 	return rc_getsockopt(fd_host2rump(s), level, optname, optval, optlen);
477 }
478 
479 int
480 setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
481 {
482 	int (*rc_setsockopt)(int, int, int, const void *, socklen_t);
483 
484 	DPRINTF(("setsockopt -> %d\n", s));
485 	if (fd_isrump(s)) {
486 		rc_setsockopt = rumpcalls[RUMPCALL_SETSOCKOPT];
487 		s = fd_host2rump(s);
488 	} else {
489 		rc_setsockopt = host_setsockopt;
490 	}
491 	return rc_setsockopt(s, level, optname, optval, optlen);
492 }
493 
494 int
495 shutdown(int s, int how)
496 {
497 	int (*rc_shutdown)(int, int);
498 
499 	DPRINTF(("shutdown -> %d\n", s));
500 	if (fd_isrump(s)) {
501 		rc_shutdown = rumpcalls[RUMPCALL_SHUTDOWN];
502 		s = fd_host2rump(s);
503 	} else {
504 		rc_shutdown = host_shutdown;
505 	}
506 	return rc_shutdown(s, how);
507 }
508 
509 /*
510  * dup2 is special.  we allow dup2 of a rump kernel fd to 0-2 since
511  * many programs do that.  dup2 of a rump kernel fd to another value
512  * not >= fdoff is an error.
513  *
514  * Note: cannot rump2host newd, because it is often hardcoded.
515  *
516  * XXX: should disable debug prints after stdout/stderr are dup2'd
517  */
518 int
519 dup2(int oldd, int newd)
520 {
521 	int rv;
522 
523 	DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
524 
525 	if (fd_isrump(oldd)) {
526 		if (!(newd >= 0 && newd <= 2))
527 			return EBADF;
528 		oldd = fd_host2rump(oldd);
529 		rv = rump_sys_dup2(oldd, newd);
530 		if (rv != -1)
531 			dup2mask |= 1<<newd;
532 	} else {
533 		rv = host_dup2(oldd, newd);
534 	}
535 
536 	return rv;
537 }
538 
/*
 * fork(2) replacement.  The host fork is bracketed by the rump client
 * fork calls: rumpclient_prefork() before forking and, in the child
 * only, rumpclient_fork_init().  This preserves the file descriptors
 * of the forked parent in the child, but prevents double use of the
 * connection fd.
 *
 * Returns what the host fork returned, except that -1 is returned when
 * rumpclient_prefork() fails (no fork is attempted) and, in the child,
 * when rumpclient_fork_init() fails.
 */

pid_t
fork()
{
	struct rumpclient_fork *forkstate;
	pid_t pid;

	DPRINTF(("fork\n"));

	forkstate = rumpclient_prefork();
	if (forkstate == NULL)
		return -1;

	pid = host_fork();
	if (pid == 0) {
		/* child: set up the rump client state for this process */
		if (rumpclient_fork_init(forkstate) == -1)
			pid = -1;
	}
	/* XXX: when the host fork fails (-1), forkstate is not cancelled */

	DPRINTF(("fork returns %d\n", pid));
	return pid;
}
571 
572 /*
573  * Hybrids
574  */
575 
576 ssize_t
577 read(int fd, void *buf, size_t len)
578 {
579 	ssize_t (*op_read)(int, void *, size_t);
580 	ssize_t n;
581 
582 	DPRINTF(("read %d\n", fd));
583 	if (fd_isrump(fd)) {
584 		fd = fd_host2rump(fd);
585 		op_read = rumpcalls[RUMPCALL_READ];
586 	} else {
587 		op_read = host_read;
588 	}
589 
590 	n = op_read(fd, buf, len);
591 	return n;
592 }
593 
594 ssize_t
595 readv(int fd, const struct iovec *iov, int iovcnt)
596 {
597 	ssize_t (*op_readv)(int, const struct iovec *, int);
598 
599 	DPRINTF(("readv %d\n", fd));
600 	if (fd_isrump(fd)) {
601 		fd = fd_host2rump(fd);
602 		op_readv = rumpcalls[RUMPCALL_READV];
603 	} else {
604 		op_readv = host_readv;
605 	}
606 
607 	return op_readv(fd, iov, iovcnt);
608 }
609 
610 ssize_t
611 write(int fd, const void *buf, size_t len)
612 {
613 	ssize_t (*op_write)(int, const void *, size_t);
614 
615 	if (fd_isrump(fd)) {
616 		fd = fd_host2rump(fd);
617 		op_write = rumpcalls[RUMPCALL_WRITE];
618 	} else {
619 		op_write = host_write;
620 	}
621 
622 	return op_write(fd, buf, len);
623 }
624 
625 ssize_t
626 writev(int fd, const struct iovec *iov, int iovcnt)
627 {
628 	ssize_t (*op_writev)(int, const struct iovec *, int);
629 
630 	DPRINTF(("writev %d\n", fd));
631 	if (fd_isrump(fd)) {
632 		fd = fd_host2rump(fd);
633 		op_writev = rumpcalls[RUMPCALL_WRITEV];
634 	} else {
635 		op_writev = host_writev;
636 	}
637 
638 	return op_writev(fd, iov, iovcnt);
639 }
640 
641 int
642 ioctl(int fd, unsigned long cmd, ...)
643 {
644 	int (*op_ioctl)(int, unsigned long cmd, ...);
645 	va_list ap;
646 	int rv;
647 
648 	DPRINTF(("ioctl\n"));
649 	if (fd_isrump(fd)) {
650 		fd = fd_host2rump(fd);
651 		op_ioctl = rumpcalls[RUMPCALL_IOCTL];
652 	} else {
653 		op_ioctl = host_ioctl;
654 	}
655 
656 	va_start(ap, cmd);
657 	rv = op_ioctl(fd, cmd, va_arg(ap, void *));
658 	va_end(ap);
659 	return rv;
660 }
661 
662 int
663 fcntl(int fd, int cmd, ...)
664 {
665 	int (*op_fcntl)(int, int, ...);
666 	va_list ap;
667 	int rv;
668 
669 	DPRINTF(("fcntl\n"));
670 	if (fd_isrump(fd)) {
671 		fd = fd_host2rump(fd);
672 		op_fcntl = rumpcalls[RUMPCALL_FCNTL];
673 	} else {
674 		op_fcntl = host_fcntl;
675 	}
676 
677 	va_start(ap, cmd);
678 	rv = op_fcntl(fd, cmd, va_arg(ap, void *));
679 	va_end(ap);
680 	return rv;
681 }
682 
683 int
684 close(int fd)
685 {
686 	int (*op_close)(int);
687 
688 	DPRINTF(("close %d\n", fd));
689 	if (fd_isrump(fd)) {
690 		fd = fd_host2rump(fd);
691 		op_close = rumpcalls[RUMPCALL_CLOSE];
692 	} else {
693 		op_close = host_close;
694 	}
695 
696 	return op_close(fd);
697 }
698 
699 int
700 SELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
701 	struct timeval *timeout)
702 {
703 	struct pollfd *pfds;
704 	struct timespec ts, *tsp = NULL;
705 	nfds_t i, j, realnfds;
706 	int rv, incr;
707 
708 	DPRINTF(("select\n"));
709 
710 	/*
711 	 * Well, first we must scan the fds to figure out how many
712 	 * fds there really are.  This is because up to and including
713 	 * nb5 poll() silently refuses nfds > process_open_fds.
714 	 * Seems to be fixed in current, thank the maker.
715 	 * god damn cluster...bomb.
716 	 */
717 
718 	for (i = 0, realnfds = 0; i < nfds; i++) {
719 		if (readfds && FD_ISSET(i, readfds)) {
720 			realnfds++;
721 			continue;
722 		}
723 		if (writefds && FD_ISSET(i, writefds)) {
724 			realnfds++;
725 			continue;
726 		}
727 		if (exceptfds && FD_ISSET(i, exceptfds)) {
728 			realnfds++;
729 			continue;
730 		}
731 	}
732 
733 	if (realnfds) {
734 		pfds = malloc(sizeof(*pfds) * realnfds);
735 		if (!pfds)
736 			return -1;
737 	} else {
738 		pfds = NULL;
739 	}
740 
741 	for (i = 0, j = 0; i < nfds; i++) {
742 		incr = 0;
743 		pfds[j].events = pfds[j].revents = 0;
744 		if (readfds && FD_ISSET(i, readfds)) {
745 			pfds[j].fd = i;
746 			pfds[j].events |= POLLIN;
747 			incr=1;
748 		}
749 		if (writefds && FD_ISSET(i, writefds)) {
750 			pfds[j].fd = i;
751 			pfds[j].events |= POLLOUT;
752 			incr=1;
753 		}
754 		if (exceptfds && FD_ISSET(i, exceptfds)) {
755 			pfds[j].fd = i;
756 			pfds[j].events |= POLLHUP|POLLERR;
757 			incr=1;
758 		}
759 		if (incr)
760 			j++;
761 	}
762 
763 	if (timeout) {
764 		TIMEVAL_TO_TIMESPEC(timeout, &ts);
765 		tsp = &ts;
766 	}
767 	rv = pollts(pfds, realnfds, tsp, NULL);
768 	if (rv <= 0)
769 		goto out;
770 
771 	/*
772 	 * ok, harvest results.  first zero out entries (can't use
773 	 * FD_ZERO for the obvious select-me-not reason).  whee.
774 	 */
775 	for (i = 0; i < nfds; i++) {
776 		if (readfds)
777 			FD_CLR(i, readfds);
778 		if (writefds)
779 			FD_CLR(i, writefds);
780 		if (exceptfds)
781 			FD_CLR(i, exceptfds);
782 	}
783 
784 	/* and then plug in the results */
785 	for (i = 0; i < realnfds; i++) {
786 		if (readfds) {
787 			if (pfds[i].revents & POLLIN) {
788 				FD_SET(pfds[i].fd, readfds);
789 			}
790 		}
791 		if (writefds) {
792 			if (pfds[i].revents & POLLOUT) {
793 				FD_SET(pfds[i].fd, writefds);
794 			}
795 		}
796 		if (exceptfds) {
797 			if (pfds[i].revents & (POLLHUP|POLLERR)) {
798 				FD_SET(pfds[i].fd, exceptfds);
799 			}
800 		}
801 	}
802 
803  out:
804 	free(pfds);
805 	return rv;
806 }
807 
/*
 * Count how many entries of a poll vector belong to each kernel.
 * Entries with fd == -1 are skipped.  The counters are incremented,
 * not reset; the caller initializes them.
 */
static void
checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
{
	struct pollfd *pfd;

	for (pfd = fds; pfd < fds + nfds; pfd++) {
		if (pfd->fd == -1)
			continue;

		*(fd_isrump(pfd->fd) ? rumpcall : hostcall) += 1;
	}
}
823 
/*
 * Rewrite the descriptor number of every entry in a poll vector by
 * passing it through fdadj.  All nfds entries are converted,
 * whatever their current value.
 */
static void
adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
{
	struct pollfd *pfd = fds;
	nfds_t remaining;

	for (remaining = nfds; remaining > 0; remaining--, pfd++)
		pfd->fd = fdadj(pfd->fd);
}
833 
/*
 * poll is easy as long as all the descriptors in a call belong to one
 * kernel.  Otherwise it's quite tricky: the host part is run in a
 * helper thread (hostpoll()), and this structure carries its
 * arguments and its error result.
 */
struct pollarg {
	struct pollfd *pfds;		/* vector polled by the helper */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout passed to the host poll */
	const sigset_t *sigmask;	/* signal mask passed to the host poll */
	int pipefd;			/* written to by the helper when done */
	int errnum;			/* errno of the host poll; set by the
					   helper only when it returned -1 */
};
846 
847 static void *
848 hostpoll(void *arg)
849 {
850 	struct pollarg *parg = arg;
851 	intptr_t rv;
852 
853 	rv = host_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
854 	if (rv == -1)
855 		parg->errnum = errno;
856 	rump_sys_write(parg->pipefd, &rv, sizeof(rv));
857 
858 	return (void *)(intptr_t)rv;
859 }
860 
861 int
862 POLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
863 	const sigset_t *sigmask)
864 {
865 	int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
866 			 const sigset_t *);
867 	int hostcall = 0, rumpcall = 0;
868 	pthread_t pt;
869 	nfds_t i;
870 	int rv;
871 
872 	DPRINTF(("poll\n"));
873 	checkpoll(fds, nfds, &hostcall, &rumpcall);
874 
875 	if (hostcall && rumpcall) {
876 		struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
877 		int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
878 		struct pollarg parg;
879 		uintptr_t lrv;
880 		int sverrno = 0, trv;
881 
882 		/*
883 		 * ok, this is where it gets tricky.  We must support
884 		 * this since it's a very common operation in certain
885 		 * types of software (telnet, netcat, etc).  We allocate
886 		 * two vectors and run two poll commands in separate
887 		 * threads.  Whichever returns first "wins" and the
888 		 * other kernel's fds won't show activity.
889 		 */
890 		rv = -1;
891 
892 		/* allocate full vector for O(n) joining after call */
893 		pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
894 		if (!pfd_host)
895 			goto out;
896 		pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
897 		if (!pfd_rump) {
898 			goto out;
899 		}
900 
901 		/* split vectors */
902 		for (i = 0; i < nfds; i++) {
903 			if (fds[i].fd == -1) {
904 				pfd_host[i].fd = -1;
905 				pfd_rump[i].fd = -1;
906 			} else if (fd_isrump(fds[i].fd)) {
907 				pfd_host[i].fd = -1;
908 				pfd_rump[i].fd = fd_host2rump(fds[i].fd);
909 				pfd_rump[i].events = fds[i].events;
910 			} else {
911 				pfd_rump[i].fd = -1;
912 				pfd_host[i].fd = fds[i].fd;
913 				pfd_host[i].events = fds[i].events;
914 			}
915 			fds[i].revents = 0;
916 		}
917 
918 		/*
919 		 * then, open two pipes, one for notifications
920 		 * to each kernel.
921 		 */
922 		if (rump_sys_pipe(rpipe) == -1)
923 			goto out;
924 		if (pipe(hpipe) == -1)
925 			goto out;
926 
927 		pfd_host[nfds].fd = hpipe[0];
928 		pfd_host[nfds].events = POLLIN;
929 		pfd_rump[nfds].fd = rpipe[0];
930 		pfd_rump[nfds].events = POLLIN;
931 
932 		/*
933 		 * then, create a thread to do host part and meanwhile
934 		 * do rump kernel part right here
935 		 */
936 
937 		parg.pfds = pfd_host;
938 		parg.nfds = nfds+1;
939 		parg.ts = ts;
940 		parg.sigmask = sigmask;
941 		parg.pipefd = rpipe[1];
942 		pthread_create(&pt, NULL, hostpoll, &parg);
943 
944 		op_pollts = rumpcalls[RUMPCALL_POLLTS];
945 		lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
946 		sverrno = errno;
947 		write(hpipe[1], &rv, sizeof(rv));
948 		pthread_join(pt, (void *)&trv);
949 
950 		/* check who "won" and merge results */
951 		if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
952 			rv = trv;
953 
954 			for (i = 0; i < nfds; i++) {
955 				if (pfd_rump[i].fd != -1)
956 					fds[i].revents = pfd_rump[i].revents;
957 			}
958 			sverrno = parg.errnum;
959 		} else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
960 			rv = trv;
961 
962 			for (i = 0; i < nfds; i++) {
963 				if (pfd_host[i].fd != -1)
964 					fds[i].revents = pfd_host[i].revents;
965 			}
966 		} else {
967 			rv = 0;
968 		}
969 
970  out:
971 		if (rpipe[0] != -1)
972 			rump_sys_close(rpipe[0]);
973 		if (rpipe[1] != -1)
974 			rump_sys_close(rpipe[1]);
975 		if (hpipe[0] != -1)
976 			host_close(hpipe[0]);
977 		if (hpipe[1] != -1)
978 			host_close(hpipe[1]);
979 		free(pfd_host);
980 		free(pfd_rump);
981 		errno = sverrno;
982 	} else {
983 		if (hostcall) {
984 			op_pollts = host_pollts;
985 		} else {
986 			op_pollts = rumpcalls[RUMPCALL_POLLTS];
987 			adjustpoll(fds, nfds, fd_host2rump);
988 		}
989 
990 		rv = op_pollts(fds, nfds, ts, sigmask);
991 		if (rumpcall)
992 			adjustpoll(fds, nfds, fd_rump2host);
993 	}
994 
995 	return rv;
996 }
997 
998 int
999 POLL(struct pollfd *fds, nfds_t nfds, int timeout)
1000 {
1001 	struct timespec ts;
1002 	struct timespec *tsp = NULL;
1003 
1004 	if (timeout != INFTIM) {
1005 		ts.tv_sec = timeout / 1000;
1006 		ts.tv_nsec = (timeout % 1000) * 1000*1000;
1007 
1008 		tsp = &ts;
1009 	}
1010 
1011 	return pollts(fds, nfds, tsp, NULL);
1012 }
1013 
/*
 * kqueue is not supported by this library: calling it aborts the
 * process.  It never returns.
 */
int
kqueue(void)
{

	abort();
}
1020 
/*
 * kevent is not supported by this library: calling it aborts the
 * process regardless of the arguments.  It never returns.
 */
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{

	abort();
}
1029