/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
211778Sraf
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
260Sstevel@tonic-gate
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
290Sstevel@tonic-gate
30*6812Sraf #pragma ident "%Z%%M% %I% %E% SMI"
31*6812Sraf
/*
 * Emulation of select() system call using _pollsys() system call.
 *
 * Assumptions:
 *	polling for input only is most common.
 *	polling for exceptional conditions is very rare.
 *
 * Note that it is not feasible to emulate all error conditions,
 * in particular conditions that would return EFAULT are far too
 * difficult to check for in a library routine.
 *
 * This is the alternate large fd_set select.
 *
 */
460Sstevel@tonic-gate
/*
 * Must precede any include files: the headers size fd_set from
 * FD_SETSIZE, so the large value has to be in place before they are read.
 */
#ifdef FD_SETSIZE
#undef FD_SETSIZE
#endif
#define	FD_SETSIZE 65536
540Sstevel@tonic-gate
55*6812Sraf #include "lint.h"
560Sstevel@tonic-gate #include <values.h>
570Sstevel@tonic-gate #include <stdlib.h>
580Sstevel@tonic-gate #include <string.h>
596515Sraf #include <pthread.h>
600Sstevel@tonic-gate #include <errno.h>
610Sstevel@tonic-gate #include <sys/time.h>
620Sstevel@tonic-gate #include <sys/types.h>
630Sstevel@tonic-gate #include <sys/poll.h>
640Sstevel@tonic-gate #include <string.h>
650Sstevel@tonic-gate #include <stdlib.h>
660Sstevel@tonic-gate #include "libc.h"
670Sstevel@tonic-gate
/* Number of pollfd slots kept on the stack before falling back to the heap. */
#define	DEFAULT_POLL_SIZE 64

/* Doubles the pollfd list; defined at the bottom of this file. */
static struct pollfd *realloc_fds(int *num, struct pollfd **list_head,
    struct pollfd *orig);
710Sstevel@tonic-gate
720Sstevel@tonic-gate int
pselect_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,const timespec_t * tsp,const sigset_t * sigmask)730Sstevel@tonic-gate pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
740Sstevel@tonic-gate const timespec_t *tsp, const sigset_t *sigmask)
750Sstevel@tonic-gate {
760Sstevel@tonic-gate long *in, *out, *ex;
770Sstevel@tonic-gate ulong_t m; /* bit mask */
780Sstevel@tonic-gate int j; /* loop counter */
790Sstevel@tonic-gate ulong_t b; /* bits to test */
800Sstevel@tonic-gate int n, rv;
810Sstevel@tonic-gate int lastj = -1;
820Sstevel@tonic-gate int nused;
830Sstevel@tonic-gate
840Sstevel@tonic-gate /*
850Sstevel@tonic-gate * Rather than have a mammoth pollfd (65K) list on the stack
860Sstevel@tonic-gate * we start with a small one and then malloc larger chunks
870Sstevel@tonic-gate * on the heap if necessary.
880Sstevel@tonic-gate */
890Sstevel@tonic-gate
900Sstevel@tonic-gate struct pollfd pfd[DEFAULT_POLL_SIZE];
910Sstevel@tonic-gate struct pollfd *p;
920Sstevel@tonic-gate struct pollfd *pfd_list;
930Sstevel@tonic-gate int nfds_on_list;
940Sstevel@tonic-gate
950Sstevel@tonic-gate fd_set zero;
960Sstevel@tonic-gate
970Sstevel@tonic-gate /*
980Sstevel@tonic-gate * Check for invalid conditions at outset.
990Sstevel@tonic-gate * Required for spec1170.
1000Sstevel@tonic-gate * SUSV3: We must behave as a cancellation point even if we fail early.
1010Sstevel@tonic-gate */
1020Sstevel@tonic-gate if (nfds >= 0 && nfds <= FD_SETSIZE) {
1030Sstevel@tonic-gate if (tsp != NULL) {
1040Sstevel@tonic-gate if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
1050Sstevel@tonic-gate tsp->tv_sec < 0) {
1066515Sraf pthread_testcancel();
1070Sstevel@tonic-gate errno = EINVAL;
1080Sstevel@tonic-gate return (-1);
1090Sstevel@tonic-gate }
1100Sstevel@tonic-gate }
1110Sstevel@tonic-gate } else {
1126515Sraf pthread_testcancel();
1130Sstevel@tonic-gate errno = EINVAL;
1140Sstevel@tonic-gate return (-1);
1150Sstevel@tonic-gate }
1160Sstevel@tonic-gate
1170Sstevel@tonic-gate /*
1180Sstevel@tonic-gate * If any input args are null, point them at the null array.
1190Sstevel@tonic-gate */
1200Sstevel@tonic-gate (void) memset(&zero, 0, sizeof (fd_set));
1210Sstevel@tonic-gate if (in0 == NULL)
1220Sstevel@tonic-gate in0 = &zero;
1230Sstevel@tonic-gate if (out0 == NULL)
1240Sstevel@tonic-gate out0 = &zero;
1250Sstevel@tonic-gate if (ex0 == NULL)
1260Sstevel@tonic-gate ex0 = &zero;
1270Sstevel@tonic-gate
1280Sstevel@tonic-gate nfds_on_list = DEFAULT_POLL_SIZE;
1290Sstevel@tonic-gate pfd_list = pfd;
1300Sstevel@tonic-gate p = pfd_list;
1310Sstevel@tonic-gate (void) memset(pfd, 0, sizeof (pfd));
1320Sstevel@tonic-gate /*
1330Sstevel@tonic-gate * For each fd, if any bits are set convert them into
1340Sstevel@tonic-gate * the appropriate pollfd struct.
1350Sstevel@tonic-gate */
1360Sstevel@tonic-gate in = (long *)in0->fds_bits;
1370Sstevel@tonic-gate out = (long *)out0->fds_bits;
1380Sstevel@tonic-gate ex = (long *)ex0->fds_bits;
1390Sstevel@tonic-gate nused = 0;
1400Sstevel@tonic-gate /*
1410Sstevel@tonic-gate * nused reflects the number of pollfd structs currently used
1426515Sraf * less one. If realloc_fds returns NULL it is because malloc
1430Sstevel@tonic-gate * failed. We expect malloc() to have done the proper
1440Sstevel@tonic-gate * thing with errno.
1450Sstevel@tonic-gate */
1460Sstevel@tonic-gate for (n = 0; n < nfds; n += NFDBITS) {
1470Sstevel@tonic-gate b = (ulong_t)(*in | *out | *ex);
1480Sstevel@tonic-gate for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
1490Sstevel@tonic-gate if (b & 1) {
1500Sstevel@tonic-gate p->fd = n + j;
1510Sstevel@tonic-gate if (p->fd < nfds) {
1520Sstevel@tonic-gate p->events = 0;
1530Sstevel@tonic-gate if (*in & m)
1540Sstevel@tonic-gate p->events |= POLLRDNORM;
1550Sstevel@tonic-gate if (*out & m)
1560Sstevel@tonic-gate p->events |= POLLWRNORM;
1570Sstevel@tonic-gate if (*ex & m)
1580Sstevel@tonic-gate p->events |= POLLRDBAND;
1590Sstevel@tonic-gate if (nused < (nfds_on_list - 1)) {
1600Sstevel@tonic-gate p++;
1616515Sraf } else if ((p = realloc_fds(
1626515Sraf &nfds_on_list, &pfd_list, pfd))
1636515Sraf == NULL) {
1646515Sraf if (pfd_list != pfd)
165*6812Sraf free(pfd_list);
1666515Sraf pthread_testcancel();
1676515Sraf return (-1);
1680Sstevel@tonic-gate }
1690Sstevel@tonic-gate nused++;
1700Sstevel@tonic-gate } else
1710Sstevel@tonic-gate goto done;
1720Sstevel@tonic-gate }
1730Sstevel@tonic-gate }
1740Sstevel@tonic-gate in++;
1750Sstevel@tonic-gate out++;
1760Sstevel@tonic-gate ex++;
1770Sstevel@tonic-gate }
1780Sstevel@tonic-gate done:
1790Sstevel@tonic-gate /*
1800Sstevel@tonic-gate * Now do the poll.
1810Sstevel@tonic-gate */
1820Sstevel@tonic-gate do {
1830Sstevel@tonic-gate rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask);
1840Sstevel@tonic-gate } while (rv < 0 && errno == EAGAIN);
1850Sstevel@tonic-gate
1860Sstevel@tonic-gate if (rv < 0) { /* no need to set bit masks */
1870Sstevel@tonic-gate if (pfd_list != pfd)
188*6812Sraf free(pfd_list);
1890Sstevel@tonic-gate return (rv);
1900Sstevel@tonic-gate } else if (rv == 0) {
1910Sstevel@tonic-gate /*
1920Sstevel@tonic-gate * Clear out bit masks, just in case.
1930Sstevel@tonic-gate * On the assumption that usually only
1940Sstevel@tonic-gate * one bit mask is set, use three loops.
1950Sstevel@tonic-gate */
1960Sstevel@tonic-gate if (in0 != &zero) {
1970Sstevel@tonic-gate in = (long *)in0->fds_bits;
1980Sstevel@tonic-gate for (n = 0; n < nfds; n += NFDBITS)
1990Sstevel@tonic-gate *in++ = 0;
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate if (out0 != &zero) {
2020Sstevel@tonic-gate out = (long *)out0->fds_bits;
2030Sstevel@tonic-gate for (n = 0; n < nfds; n += NFDBITS)
2040Sstevel@tonic-gate *out++ = 0;
2050Sstevel@tonic-gate }
2060Sstevel@tonic-gate if (ex0 != &zero) {
2070Sstevel@tonic-gate ex = (long *)ex0->fds_bits;
2080Sstevel@tonic-gate for (n = 0; n < nfds; n += NFDBITS)
2090Sstevel@tonic-gate *ex++ = 0;
2100Sstevel@tonic-gate }
2110Sstevel@tonic-gate if (pfd_list != pfd)
212*6812Sraf free(pfd_list);
2130Sstevel@tonic-gate return (0);
2140Sstevel@tonic-gate }
2150Sstevel@tonic-gate
2160Sstevel@tonic-gate /*
2170Sstevel@tonic-gate * Check for EINVAL error case first to avoid changing any bits
2180Sstevel@tonic-gate * if we're going to return an error.
2190Sstevel@tonic-gate */
2200Sstevel@tonic-gate for (p = pfd_list, j = nused; j-- > 0; p++) {
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate * select will return EBADF immediately if any fd's
2230Sstevel@tonic-gate * are bad. poll will complete the poll on the
2240Sstevel@tonic-gate * rest of the fd's and include the error indication
2250Sstevel@tonic-gate * in the returned bits. This is a rare case so we
2260Sstevel@tonic-gate * accept this difference and return the error after
2270Sstevel@tonic-gate * doing more work than select would've done.
2280Sstevel@tonic-gate */
2290Sstevel@tonic-gate if (p->revents & POLLNVAL) {
2300Sstevel@tonic-gate errno = EBADF;
2310Sstevel@tonic-gate if (pfd_list != pfd)
232*6812Sraf free(pfd_list);
2330Sstevel@tonic-gate return (-1);
2340Sstevel@tonic-gate }
2350Sstevel@tonic-gate /*
2360Sstevel@tonic-gate * We would like to make POLLHUP available to select,
2370Sstevel@tonic-gate * checking to see if we have pending data to be read.
2380Sstevel@tonic-gate * BUT until we figure out how not to break Xsun's
2390Sstevel@tonic-gate * dependencies on select's existing features...
2400Sstevel@tonic-gate * This is what we _thought_ would work ... sigh!
2410Sstevel@tonic-gate */
2420Sstevel@tonic-gate /*
2430Sstevel@tonic-gate * if ((p->revents & POLLHUP) &&
2440Sstevel@tonic-gate * !(p->revents & (POLLRDNORM|POLLRDBAND))) {
2450Sstevel@tonic-gate * errno = EINTR;
2460Sstevel@tonic-gate * return (-1);
2470Sstevel@tonic-gate * }
2480Sstevel@tonic-gate */
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate
2510Sstevel@tonic-gate /*
2520Sstevel@tonic-gate * Convert results of poll back into bits
2530Sstevel@tonic-gate * in the argument arrays.
2540Sstevel@tonic-gate *
2550Sstevel@tonic-gate * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
2560Sstevel@tonic-gate * on return from poll if they were set on input, thus we don't
2570Sstevel@tonic-gate * worry about accidentally setting the corresponding bits in the
2580Sstevel@tonic-gate * zero array if the input bit masks were null.
2590Sstevel@tonic-gate *
2600Sstevel@tonic-gate * Must return number of bits set, not number of ready descriptors
2610Sstevel@tonic-gate * (as the man page says, and as poll() does).
2620Sstevel@tonic-gate */
2630Sstevel@tonic-gate rv = 0;
2640Sstevel@tonic-gate for (p = pfd_list; nused-- > 0; p++) {
2650Sstevel@tonic-gate j = (int)(p->fd / NFDBITS);
2660Sstevel@tonic-gate /* have we moved into another word of the bit mask yet? */
2670Sstevel@tonic-gate if (j != lastj) {
2680Sstevel@tonic-gate /* clear all output bits to start with */
2690Sstevel@tonic-gate in = (long *)&in0->fds_bits[j];
2700Sstevel@tonic-gate out = (long *)&out0->fds_bits[j];
2710Sstevel@tonic-gate ex = (long *)&ex0->fds_bits[j];
2720Sstevel@tonic-gate /*
2730Sstevel@tonic-gate * In case we made "zero" read-only (e.g., with
2740Sstevel@tonic-gate * cc -R), avoid actually storing into it.
2750Sstevel@tonic-gate */
2760Sstevel@tonic-gate if (in0 != &zero)
2770Sstevel@tonic-gate *in = 0;
2780Sstevel@tonic-gate if (out0 != &zero)
2790Sstevel@tonic-gate *out = 0;
2800Sstevel@tonic-gate if (ex0 != &zero)
2810Sstevel@tonic-gate *ex = 0;
2820Sstevel@tonic-gate lastj = j;
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate if (p->revents) {
2850Sstevel@tonic-gate m = 1L << (p->fd % NFDBITS);
2860Sstevel@tonic-gate if (p->revents & POLLRDNORM) {
2870Sstevel@tonic-gate *in |= m;
2880Sstevel@tonic-gate rv++;
2890Sstevel@tonic-gate }
2900Sstevel@tonic-gate if (p->revents & POLLWRNORM) {
2910Sstevel@tonic-gate *out |= m;
2920Sstevel@tonic-gate rv++;
2930Sstevel@tonic-gate }
2940Sstevel@tonic-gate if (p->revents & POLLRDBAND) {
2950Sstevel@tonic-gate *ex |= m;
2960Sstevel@tonic-gate rv++;
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate /*
2990Sstevel@tonic-gate * Only set this bit on return if we asked about
3000Sstevel@tonic-gate * input conditions.
3010Sstevel@tonic-gate */
3020Sstevel@tonic-gate if ((p->revents & (POLLHUP|POLLERR)) &&
3030Sstevel@tonic-gate (p->events & POLLRDNORM)) {
3040Sstevel@tonic-gate if ((*in & m) == 0)
3050Sstevel@tonic-gate rv++; /* wasn't already set */
3060Sstevel@tonic-gate *in |= m;
3070Sstevel@tonic-gate }
3080Sstevel@tonic-gate /*
3090Sstevel@tonic-gate * Only set this bit on return if we asked about
3100Sstevel@tonic-gate * output conditions.
3110Sstevel@tonic-gate */
3120Sstevel@tonic-gate if ((p->revents & (POLLHUP|POLLERR)) &&
3130Sstevel@tonic-gate (p->events & POLLWRNORM)) {
3140Sstevel@tonic-gate if ((*out & m) == 0)
3150Sstevel@tonic-gate rv++; /* wasn't already set */
3160Sstevel@tonic-gate *out |= m;
3170Sstevel@tonic-gate }
3180Sstevel@tonic-gate /*
3190Sstevel@tonic-gate * Only set this bit on return if we asked about
3200Sstevel@tonic-gate * output conditions.
3210Sstevel@tonic-gate */
3220Sstevel@tonic-gate if ((p->revents & (POLLHUP|POLLERR)) &&
3230Sstevel@tonic-gate (p->events & POLLRDBAND)) {
3240Sstevel@tonic-gate if ((*ex & m) == 0)
3250Sstevel@tonic-gate rv++; /* wasn't already set */
3260Sstevel@tonic-gate *ex |= m;
3270Sstevel@tonic-gate }
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate }
3300Sstevel@tonic-gate if (pfd_list != pfd)
331*6812Sraf free(pfd_list);
3320Sstevel@tonic-gate return (rv);
3330Sstevel@tonic-gate }
3340Sstevel@tonic-gate
3350Sstevel@tonic-gate int
select_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,struct timeval * tv)3360Sstevel@tonic-gate select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
3370Sstevel@tonic-gate struct timeval *tv)
3380Sstevel@tonic-gate {
3390Sstevel@tonic-gate timespec_t ts;
3400Sstevel@tonic-gate timespec_t *tsp;
3410Sstevel@tonic-gate
3420Sstevel@tonic-gate if (tv == NULL)
3430Sstevel@tonic-gate tsp = NULL;
3440Sstevel@tonic-gate else {
3451778Sraf /* check timeval validity */
3460Sstevel@tonic-gate if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
3470Sstevel@tonic-gate errno = EINVAL;
3480Sstevel@tonic-gate return (-1);
3490Sstevel@tonic-gate }
3501778Sraf /*
3511778Sraf * Convert timeval to timespec.
3521778Sraf * To preserve compatibility with past behavior,
3531778Sraf * when select was built upon poll(2), which has a
3541778Sraf * minimum non-zero timeout of 1 millisecond, force
3551778Sraf * a minimum non-zero timeout of 500 microseconds.
3561778Sraf */
3570Sstevel@tonic-gate ts.tv_sec = tv->tv_sec;
3580Sstevel@tonic-gate ts.tv_nsec = tv->tv_usec * 1000;
3591778Sraf if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
3601778Sraf ts.tv_nsec = 500000;
3610Sstevel@tonic-gate tsp = &ts;
3620Sstevel@tonic-gate }
3630Sstevel@tonic-gate
3640Sstevel@tonic-gate return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL));
3650Sstevel@tonic-gate }
3660Sstevel@tonic-gate
/*
 * Double the capacity of the pollfd list.
 *
 * num		in: current capacity in entries; out: the doubled capacity
 * list_head	in: current list; out: the newly allocated list
 * orig		the caller's original (non-heap) buffer, which is never freed
 *
 * A zero-filled buffer of twice the size is allocated, the existing
 * entries are copied to its front, and the old buffer is freed unless it
 * is orig.  Returns a pointer to the first unused entry of the new list,
 * or NULL if the allocation failed, in which case *num and *list_head are
 * left untouched and the old list still belongs to the caller.
 */
static struct pollfd *
realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig)
{
	size_t old_count = (size_t)*num;
	size_t new_count = old_count * 2;
	struct pollfd *grown;

	/* calloc zero-fills the new tail and rejects an overflowing size */
	grown = calloc(new_count, sizeof (*grown));
	if (grown == NULL)
		return (NULL);

	(void) memcpy(grown, *list_head, old_count * sizeof (*grown));
	if (*list_head != orig)
		free(*list_head);
	*list_head = grown;
	*num = (int)new_count;
	return (grown + old_count);
}
393