1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* Copyright (c) 1988 AT&T */ 30 /* All Rights Reserved */ 31 32 /* 33 * Emulation of select() system call using _pollsys() system call. 34 * 35 * Assumptions: 36 * polling for input only is most common. 37 * polling for exceptional conditions is very rare. 38 * 39 * Note that is it not feasible to emulate all error conditions, 40 * in particular conditions that would return EFAULT are far too 41 * difficult to check for in a library routine. 42 * 43 * This is the alternate large fd_set select. 44 * 45 */ 46 47 /* 48 * Must precede any include files 49 */ 50 #ifdef FD_SETSIZE 51 #undef FD_SETSIZE 52 #endif 53 #define FD_SETSIZE 65536 54 55 /* 56 * We do not #redefine the name since the only users of this 57 * are external to the libraries and commands. 58 * 59 * #pragma weak pselect_large_fdset = _pselect_large_fdset 60 * #pragma weak select_large_fdset = _select_large_fdset 61 */ 62 63 #include "synonyms.h" 64 #include <values.h> 65 #include <stdlib.h> 66 #include <string.h> 67 #include <errno.h> 68 #include <sys/time.h> 69 #include <sys/types.h> 70 #include <sys/poll.h> 71 #include <string.h> 72 #include <stdlib.h> 73 #include "libc.h" 74 75 #define DEFAULT_POLL_SIZE 64 76 77 static struct pollfd *realloc_fds(int *, struct pollfd **, struct pollfd *); 78 79 int 80 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 81 const timespec_t *tsp, const sigset_t *sigmask) 82 { 83 long *in, *out, *ex; 84 ulong_t m; /* bit mask */ 85 int j; /* loop counter */ 86 ulong_t b; /* bits to test */ 87 int n, rv; 88 int lastj = -1; 89 int nused; 90 91 /* 92 * Rather than have a mammoth pollfd (65K) list on the stack 93 * we start with a small one and then malloc larger chunks 94 * on the heap if necessary. 95 */ 96 97 struct pollfd pfd[DEFAULT_POLL_SIZE]; 98 struct pollfd *p; 99 struct pollfd *pfd_list; 100 int nfds_on_list; 101 102 fd_set zero; 103 104 /* 105 * Check for invalid conditions at outset. 106 * Required for spec1170. 107 * SUSV3: We must behave as a cancellation point even if we fail early. 108 */ 109 if (nfds >= 0 && nfds <= FD_SETSIZE) { 110 if (tsp != NULL) { 111 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC || 112 tsp->tv_sec < 0) { 113 _private_testcancel(); 114 errno = EINVAL; 115 return (-1); 116 } 117 } 118 } else { 119 _private_testcancel(); 120 errno = EINVAL; 121 return (-1); 122 } 123 124 /* 125 * If any input args are null, point them at the null array. 126 */ 127 (void) memset(&zero, 0, sizeof (fd_set)); 128 if (in0 == NULL) 129 in0 = &zero; 130 if (out0 == NULL) 131 out0 = &zero; 132 if (ex0 == NULL) 133 ex0 = &zero; 134 135 nfds_on_list = DEFAULT_POLL_SIZE; 136 pfd_list = pfd; 137 p = pfd_list; 138 (void) memset(pfd, 0, sizeof (pfd)); 139 /* 140 * For each fd, if any bits are set convert them into 141 * the appropriate pollfd struct. 142 */ 143 in = (long *)in0->fds_bits; 144 out = (long *)out0->fds_bits; 145 ex = (long *)ex0->fds_bits; 146 nused = 0; 147 /* 148 * nused reflects the number of pollfd structs currently used 149 * less one. If realloc_fds returns 0 it is because malloc 150 * failed. We expect malloc() to have done the proper 151 * thing with errno. 152 */ 153 for (n = 0; n < nfds; n += NFDBITS) { 154 b = (ulong_t)(*in | *out | *ex); 155 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) { 156 if (b & 1) { 157 p->fd = n + j; 158 if (p->fd < nfds) { 159 p->events = 0; 160 if (*in & m) 161 p->events |= POLLRDNORM; 162 if (*out & m) 163 p->events |= POLLWRNORM; 164 if (*ex & m) 165 p->events |= POLLRDBAND; 166 if (nused < (nfds_on_list - 1)) { 167 p++; 168 } else { 169 p = realloc_fds( 170 &nfds_on_list, 171 &pfd_list, pfd); 172 if (p == 0) { 173 if (pfd_list != pfd) 174 (void) free(pfd_list); 175 _private_testcancel(); 176 return (-1); 177 } 178 } 179 nused++; 180 } else 181 goto done; 182 } 183 } 184 in++; 185 out++; 186 ex++; 187 } 188 done: 189 /* 190 * Now do the poll. 191 */ 192 do { 193 rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask); 194 } while (rv < 0 && errno == EAGAIN); 195 196 if (rv < 0) { /* no need to set bit masks */ 197 if (pfd_list != pfd) 198 (void) free(pfd_list); 199 return (rv); 200 } else if (rv == 0) { 201 /* 202 * Clear out bit masks, just in case. 203 * On the assumption that usually only 204 * one bit mask is set, use three loops. 205 */ 206 if (in0 != &zero) { 207 in = (long *)in0->fds_bits; 208 for (n = 0; n < nfds; n += NFDBITS) 209 *in++ = 0; 210 } 211 if (out0 != &zero) { 212 out = (long *)out0->fds_bits; 213 for (n = 0; n < nfds; n += NFDBITS) 214 *out++ = 0; 215 } 216 if (ex0 != &zero) { 217 ex = (long *)ex0->fds_bits; 218 for (n = 0; n < nfds; n += NFDBITS) 219 *ex++ = 0; 220 } 221 if (pfd_list != pfd) 222 (void) free(pfd_list); 223 return (0); 224 } 225 226 /* 227 * Check for EINVAL error case first to avoid changing any bits 228 * if we're going to return an error. 229 */ 230 for (p = pfd_list, j = nused; j-- > 0; p++) { 231 /* 232 * select will return EBADF immediately if any fd's 233 * are bad. poll will complete the poll on the 234 * rest of the fd's and include the error indication 235 * in the returned bits. This is a rare case so we 236 * accept this difference and return the error after 237 * doing more work than select would've done. 238 */ 239 if (p->revents & POLLNVAL) { 240 errno = EBADF; 241 if (pfd_list != pfd) 242 (void) free(pfd_list); 243 return (-1); 244 } 245 /* 246 * We would like to make POLLHUP available to select, 247 * checking to see if we have pending data to be read. 248 * BUT until we figure out how not to break Xsun's 249 * dependencies on select's existing features... 250 * This is what we _thought_ would work ... sigh! 251 */ 252 /* 253 * if ((p->revents & POLLHUP) && 254 * !(p->revents & (POLLRDNORM|POLLRDBAND))) { 255 * errno = EINTR; 256 * return (-1); 257 * } 258 */ 259 } 260 261 /* 262 * Convert results of poll back into bits 263 * in the argument arrays. 264 * 265 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set 266 * on return from poll if they were set on input, thus we don't 267 * worry about accidentally setting the corresponding bits in the 268 * zero array if the input bit masks were null. 269 * 270 * Must return number of bits set, not number of ready descriptors 271 * (as the man page says, and as poll() does). 272 */ 273 rv = 0; 274 for (p = pfd_list; nused-- > 0; p++) { 275 j = (int)(p->fd / NFDBITS); 276 /* have we moved into another word of the bit mask yet? */ 277 if (j != lastj) { 278 /* clear all output bits to start with */ 279 in = (long *)&in0->fds_bits[j]; 280 out = (long *)&out0->fds_bits[j]; 281 ex = (long *)&ex0->fds_bits[j]; 282 /* 283 * In case we made "zero" read-only (e.g., with 284 * cc -R), avoid actually storing into it. 285 */ 286 if (in0 != &zero) 287 *in = 0; 288 if (out0 != &zero) 289 *out = 0; 290 if (ex0 != &zero) 291 *ex = 0; 292 lastj = j; 293 } 294 if (p->revents) { 295 m = 1L << (p->fd % NFDBITS); 296 if (p->revents & POLLRDNORM) { 297 *in |= m; 298 rv++; 299 } 300 if (p->revents & POLLWRNORM) { 301 *out |= m; 302 rv++; 303 } 304 if (p->revents & POLLRDBAND) { 305 *ex |= m; 306 rv++; 307 } 308 /* 309 * Only set this bit on return if we asked about 310 * input conditions. 311 */ 312 if ((p->revents & (POLLHUP|POLLERR)) && 313 (p->events & POLLRDNORM)) { 314 if ((*in & m) == 0) 315 rv++; /* wasn't already set */ 316 *in |= m; 317 } 318 /* 319 * Only set this bit on return if we asked about 320 * output conditions. 321 */ 322 if ((p->revents & (POLLHUP|POLLERR)) && 323 (p->events & POLLWRNORM)) { 324 if ((*out & m) == 0) 325 rv++; /* wasn't already set */ 326 *out |= m; 327 } 328 /* 329 * Only set this bit on return if we asked about 330 * output conditions. 331 */ 332 if ((p->revents & (POLLHUP|POLLERR)) && 333 (p->events & POLLRDBAND)) { 334 if ((*ex & m) == 0) 335 rv++; /* wasn't already set */ 336 *ex |= m; 337 } 338 } 339 } 340 if (pfd_list != pfd) 341 (void) free(pfd_list); 342 return (rv); 343 } 344 345 int 346 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 347 struct timeval *tv) 348 { 349 timespec_t ts; 350 timespec_t *tsp; 351 352 if (tv == NULL) 353 tsp = NULL; 354 else { 355 /* check timeval validity */ 356 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) { 357 errno = EINVAL; 358 return (-1); 359 } 360 /* 361 * Convert timeval to timespec. 362 * To preserve compatibility with past behavior, 363 * when select was built upon poll(2), which has a 364 * minimum non-zero timeout of 1 millisecond, force 365 * a minimum non-zero timeout of 500 microseconds. 366 */ 367 ts.tv_sec = tv->tv_sec; 368 ts.tv_nsec = tv->tv_usec * 1000; 369 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000) 370 ts.tv_nsec = 500000; 371 tsp = &ts; 372 } 373 374 return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL)); 375 } 376 377 /* 378 * Reallocate buffers of pollfds for our list. We malloc a new buffer 379 * and, in the case where the old buffer does not match what is passed 380 * in orig, free the buffer after copying the contents. 381 */ 382 struct pollfd * 383 realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig) 384 { 385 struct pollfd *b; 386 int nta; 387 int n2; 388 389 n2 = *num * 2; 390 nta = n2 * sizeof (struct pollfd); 391 b = malloc(nta); 392 if (b) { 393 (void) memset(b, 0, (size_t)nta); 394 (void) memcpy(b, *list_head, nta / 2); 395 if (*list_head != orig) 396 (void) free (*list_head); 397 *list_head = b; 398 b += *num; 399 *num = n2; 400 } 401 return (b); 402 } 403