/*	$NetBSD: sys_select.c,v 1.19 2009/11/11 09:48:51 rmind Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

/*
 * System calls relating to files.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.19 2009/11/11 09:48:51 rmind Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/sleepq.h>

/* Flags for lwp::l_selflag. */
#define	SEL_RESET	0	/* awoken, interrupted, or not yet polling */
#define	SEL_SCANNING	1	/* polling descriptors */
#define	SEL_BLOCKING	2	/* about to block on the per-CPU sleep queue */

/* Per-CPU state for select()/poll(). */
#if MAXCPUS > 32
#error adjust this code
#endif
typedef struct selcpu {
	kmutex_t	*sc_lock;
	sleepq_t	sc_sleepq;
	int		sc_ncoll;
	uint32_t	sc_mask;
} selcpu_t;
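
/*
 * There is one selcpu_t per CPU: an LWP entering select()/poll() records
 * itself against, and sleeps on, the selcpu of the CPU it is running on.
 * A selinfo that already has a named waiter records further waiters in
 * sel_collision as a bit mask of CPU indexes (hence the MAXCPUS <= 32
 * restriction above), and selnotify() then wakes the sleep queue of every
 * CPU whose bit is set.
 */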

static inline int	selscan(char *, u_int, register_t *);
static inline int	pollscan(struct pollfd *, u_int, register_t *);
static void		selclear(void);

static syncobj_t select_sobj = {
	SOBJ_SLEEPQ_FIFO,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

/*
 * Select system call.
 */
int
sys___pselect50(struct lwp *l, const struct sys___pselect50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)				nd;
		syscallarg(fd_set *)			in;
		syscallarg(fd_set *)			ou;
		syscallarg(fd_set *)			ex;
		syscallarg(const struct timespec *)	ts;
		syscallarg(sigset_t *)			mask;
	} */
	struct timespec	ats, *ts = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		ts = &ats;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return selcommon(retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), ts, mask);
}

int
sys___select50(struct lwp *l, const struct sys___select50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */
	struct timeval atv;
	struct timespec ats, *ts = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (void *)&atv, sizeof(atv));
		if (error)
			return error;
		TIMEVAL_TO_TIMESPEC(&atv, &ats);
		ts = &ats;
	}

	return selcommon(retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), ts, NULL);
}

/*
 * sel_do_scan: common code to perform the scan on descriptors.
 */
static int
sel_do_scan(void *fds, u_int nfds, struct timespec *ts, sigset_t *mask,
    register_t *retval, int selpoll)
{
	lwp_t		* const l = curlwp;
	proc_t		* const p = l->l_proc;
	selcpu_t	*sc;
	kmutex_t	*lock;
	sigset_t	oldmask;
	struct timespec	sleepts;
	int		error, timo;

	timo = 0;
	if (ts && inittimeleft(ts, &sleepts) == -1) {
		return EINVAL;
	}

	if (__predict_false(mask)) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(p->p_lock);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(p->p_lock);
	} else {
		/* XXXgcc */
		oldmask = l->l_sigmask;
	}

	sc = curcpu()->ci_data.cpu_selcpu;
	lock = sc->sc_lock;
	l->l_selcpu = sc;
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		int ncoll;

		/*
		 * No need to lock.  If this is overwritten by another value
		 * while scanning, we will retry below.  We only need to see
		 * exact state from the descriptors that we are about to poll,
		 * and lock activity resulting from fo_poll is enough to
		 * provide an up to date value for new polling activity.
		 */
		l->l_selflag = SEL_SCANNING;
		ncoll = sc->sc_ncoll;

		if (selpoll) {
			error = selscan((char *)fds, nfds, retval);
		} else {
			error = pollscan((struct pollfd *)fds, nfds, retval);
		}

		if (error || *retval)
			break;
		if (ts && (timo = gettimeleft(ts, &sleepts)) <= 0)
			break;
		mutex_spin_enter(lock);
		if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
			mutex_spin_exit(lock);
			continue;
		}
		l->l_selflag = SEL_BLOCKING;
		l->l_kpriority = true;
		sleepq_enter(&sc->sc_sleepq, l, lock);
		sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
		error = sleepq_block(timo, true);
		if (error != 0)
			break;
	}
	selclear();

	if (__predict_false(mask)) {
		mutex_enter(p->p_lock);
		l->l_sigmask = oldmask;
		mutex_exit(p->p_lock);
	}
	return error;
}
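
/*
 * selcommon: the three user fd_sets are staged in a single buffer holding
 * six sets of `ni' bytes each: the in/ou/ex input sets at offsets 0..2,
 * and the corresponding output sets, filled in by selscan(), at offsets
 * 3..5.  Only the output sets are copied back out to userspace.
 */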
int
selcommon(register_t *retval, int nd, fd_set *u_in, fd_set *u_ou,
    fd_set *u_ex, struct timespec *ts, sigset_t *mask)
{
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	char		*bits;
	int		error, nf;
	size_t		ni;

	if (nd < 0)
		return (EINVAL);
	nf = curlwp->l_fd->fd_dt->dt_nfiles;
	if (nd > nf) {
		/* forgiving; slightly wrong */
		nd = nf;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits)) {
		bits = kmem_alloc(ni * 6, KM_SLEEP);
		if (bits == NULL)
			return ENOMEM;
	} else
		bits = smallbits;

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	error = sel_do_scan(bits, nd, ts, mask, retval, 1);
 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0 && u_in != NULL)
		error = copyout(bits + ni * 3, u_in, ni);
	if (error == 0 && u_ou != NULL)
		error = copyout(bits + ni * 4, u_ou, ni);
	if (error == 0 && u_ex != NULL)
		error = copyout(bits + ni * 5, u_ex, ni);
	if (bits != smallbits)
		kmem_free(bits, ni * 6);
	return (error);
}
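
/*
 * selscan: walk the three input sets; every set bit is translated into an
 * fo_poll() call with the poll events from flag[] (read, write and
 * exceptional conditions respectively), and the same bit is set in the
 * matching output set if the descriptor is ready.
 */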
static inline int
selscan(char *bits, u_int nfd, register_t *retval)
{
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
	fd_mask *ibitp, *obitp;
	int msk, i, j, fd, ni, n;
	fd_mask ibits, obits;
	file_t *fp;

	ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);
	ibitp = (fd_mask *)(bits + ni * 0);
	obitp = (fd_mask *)(bits + ni * 3);
	n = 0;

	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fd)) == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk])) {
					obits |= (1 << j);
					n++;
				}
				fd_putfile(fd);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, const struct sys_poll_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */
	struct timespec	ats, *ts = NULL;

	if (SCARG(uap, timeout) != INFTIM) {
		ats.tv_sec = SCARG(uap, timeout) / 1000;
		ats.tv_nsec = (SCARG(uap, timeout) % 1000) * 1000000;
		ts = &ats;
	}

	return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, NULL);
}

/*
 * Poll system call (pollts variant, taking a timespec and signal mask).
 */
int
sys___pollts50(struct lwp *l, const struct sys___pollts50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */
	struct timespec	ats, *ts = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		ts = &ats;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask);
}

int
pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds,
    struct timespec *ts, sigset_t *mask)
{
	struct pollfd	smallfds[32];
	struct pollfd	*fds;
	int		error;
	size_t		ni, nf;

	nf = curlwp->l_fd->fd_dt->dt_nfiles;
	if (nfds > nf) {
		/* forgiving; slightly wrong */
		nfds = nf;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallfds)) {
		fds = kmem_alloc(ni, KM_SLEEP);
		if (fds == NULL)
			return ENOMEM;
	} else
		fds = smallfds;

	error = copyin(u_fds, fds, ni);
	if (error)
		goto done;

	error = sel_do_scan(fds, nfds, ts, mask, retval, 0);
 done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		error = copyout(fds, u_fds, ni);
	if (fds != smallfds)
		kmem_free(fds, ni);
	return (error);
}

static inline int
pollscan(struct pollfd *fds, u_int nfd, register_t *retval)
{
	int i, n;
	file_t *fp;

	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd < 0) {
			fds->revents = 0;
		} else if ((fp = fd_getfile(fds->fd)) == NULL) {
			fds->revents = POLLNVAL;
			n++;
		} else {
			fds->revents = (*fp->f_ops->fo_poll)(fp,
			    fds->events | POLLERR | POLLHUP);
			if (fds->revents != 0)
				n++;
			fd_putfile(fds->fd);
		}
	}
	*retval = n;
	return (0);
}
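
/*
 * seltrue: generic poll routine for devices that are always ready for
 * I/O; it reports the requested read/write events as true immediately
 * and never records a waiter.
 */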
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, lwp_t *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.  Concurrency issues:
 *
 * The caller holds the same lock across calls to selrecord() and
 * selnotify(), so we don't need to consider a concurrent wakeup
 * while in this routine.
 *
 * The only activity we need to guard against is selclear(), called by
 * another thread that is exiting sel_do_scan().
 * `sel_lwp' can only become non-NULL while the caller's lock is held,
 * so it cannot become non-NULL due to a change made by another thread
 * while we are in this routine.  It can only become _NULL_ due to a
 * call to selclear().
 *
 * If it is non-NULL and != selector there is the potential for
 * selclear() to be called by another thread.  If either of those
 * conditions are true, we're not interested in touching the `named
 * waiter' part of the selinfo record because we need to record a
 * collision.  Hence there is no need for additional locking in this
 * routine.
 */
void
selrecord(lwp_t *selector, struct selinfo *sip)
{
	selcpu_t *sc;
	lwp_t *other;

	KASSERT(selector == curlwp);

	sc = selector->l_selcpu;
	other = sip->sel_lwp;

	if (other == selector) {
		/* `selector' has already claimed it. */
		KASSERT(sip->sel_cpu == sc);
	} else if (other == NULL) {
		/*
		 * First named waiter, although there may be unnamed
		 * waiters (collisions).  Issue a memory barrier to
		 * ensure that we access sel_lwp (above) before other
		 * fields - this guards against a call to selclear().
		 */
		membar_enter();
		sip->sel_lwp = selector;
		SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
		/* Replace selinfo's lock with our chosen CPU's lock. */
		sip->sel_cpu = sc;
	} else {
		/* Multiple waiters: record a collision. */
		sip->sel_collision |= sc->sc_mask;
		KASSERT(sip->sel_cpu != NULL);
	}
}

/*
 * Do a wakeup when a selectable event occurs.  Concurrency issues:
 *
 * As per selrecord(), the caller's object lock is held.  If there
 * is a named waiter, we must acquire the associated selcpu's lock
 * in order to synchronize with selclear() and pollers going to sleep
 * in sel_do_scan().
 *
 * sip->sel_cpu cannot change at this point, as it is only changed
 * in selrecord(), and concurrent calls to selrecord() are locked
 * out by the caller.
 */
void
selnotify(struct selinfo *sip, int events, long knhint)
{
	selcpu_t *sc;
	uint32_t mask;
	int index, oflag;
	lwp_t *l;
	kmutex_t *lock;

	KNOTE(&sip->sel_klist, knhint);

	if (sip->sel_lwp != NULL) {
		/* One named LWP is waiting. */
		sc = sip->sel_cpu;
		lock = sc->sc_lock;
		mutex_spin_enter(lock);
		/* Still there? */
		if (sip->sel_lwp != NULL) {
			l = sip->sel_lwp;
			/*
			 * If thread is sleeping, wake it up.  If it's not
			 * yet asleep, it will notice the change in state
			 * and will re-poll the descriptors.
			 */
			oflag = l->l_selflag;
			l->l_selflag = SEL_RESET;
			if (oflag == SEL_BLOCKING && l->l_mutex == lock) {
				KASSERT(l->l_wchan == sc);
				sleepq_unsleep(l, false);
			}
		}
		mutex_spin_exit(lock);
	}

	if ((mask = sip->sel_collision) != 0) {
		/*
		 * There was a collision (multiple waiters): we must
		 * inform all potentially interested waiters.
		 */
		sip->sel_collision = 0;
		do {
			index = ffs(mask) - 1;
			mask &= ~(1 << index);
			sc = cpu_lookup(index)->ci_data.cpu_selcpu;
			lock = sc->sc_lock;
			mutex_spin_enter(lock);
			sc->sc_ncoll++;
			sleepq_wake(&sc->sc_sleepq, sc, (u_int)-1, lock);
		} while (__predict_false(mask != 0));
	}
}

/*
 * Remove an LWP from all objects that it is waiting for.  Concurrency
 * issues:
 *
 * The object owner's (e.g. device driver) lock is not held here.  Calls
 * can be made to selrecord() and we do not synchronize against those
 * directly using locks.  However, we use `sel_lwp' to lock out changes.
 * Before clearing it we must use memory barriers to ensure that we can
 * safely traverse the list of selinfo records.
 */
static void
selclear(void)
{
	struct selinfo *sip, *next;
	selcpu_t *sc;
	lwp_t *l;
	kmutex_t *lock;

	l = curlwp;
	sc = l->l_selcpu;
	lock = sc->sc_lock;

	mutex_spin_enter(lock);
	for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) {
		KASSERT(sip->sel_lwp == l);
		KASSERT(sip->sel_cpu == l->l_selcpu);
		/*
		 * Read link to next selinfo record, if any.
		 * It's no longer safe to touch `sip' after clearing
		 * `sel_lwp', so ensure that the read of `sel_chain'
		 * completes before the clearing of sel_lwp becomes
		 * globally visible.
		 */
		next = SLIST_NEXT(sip, sel_chain);
		membar_exit();
		/* Release the record for another named waiter to use. */
		sip->sel_lwp = NULL;
	}
	mutex_spin_exit(lock);
}

/*
 * Initialize the select/poll system calls.  Called once for each
 * CPU in the system, as they are attached.
 */
void
selsysinit(struct cpu_info *ci)
{
	selcpu_t *sc;

	sc = kmem_alloc(roundup2(sizeof(selcpu_t), coherency_unit) +
	    coherency_unit, KM_SLEEP);
	sc = (void *)roundup2((uintptr_t)sc, coherency_unit);
	sc->sc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	sleepq_init(&sc->sc_sleepq);
	sc->sc_ncoll = 0;
	sc->sc_mask = (1 << cpu_index(ci));
	ci->ci_data.cpu_selcpu = sc;
}

/*
 * Initialize a selinfo record.
 */
void
selinit(struct selinfo *sip)
{

	memset(sip, 0, sizeof(*sip));
}

/*
 * Destroy a selinfo record.  The owning object must not gain new
 * references while this is in progress: all activity on the record
 * must be stopped.
 *
 * Concurrency issues: we need only guard against a call to selclear()
 * by a thread exiting sel_do_scan().  The caller has prevented further
 * references being made to the selinfo record via selrecord(), and it
 * won't call selwakeup() again.
 */
void
seldestroy(struct selinfo *sip)
{
	selcpu_t *sc;
	kmutex_t *lock;
	lwp_t *l;

	if (sip->sel_lwp == NULL)
		return;

	/*
	 * Lock out selclear().  The selcpu pointer can't change while
	 * we are here since it is only ever changed in selrecord(),
	 * and that will not be entered again for this record because
	 * it is dying.
	 */
	KASSERT(sip->sel_cpu != NULL);
	sc = sip->sel_cpu;
	lock = sc->sc_lock;
	mutex_spin_enter(lock);
	if ((l = sip->sel_lwp) != NULL) {
		/*
		 * This should rarely happen, so although SLIST_REMOVE()
		 * is slow, using it here is not a problem.
		 */
		KASSERT(l->l_selcpu == sc);
		SLIST_REMOVE(&l->l_selwait, sip, selinfo, sel_chain);
		sip->sel_lwp = NULL;
	}
	mutex_spin_exit(lock);
}
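
/*
 * pollsock: kernel-internal poll on a single socket.  It follows the
 * same scan/block protocol as sel_do_scan(), but calls sopoll()
 * directly and involves no userspace pollfd array.
 */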
int
pollsock(struct socket *so, const struct timespec *tsp, int events)
{
	int		ncoll, error, timo;
	struct timespec	sleepts, ts;
	selcpu_t	*sc;
	lwp_t		*l;
	kmutex_t	*lock;

	timo = 0;
	if (tsp != NULL) {
		ts = *tsp;
		if (inittimeleft(&ts, &sleepts) == -1)
			return EINVAL;
	}

	l = curlwp;
	sc = l->l_cpu->ci_data.cpu_selcpu;
	lock = sc->sc_lock;
	l->l_selcpu = sc;
	SLIST_INIT(&l->l_selwait);
	error = 0;
	for (;;) {
		/*
		 * No need to lock.  If this is overwritten by another
		 * value while scanning, we will retry below.  We only
		 * need to see exact state from the descriptors that
		 * we are about to poll, and lock activity resulting
		 * from fo_poll is enough to provide an up to date value
		 * for new polling activity.
		 */
		ncoll = sc->sc_ncoll;
		l->l_selflag = SEL_SCANNING;
		if (sopoll(so, events) != 0)
			break;
		if (tsp && (timo = gettimeleft(&ts, &sleepts)) <= 0)
			break;
		mutex_spin_enter(lock);
		if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
			mutex_spin_exit(lock);
			continue;
		}
		l->l_selflag = SEL_BLOCKING;
		sleepq_enter(&sc->sc_sleepq, l, lock);
		sleepq_enqueue(&sc->sc_sleepq, sc, "pollsock", &select_sobj);
		error = sleepq_block(timo, true);
		if (error != 0)
			break;
	}
	selclear();
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	return (error);
}