/*	$NetBSD: sys_select.c,v 1.20 2009/12/12 17:47:05 dsl Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

/*
 * System calls relating to files.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.20 2009/12/12 17:47:05 dsl Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/sleepq.h>

/* Flags for lwp::l_selflag. */
#define	SEL_RESET	0	/* awoken, interrupted, or not yet polling */
#define	SEL_SCANNING	1	/* polling descriptors */
#define	SEL_BLOCKING	2	/* about to block on select_cv */

/* Per-CPU state for select()/poll(). */
#if MAXCPUS > 32
#error adjust this code
#endif
typedef struct selcpu {
	kmutex_t	*sc_lock;
	sleepq_t	sc_sleepq;
	int		sc_ncoll;
	uint32_t	sc_mask;
} selcpu_t;

static inline int	selscan(char *, u_int, register_t *);
static inline int	pollscan(struct pollfd *, u_int, register_t *);
static void		selclear(void);

static syncobj_t select_sobj = {
	SOBJ_SLEEPQ_FIFO,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};
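
/*
 * Overview of the flow through this file (descriptive only):
 *
 *	select(2)/pselect(2) -> selcommon() -> sel_do_scan() -> selscan()
 *	poll(2)/pollts(2)    -> pollcommon() -> sel_do_scan() -> pollscan()
 *
 * The scan functions call each descriptor's fo_poll method; a driver
 * that has no event pending records the polling LWP with selrecord().
 * When the event later occurs, the driver calls selnotify(), which
 * wakes the named waiter directly or, if several LWPs wait on one
 * selinfo, broadcasts to the per-CPU sleep queues marked in the
 * collision mask.
 */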
/*
 * Select system call.
 */
int
sys___pselect50(struct lwp *l, const struct sys___pselect50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)				nd;
		syscallarg(fd_set *)			in;
		syscallarg(fd_set *)			ou;
		syscallarg(fd_set *)			ex;
		syscallarg(const struct timespec *)	ts;
		syscallarg(sigset_t *)			mask;
	} */
	struct timespec	ats, *ts = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		ts = &ats;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return selcommon(retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), ts, mask);
}

int
sys___select50(struct lwp *l, const struct sys___select50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */
	struct timeval atv;
	struct timespec ats, *ts = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (void *)&atv, sizeof(atv));
		if (error)
			return error;
		TIMEVAL_TO_TIMESPEC(&atv, &ats);
		ts = &ats;
	}

	return selcommon(retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), ts, NULL);
}
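
/*
 * For reference, the userland pattern that sys___pselect50() exists to
 * support is the atomic signal-mask swap around the wait.  A minimal
 * sketch (illustrative only, not kernel code):
 *
 *	sigset_t block, old;
 *	fd_set rfds;
 *
 *	sigemptyset(&block);
 *	sigaddset(&block, SIGCHLD);
 *	sigprocmask(SIG_BLOCK, &block, &old);
 *	... check flags set by the SIGCHLD handler ...
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	if (pselect(fd + 1, &rfds, NULL, NULL, NULL, &old) == -1 &&
 *	    errno == EINTR)
 *		... a signal was taken with `old' installed ...
 *
 * Doing the same with separate sigprocmask() and select() calls would
 * leave a window in which the signal could be delivered before the
 * process goes to sleep.
 */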
/*
 * sel_do_scan: common code to perform the scan on descriptors.
 */
static int
sel_do_scan(void *fds, u_int nfds, struct timespec *ts, sigset_t *mask,
    register_t *retval, int selpoll)
{
	lwp_t		* const l = curlwp;
	proc_t		* const p = l->l_proc;
	selcpu_t	*sc;
	kmutex_t	*lock;
	sigset_t	oldmask;
	struct timespec	sleepts;
	int		error, timo;

	timo = 0;
	if (ts && inittimeleft(ts, &sleepts) == -1) {
		return EINVAL;
	}

	if (__predict_false(mask)) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(p->p_lock);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(p->p_lock);
	} else {
		/* XXXgcc */
		oldmask = l->l_sigmask;
	}

	sc = curcpu()->ci_data.cpu_selcpu;
	lock = sc->sc_lock;
	l->l_selcpu = sc;
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		int ncoll;

		/*
		 * No need to lock.  If this is overwritten by another value
		 * while scanning, we will retry below.  We only need to see
		 * exact state from the descriptors that we are about to poll,
		 * and lock activity resulting from fo_poll is enough to
		 * provide an up to date value for new polling activity.
		 */
		l->l_selflag = SEL_SCANNING;
		ncoll = sc->sc_ncoll;

		if (selpoll) {
			error = selscan((char *)fds, nfds, retval);
		} else {
			error = pollscan((struct pollfd *)fds, nfds, retval);
		}

		if (error || *retval)
			break;
		if (ts && (timo = gettimeleft(ts, &sleepts)) <= 0)
			break;
		mutex_spin_enter(lock);
		if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
			mutex_spin_exit(lock);
			continue;
		}
		l->l_selflag = SEL_BLOCKING;
		l->l_kpriority = true;
		sleepq_enter(&sc->sc_sleepq, l, lock);
		sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
		error = sleepq_block(timo, true);
		if (error != 0)
			break;
	}
	selclear();

	if (__predict_false(mask)) {
		mutex_enter(p->p_lock);
		l->l_sigmask = oldmask;
		mutex_exit(p->p_lock);
	}

	/* select and poll are not restarted after signals... */
	if (error == ERESTART)
		return EINTR;
	if (error == EWOULDBLOCK)
		return 0;
	return error;
}

int
selcommon(register_t *retval, int nd, fd_set *u_in, fd_set *u_ou,
    fd_set *u_ex, struct timespec *ts, sigset_t *mask)
{
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	char		*bits;
	int		error, nf;
	size_t		ni;

	if (nd < 0)
		return (EINVAL);
	nf = curlwp->l_fd->fd_dt->dt_nfiles;
	if (nd > nf) {
		/* forgiving; slightly wrong */
		nd = nf;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits)) {
		bits = kmem_alloc(ni * 6, KM_SLEEP);
		if (bits == NULL)
			return ENOMEM;
	} else
		bits = smallbits;

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto fail;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	error = sel_do_scan(bits, nd, ts, mask, retval, 1);
	if (error == 0 && u_in != NULL)
		error = copyout(bits + ni * 3, u_in, ni);
	if (error == 0 && u_ou != NULL)
		error = copyout(bits + ni * 4, u_ou, ni);
	if (error == 0 && u_ex != NULL)
		error = copyout(bits + ni * 5, u_ex, ni);
 fail:
	if (bits != smallbits)
		kmem_free(bits, ni * 6);
	return (error);
}

static inline int
selscan(char *bits, u_int nfd, register_t *retval)
{
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
	fd_mask *ibitp, *obitp;
	int msk, i, j, fd, ni, n;
	fd_mask ibits, obits;
	file_t *fp;

	ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);
	ibitp = (fd_mask *)(bits + ni * 0);
	obitp = (fd_mask *)(bits + ni * 3);
	n = 0;

	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fd)) == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk])) {
					obits |= (1 << j);
					n++;
				}
				fd_putfile(fd);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
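
/*
 * Layout of the `bits' buffer shared by selcommon() and selscan(), as
 * a sketch.  `ni' is the rounded byte size of one descriptor set; the
 * six consecutive slices are:
 *
 *	bits + ni * 0	input: readable set (copied in from u_in)
 *	bits + ni * 1	input: writable set (copied in from u_ou)
 *	bits + ni * 2	input: exceptional set (copied in from u_ex)
 *	bits + ni * 3	output: readable results (copied out to u_in)
 *	bits + ni * 4	output: writable results (copied out to u_ou)
 *	bits + ni * 5	output: exceptional results (copied out to u_ex)
 *
 * selscan() walks the three input slices in order, so a single pass
 * over msk = 0..2 advances ibitp and obitp through all six slices.
 */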
/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, const struct sys_poll_args *uap, register_t *retval)
{
	/* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */
	struct timespec	ats, *ts = NULL;

	if (SCARG(uap, timeout) != INFTIM) {
		ats.tv_sec = SCARG(uap, timeout) / 1000;
		ats.tv_nsec = (SCARG(uap, timeout) % 1000) * 1000000;
		ts = &ats;
	}

	return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, NULL);
}

/*
 * Poll system call with a signal mask and nanosecond timeout (pollts).
 */
int
sys___pollts50(struct lwp *l, const struct sys___pollts50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */
	struct timespec	ats, *ts = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		ts = &ats;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(retval, SCARG(uap, fds), SCARG(uap, nfds), ts, mask);
}

int
pollcommon(register_t *retval, struct pollfd *u_fds, u_int nfds,
    struct timespec *ts, sigset_t *mask)
{
	struct pollfd	smallfds[32];
	struct pollfd	*fds;
	int		error;
	size_t		ni;

	if (nfds > 1000 + curlwp->l_fd->fd_dt->dt_nfiles) {
		/*
		 * Either the user passed in a very sparse 'fds' or junk!
		 * The kmem_alloc() call below would be bad news.
		 * We could process the 'fds' array in chunks, but that
		 * is a lot of code that isn't normally useful.
		 * (Or just move the copyin/out into pollscan().)
		 * Historically the code silently truncated 'fds' to
		 * dt_nfiles entries - but that does cause issues.
		 */
		return EINVAL;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallfds)) {
		fds = kmem_alloc(ni, KM_SLEEP);
		if (fds == NULL)
			return ENOMEM;
	} else
		fds = smallfds;

	error = copyin(u_fds, fds, ni);
	if (error)
		goto fail;

	error = sel_do_scan(fds, nfds, ts, mask, retval, 0);
	if (error == 0)
		error = copyout(fds, u_fds, ni);
 fail:
	if (fds != smallfds)
		kmem_free(fds, ni);
	return (error);
}

static inline int
pollscan(struct pollfd *fds, u_int nfd, register_t *retval)
{
	int	i, n;
	file_t	*fp;

	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd < 0) {
			fds->revents = 0;
		} else if ((fp = fd_getfile(fds->fd)) == NULL) {
			fds->revents = POLLNVAL;
			n++;
		} else {
			fds->revents = (*fp->f_ops->fo_poll)(fp,
			    fds->events | POLLERR | POLLHUP);
			if (fds->revents != 0)
				n++;
			fd_putfile(fds->fd);
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, lwp_t *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
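
/*
 * selrecord() and selnotify() below are the driver-facing half of this
 * file.  A device driver typically pairs a revents check with a call
 * to selrecord() in its poll method.  A minimal sketch, assuming a
 * hypothetical driver whose softc holds a selinfo `sc_rsel' and a lock
 * that is also held around the matching selnotify() call (all names
 * illustrative, not a real driver):
 *
 *	int
 *	foo_poll(dev_t dev, int events, lwp_t *l)
 *	{
 *		struct foo_softc *sc = device_lookup_private(&foo_cd,
 *		    minor(dev));
 *		int revents = 0;
 *
 *		mutex_enter(&sc->sc_lock);
 *		if (events & (POLLIN | POLLRDNORM)) {
 *			if (sc->sc_havedata)
 *				revents |= events & (POLLIN | POLLRDNORM);
 *			else
 *				selrecord(l, &sc->sc_rsel);
 *		}
 *		mutex_exit(&sc->sc_lock);
 *		return revents;
 *	}
 */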
/*
 * Record a select request.  Concurrency issues:
 *
 * The caller holds the same lock across calls to selrecord() and
 * selnotify(), so we don't need to consider a concurrent wakeup
 * while in this routine.
 *
 * The only activity we need to guard against is selclear(), called by
 * another thread that is exiting sel_do_scan().
 * `sel_lwp' can only become non-NULL while the caller's lock is held,
 * so it cannot become non-NULL due to a change made by another thread
 * while we are in this routine.  It can only become _NULL_ due to a
 * call to selclear().
 *
 * If it is non-NULL and != selector there is the potential for
 * selclear() to be called by another thread.  If either of those
 * conditions is true, we're not interested in touching the `named
 * waiter' part of the selinfo record because we need to record a
 * collision.  Hence there is no need for additional locking in this
 * routine.
 */
void
selrecord(lwp_t *selector, struct selinfo *sip)
{
	selcpu_t	*sc;
	lwp_t		*other;

	KASSERT(selector == curlwp);

	sc = selector->l_selcpu;
	other = sip->sel_lwp;

	if (other == selector) {
		/* `selector' has already claimed it. */
		KASSERT(sip->sel_cpu == sc);
	} else if (other == NULL) {
		/*
		 * First named waiter, although there may be unnamed
		 * waiters (collisions).  Issue a memory barrier to
		 * ensure that we access sel_lwp (above) before other
		 * fields - this guards against a call to selclear().
		 */
		membar_enter();
		sip->sel_lwp = selector;
		SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
		/* Replace selinfo's lock with our chosen CPU's lock. */
		sip->sel_cpu = sc;
	} else {
		/* Multiple waiters: record a collision. */
		sip->sel_collision |= sc->sc_mask;
		KASSERT(sip->sel_cpu != NULL);
	}
}

/*
 * Do a wakeup when a selectable event occurs.  Concurrency issues:
 *
 * As per selrecord(), the caller's object lock is held.  If there
 * is a named waiter, we must acquire the associated selcpu's lock
 * in order to synchronize with selclear() and pollers going to sleep
 * in sel_do_scan().
 *
 * sip->sel_cpu cannot change at this point, as it is only changed
 * in selrecord(), and concurrent calls to selrecord() are locked
 * out by the caller.
 */
void
selnotify(struct selinfo *sip, int events, long knhint)
{
	selcpu_t	*sc;
	uint32_t	mask;
	int		index, oflag;
	lwp_t		*l;
	kmutex_t	*lock;

	KNOTE(&sip->sel_klist, knhint);

	if (sip->sel_lwp != NULL) {
		/* One named LWP is waiting. */
		sc = sip->sel_cpu;
		lock = sc->sc_lock;
		mutex_spin_enter(lock);
		/* Still there? */
		if (sip->sel_lwp != NULL) {
			l = sip->sel_lwp;
			/*
			 * If thread is sleeping, wake it up.  If it's not
			 * yet asleep, it will notice the change in state
			 * and will re-poll the descriptors.
			 */
			oflag = l->l_selflag;
			l->l_selflag = SEL_RESET;
			if (oflag == SEL_BLOCKING && l->l_mutex == lock) {
				KASSERT(l->l_wchan == sc);
				sleepq_unsleep(l, false);
			}
		}
		mutex_spin_exit(lock);
	}

	if ((mask = sip->sel_collision) != 0) {
		/*
		 * There was a collision (multiple waiters): we must
		 * inform all potentially interested waiters.
		 */
		sip->sel_collision = 0;
		do {
			index = ffs(mask) - 1;
			mask &= ~(1 << index);
			sc = cpu_lookup(index)->ci_data.cpu_selcpu;
			lock = sc->sc_lock;
			mutex_spin_enter(lock);
			sc->sc_ncoll++;
			sleepq_wake(&sc->sc_sleepq, sc, (u_int)-1, lock);
		} while (__predict_false(mask != 0));
	}
}
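
/*
 * The other half of the driver pattern sketched above foo_poll(): when
 * the event arrives (for example in an interrupt handler or softint),
 * the driver marks the condition true and calls selnotify() under the
 * same lock it held around selrecord().  Sketch only; foo_softc,
 * sc_lock and sc_rsel are the hypothetical names used above:
 *
 *	void
 *	foo_intr(void *arg)
 *	{
 *		struct foo_softc *sc = arg;
 *
 *		mutex_enter(&sc->sc_lock);
 *		sc->sc_havedata = true;
 *		selnotify(&sc->sc_rsel, POLLIN | POLLRDNORM, 0);
 *		mutex_exit(&sc->sc_lock);
 *	}
 */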
/*
 * Remove an LWP from all objects that it is waiting for.  Concurrency
 * issues:
 *
 * The object owner's (e.g. device driver) lock is not held here.  Calls
 * can be made to selrecord() and we do not synchronize against those
 * directly using locks.  However, we use `sel_lwp' to lock out changes.
 * Before clearing it we must use memory barriers to ensure that we can
 * safely traverse the list of selinfo records.
 */
static void
selclear(void)
{
	struct selinfo	*sip, *next;
	selcpu_t	*sc;
	lwp_t		*l;
	kmutex_t	*lock;

	l = curlwp;
	sc = l->l_selcpu;
	lock = sc->sc_lock;

	mutex_spin_enter(lock);
	for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) {
		KASSERT(sip->sel_lwp == l);
		KASSERT(sip->sel_cpu == l->l_selcpu);
		/*
		 * Read link to next selinfo record, if any.
		 * It's no longer safe to touch `sip' after clearing
		 * `sel_lwp', so ensure that the read of `sel_chain'
		 * completes before the clearing of sel_lwp becomes
		 * globally visible.
		 */
		next = SLIST_NEXT(sip, sel_chain);
		membar_exit();
		/* Release the record for another named waiter to use. */
		sip->sel_lwp = NULL;
	}
	mutex_spin_exit(lock);
}

/*
 * Initialize the select/poll system calls.  Called once for each
 * CPU in the system, as they are attached.
 */
void
selsysinit(struct cpu_info *ci)
{
	selcpu_t *sc;

	sc = kmem_alloc(roundup2(sizeof(selcpu_t), coherency_unit) +
	    coherency_unit, KM_SLEEP);
	sc = (void *)roundup2((uintptr_t)sc, coherency_unit);
	sc->sc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	sleepq_init(&sc->sc_sleepq);
	sc->sc_ncoll = 0;
	sc->sc_mask = (1 << cpu_index(ci));
	ci->ci_data.cpu_selcpu = sc;
}

/*
 * Initialize a selinfo record.
 */
void
selinit(struct selinfo *sip)
{

	memset(sip, 0, sizeof(*sip));
}
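
/*
 * selinit() above and seldestroy() below bracket the life of a selinfo
 * record.  A hypothetical driver embeds the record in its softc and
 * pairs the two calls across attach/detach (names illustrative, sketch
 * only):
 *
 *	struct foo_softc {
 *		kmutex_t	sc_lock;
 *		struct selinfo	sc_rsel;
 *		bool		sc_havedata;
 *	};
 *
 *	foo_attach:	selinit(&sc->sc_rsel);
 *	foo_poll:	selrecord(l, &sc->sc_rsel) when no data is ready
 *	foo_intr:	selnotify(&sc->sc_rsel, POLLIN | POLLRDNORM, 0)
 *	foo_detach:	seldestroy(&sc->sc_rsel), once no new references
 *			can be made and no further selnotify() will occur
 */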
/*
 * Destroy a selinfo record.  The owning object must not gain new
 * references while this is in progress: all activity on the record
 * must be stopped.
 *
 * Concurrency issues: we only need to guard against a call to
 * selclear() by a thread exiting sel_do_scan().  The caller has
 * prevented further references being made to the selinfo record via
 * selrecord(), and it won't call selwakeup() again.
 */
void
seldestroy(struct selinfo *sip)
{
	selcpu_t *sc;
	kmutex_t *lock;
	lwp_t *l;

	if (sip->sel_lwp == NULL)
		return;

	/*
	 * Lock out selclear().  The selcpu pointer can't change while
	 * we are here since it is only ever changed in selrecord(),
	 * and that will not be entered again for this record because
	 * it is dying.
	 */
	KASSERT(sip->sel_cpu != NULL);
	sc = sip->sel_cpu;
	lock = sc->sc_lock;
	mutex_spin_enter(lock);
	if ((l = sip->sel_lwp) != NULL) {
		/*
		 * This should rarely happen, so although SLIST_REMOVE()
		 * is slow, using it here is not a problem.
		 */
		KASSERT(l->l_selcpu == sc);
		SLIST_REMOVE(&l->l_selwait, sip, selinfo, sel_chain);
		sip->sel_lwp = NULL;
	}
	mutex_spin_exit(lock);
}

int
pollsock(struct socket *so, const struct timespec *tsp, int events)
{
	int		ncoll, error, timo;
	struct timespec	sleepts, ts;
	selcpu_t	*sc;
	lwp_t		*l;
	kmutex_t	*lock;

	timo = 0;
	if (tsp != NULL) {
		ts = *tsp;
		if (inittimeleft(&ts, &sleepts) == -1)
			return EINVAL;
	}

	l = curlwp;
	sc = l->l_cpu->ci_data.cpu_selcpu;
	lock = sc->sc_lock;
	l->l_selcpu = sc;
	SLIST_INIT(&l->l_selwait);
	error = 0;
	for (;;) {
		/*
		 * No need to lock.  If this is overwritten by another
		 * value while scanning, we will retry below.  We only
		 * need to see exact state from the descriptors that
		 * we are about to poll, and lock activity resulting
		 * from fo_poll is enough to provide an up to date value
		 * for new polling activity.
		 */
		ncoll = sc->sc_ncoll;
		l->l_selflag = SEL_SCANNING;
		if (sopoll(so, events) != 0)
			break;
		if (tsp && (timo = gettimeleft(&ts, &sleepts)) <= 0)
			break;
		mutex_spin_enter(lock);
		if (l->l_selflag != SEL_SCANNING || sc->sc_ncoll != ncoll) {
			mutex_spin_exit(lock);
			continue;
		}
		l->l_selflag = SEL_BLOCKING;
		sleepq_enter(&sc->sc_sleepq, l, lock);
		sleepq_enqueue(&sc->sc_sleepq, sc, "pollsock", &select_sobj);
		error = sleepq_block(timo, true);
		if (error != 0)
			break;
	}
	selclear();
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	return (error);
}
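
/*
 * pollsock() lets kernel code wait on a single socket without going
 * through a file descriptor.  A minimal sketch of a caller, assuming a
 * socket `so' already created in the kernel (illustrative only):
 *
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *	int error;
 *
 *	error = pollsock(so, &ts, POLLIN);
 *	if (error == 0)
 *		... POLLIN may be ready, or the timeout expired ...
 *	else if (error == EINTR)
 *		... interrupted by a signal ...
 *
 * A return of 0 covers both the "became ready" and "timed out" cases,
 * so the caller must re-check the socket state itself.
 */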