/*	$OpenBSD: sys_generic.c,v 1.66 2009/06/08 23:18:42 deraadt Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/sched.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
int seltrue(dev_t, int, struct proc *);
void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
int pollout(struct pollfd *, struct pollfd *, u_int);

/*
 * Read system call.
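 *
 * Wraps the user's (buf, nbyte) pair in a single iovec and hands it to
 * dofilereadv(), which performs the transfer at the file's current offset
 * and releases the file reference.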
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	iov.iov_base = SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval));
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
	    &fp->f_offset, retval));
}

int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, int userspace, off_t *offset, register_t *retval)
{
	struct iovec aiov[UIO_SMALLIOV];
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree = NULL;
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);

	/*
	 * If the iovec array exists in userspace, it needs to be copied in;
	 * otherwise, it can be used directly.
	 */
	if (userspace) {
		if ((u_int)iovcnt > UIO_SMALLIOV) {
			if ((u_int)iovcnt > IOV_MAX) {
				error = EINVAL;
				goto out;
			}
			iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
		} else if ((u_int)iovcnt > 0) {
			iov = aiov;
			needfree = NULL;
		} else {
			error = EINVAL;
			goto out;
		}
		if ((error = copyin(iovp, iov, iovlen)))
			goto done;
	} else {
		iov = (struct iovec *)iovp;		/* de-constify */
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
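		 * The running total is checked as well, so a set of
		 * individually valid lengths cannot add up past SSIZE_MAX.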
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;

	fp->f_rxfer++;
	fp->f_rbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	iov.iov_base = (void *)SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval));
}

/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
	    &fp->f_offset, retval));
}

int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, int userspace, off_t *offset, register_t *retval)
{
	struct iovec aiov[UIO_SMALLIOV];
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree = NULL;
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);

	/*
	 * If the iovec array exists in userspace, it needs to be copied in;
	 * otherwise, it can be used directly.
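	 * Only the userspace path gets the UIO_SMALLIOV/IOV_MAX checks and
	 * the copyin below; kernel callers are trusted to pass a sane array.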
	 */
	if (userspace) {
		if ((u_int)iovcnt > UIO_SMALLIOV) {
			if ((u_int)iovcnt > IOV_MAX) {
				error = EINVAL;
				goto out;
			}
			iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
		} else if ((u_int)iovcnt > 0) {
			iov = aiov;
			needfree = NULL;
		} else {
			error = EINVAL;
			goto out;
		}
		if ((error = copyin(iovp, iov, iovlen)))
			goto done;
	} else {
		iov = (struct iovec *)iovp;		/* de-constify */
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			ptsignal(p, SIGPIPE, STHREAD);
	}
	cnt -= auio.uio_resid;

	fp->f_wxfer++;
	fp->f_wbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	FREF(fp);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
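		 * Otherwise a handler that fills only part of the buffer
		 * (or fails) could let stale kernel bytes reach userspace
		 * through the copyout at the end.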
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		break;
	}
	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (com&IOC_OUT) && size)
		error = copyout(data, SCARG(uap, data), (u_int)size);
out:
	FRELE(fp);
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_mask bits[6];
	fd_set *pibits[3], *pobits[3];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	u_int nd, ni;

	nd = SCARG(uap, nd);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (nd > sizeof(bits[0])) {
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK|M_ZERO);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero(bits, sizeof(bits));
		pibits[0] = (fd_set *)&bits[0];
		pibits[1] = (fd_set *)&bits[1];
		pibits[2] = (fd_set *)&bits[2];
		pobits[0] = (fd_set *)&bits[3];
		pobits[1] = (fd_set *)&bits[4];
		pobits[2] = (fd_set *)&bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
	    pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

retry:
	ncoll = nselcoll;
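	/*
	 * nselcoll was sampled above and P_SELECT is set below; if
	 * selwakeup() clears the flag or bumps the counter while we scan,
	 * the check before tsleep() sends us back to retry instead of
	 * sleeping.
	 */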
	atomic_setbits_int(&p->p_flag, P_SELECT);
	error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
	    SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (pibits[0] != (fd_set *)&bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

int
selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
    register_t *retval)
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FREF(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
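 *
 * A single recorded selector is woken directly (or has P_SELECT cleared so
 * it will rescan); if a collision was recorded, nselcoll is bumped and
 * everything sleeping on selwait is woken.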
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			atomic_clearbits_int(&p->p_flag, P_SELECT);
		SCHED_UNLOCK(s);
	}
}

void
pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;
	int n = 0;

	for (i = 0; i < nfd; i++, pl++) {
		/* Check the file descriptor. */
		if (pl->fd < 0) {
			pl->revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
			pl->revents = POLLNVAL;
			n++;
			continue;
		}
		FREF(fp);
		pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
		FRELE(fp);
		if (pl->revents != 0)
			n++;
	}
	*retval = n;
}

/*
 * Only copyout the revents field.
 */
int
pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds)
{
	int error = 0;
	u_int i = 0;

	while (!error && i++ < nfds) {
		error = copyout(&pl->revents, &upl->revents,
		    sizeof(upl->revents));
		pl++;
		upl++;
	}

	return (error);
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv, rtv, ttv;
	int timo, ncoll, i, s, error;
	extern int nselcoll, selwait;
	u_int nfds = SCARG(uap, nfds);

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * nfds;

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < nfds; i++)
		pl[i].revents = 0;

	if (msec != INFTIM) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	pollscan(p, pl, nfds, retval);
	if (*retval)
		goto done;
	if (msec != INFTIM) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
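		/*
		 * Cap the sleep at 24 hours of ticks, as sys_select() does
		 * above, presumably so the timeval-to-tick conversion stays
		 * in range.
		 */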
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 * ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case ERESTART:
		error = pollout(pl, SCARG(uap, fds), nfds);
		if (error == 0)
			error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = pollout(pl, SCARG(uap, fds), nfds);
		break;
	}
bad:
	if (pl != pfds)
		free(pl, M_TEMP);
	return (error);
}