1 /* $OpenBSD: sys_generic.c,v 1.47 2003/12/10 23:10:08 millert Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 59 #include <sys/mount.h> 60 #include <sys/syscallargs.h> 61 62 #include <uvm/uvm_extern.h> 63 64 int selscan(struct proc *, fd_set *, fd_set *, int, register_t *); 65 int seltrue(dev_t, int, struct proc *); 66 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 67 68 /* 69 * Read system call. 
 */
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* The descriptor must exist and have been opened for reading. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofileread() will FRELE the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common backend for read(2): build a single-segment uio describing the
 * user buffer, hand it to the file's fo_read op, and return the number
 * of bytes transferred in *retval.  Always FRELEs fp before returning;
 * the caller must already hold an FREF reference.
 */
int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/* An interrupted partial transfer still reports success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FRELE(fp);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common backend for readv(2): copy in the user's iovec array (a stack
 * buffer is used for up to UIO_SMALLIOV entries, malloc otherwise),
 * validate the per-segment and total lengths against SSIZE_MAX, then
 * pass the uio to the file's fo_read op.  Always FRELEs fp on return.
 */
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt of zero (or negative, via the u_int cast) is invalid. */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/* An interrupted partial transfer still reports success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* The descriptor must exist and have been opened for writing. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewrite() will FRELE the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common backend for write(2): build a single-segment uio describing
 * the user buffer, hand it to the file's fo_write op, and return the
 * number of bytes transferred in *retval.  Posts SIGPIPE on EPIPE.
 * Always FRELEs fp before returning.
 */
int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (void *)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* An interrupted partial transfer still reports success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FRELE(fp);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common backend for writev(2): copy in the user's iovec array (a stack
 * buffer is used for up to UIO_SMALLIOV entries, malloc otherwise),
 * validate the per-segment and total lengths against SSIZE_MAX, then
 * pass the uio to the file's fo_write op.  Posts SIGPIPE on EPIPE.
 * Always FRELEs fp on return.
 */
int
dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt of zero (or negative, via the u_int cast) is invalid. */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* An interrupted partial transfer still reports success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/*
	 * The close-on-exec flag lives in the descriptor table, not the
	 * file itself, so handle those two requests before FREF'ing fp.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	FREF(fp);
	memp = NULL;
	/* Small argument buffers live on the stack, large ones in malloc. */
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				goto out;
			}
		} else
			/* Zero-size IOC_IN: the pointer itself is the argument. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		/* Non-socket: negative means process group, positive a pid. */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
out:
	FRELE(fp);
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

/* Sleep channel for select/poll waiters and select collision counter. */
int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_set bits[6], *pibits[3], *pobits[3];
	struct timeval atv;
	int s, ncoll, error = 0, timo;
	u_int nd, ni;

	nd = SCARG(uap, nd);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	/*
	 * Six fd_set-sized areas: three input sets and three output sets.
	 * They fit in the stack array unless nd exceeds FD_SETSIZE, in
	 * which case one malloc'd area is carved into six pieces.
	 */
	if (nd > FD_SETSIZE) {
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero(bits, sizeof(bits));
		pibits[0] = &bits[0];
		pibits[1] = &bits[1];
		pibits[2] = &bits[2];
		pobits[0] = &bits[3];
		pobits[1] = &bits[4];
		pobits[2] = &bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
	    pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, pibits[0], pobits[0], nd, retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * If a collision was detected or P_SELECT was cleared while
	 * scanning, an event may have been missed: rescan instead of
	 * sleeping.
	 */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
	    SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (pibits[0] != &bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

/*
 * Scan the three input fd_sets (read/write/except), calling each named
 * descriptor's fo_poll op with POLLIN, POLLOUT or POLLPRI respectively.
 * Ready descriptors are marked in the corresponding output set and the
 * total count is returned in *retval.  Returns EBADF if a set names a
 * descriptor that is not open.
 */
int
selscan(p, ibits, obits, nfd, retval)
	struct proc *p;
	fd_set *ibits, *obits;
	int nfd;
	register_t *retval;
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register fd_mask bits;
	struct file *fp;
	int ni, n = 0;
	static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };

	/*
	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
	 * up to the next byte) otherwise the fd_set's are normal sized.
	 */
	ni = sizeof(fd_set);
	if (nfd > FD_SETSIZE)
		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FREF(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp);
			}
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll routine for devices that are always ready: reports normal read
 * and write readiness for whichever of those events were requested.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	/*
	 * Only one pid can be remembered per selinfo; if another process
	 * is already waiting in select on this object, flag a collision
	 * so selwakeup() will wake everybody on &selwait.
	 */
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		/* Collision: wake every process sleeping on &selwait. */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			/* Not asleep yet: clearing P_SELECT forces a rescan. */
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

/*
 * Poll each pollfd entry via its file's fo_poll op, filling in revents.
 * Negative fds are skipped with revents = 0; unopened fds get POLLNVAL.
 * The count of entries with non-zero revents is returned in *retval.
 */
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	u_int nfd;
	register_t *retval;
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;
	int n = 0;

	for (i = 0; i < nfd; i++, pl++) {
		/* Check the file descriptor. */
		if (pl->fd < 0) {
			pl->revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
			pl->revents = POLLNVAL;
			n++;
			continue;
		}
		FREF(fp);
		pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
		FRELE(fp);
		if (pl->revents != 0)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv;
	int timo, ncoll, i, s, error;
	extern int nselcoll, selwait;
	u_int nfds = SCARG(uap, nfds);

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * nfds;

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < nfds; i++)
		pl[i].revents = 0;

	if (msec != INFTIM) {
		/* Convert the millisecond timeout to an absolute deadline. */
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, nfds, retval);
	if (*retval)
		goto done;
	if (msec != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * If a collision was detected or P_SELECT was cleared while
	 * scanning, an event may have been missed: rescan instead of
	 * sleeping.
	 */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 * ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case ERESTART:
		error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = copyout(pl, SCARG(uap, fds), sz);
		break;
	}
bad:
	if (pl != pfds)
		free(pl, M_TEMP);
	return (error);
}