1 /* $OpenBSD: sys_generic.c,v 1.49 2004/06/24 19:35:24 tholo Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, register_t *); 66 int seltrue(dev_t, int, struct proc *); 67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 68 69 /* 70 * Read system call. 71 */ 72 /* ARGSUSED */ 73 int 74 sys_read(p, v, retval) 75 struct proc *p; 76 void *v; 77 register_t *retval; 78 { 79 struct sys_read_args /* { 80 syscallarg(int) fd; 81 syscallarg(void *) buf; 82 syscallarg(size_t) nbyte; 83 } */ *uap = v; 84 int fd = SCARG(uap, fd); 85 struct file *fp; 86 struct filedesc *fdp = p->p_fd; 87 88 if ((fp = fd_getfile(fdp, fd)) == NULL) 89 return (EBADF); 90 if ((fp->f_flag & FREAD) == 0) 91 return (EBADF); 92 93 FREF(fp); 94 95 /* dofileread() will FRELE the descriptor for us */ 96 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 97 &fp->f_offset, retval)); 98 } 99 100 int 101 dofileread(p, fd, fp, buf, nbyte, offset, retval) 102 struct proc *p; 103 int fd; 104 struct file *fp; 105 void *buf; 106 size_t nbyte; 107 off_t *offset; 108 register_t *retval; 109 { 110 struct uio auio; 111 struct iovec aiov; 112 long cnt, error = 0; 113 #ifdef KTRACE 114 struct iovec ktriov; 115 #endif 116 117 aiov.iov_base = buf; 118 aiov.iov_len = nbyte; 119 auio.uio_iov = &aiov; 120 auio.uio_iovcnt = 1; 121 auio.uio_resid = nbyte; 122 auio.uio_rw = UIO_READ; 123 auio.uio_segflg = UIO_USERSPACE; 124 auio.uio_procp = p; 125 126 /* 127 * Reads return ssize_t because -1 is returned on error. Therefore 128 * we must restrict the length to SSIZE_MAX to avoid garbage return 129 * values. 130 */ 131 if (auio.uio_resid > SSIZE_MAX) { 132 error = EINVAL; 133 goto out; 134 } 135 136 #ifdef KTRACE 137 /* 138 * if tracing, save a copy of iovec 139 */ 140 if (KTRPOINT(p, KTR_GENIO)) 141 ktriov = aiov; 142 #endif 143 cnt = auio.uio_resid; 144 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 145 if (error) 146 if (auio.uio_resid != cnt && (error == ERESTART || 147 error == EINTR || error == EWOULDBLOCK)) 148 error = 0; 149 cnt -= auio.uio_resid; 150 #ifdef KTRACE 151 if (KTRPOINT(p, KTR_GENIO) && error == 0) 152 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 153 #endif 154 *retval = cnt; 155 out: 156 FRELE(fp); 157 return (error); 158 } 159 160 /* 161 * Scatter read system call. 162 */ 163 int 164 sys_readv(p, v, retval) 165 struct proc *p; 166 void *v; 167 register_t *retval; 168 { 169 struct sys_readv_args /* { 170 syscallarg(int) fd; 171 syscallarg(const struct iovec *) iovp; 172 syscallarg(int) iovcnt; 173 } */ *uap = v; 174 int fd = SCARG(uap, fd); 175 struct file *fp; 176 struct filedesc *fdp = p->p_fd; 177 178 if ((fp = fd_getfile(fdp, fd)) == NULL) 179 return (EBADF); 180 if ((fp->f_flag & FREAD) == 0) 181 return (EBADF); 182 183 FREF(fp); 184 185 /* dofilereadv() will FRELE the descriptor for us */ 186 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 187 &fp->f_offset, retval)); 188 } 189 190 int 191 dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval) 192 struct proc *p; 193 int fd; 194 struct file *fp; 195 const struct iovec *iovp; 196 int iovcnt; 197 off_t *offset; 198 register_t *retval; 199 { 200 struct uio auio; 201 struct iovec *iov; 202 struct iovec *needfree; 203 struct iovec aiov[UIO_SMALLIOV]; 204 long i, cnt, error = 0; 205 u_int iovlen; 206 #ifdef KTRACE 207 struct iovec *ktriov = NULL; 208 #endif 209 210 /* note: can't use iovlen until iovcnt is validated */ 211 iovlen = iovcnt * sizeof(struct iovec); 212 if ((u_int)iovcnt > UIO_SMALLIOV) { 213 if ((u_int)iovcnt > IOV_MAX) { 214 error = EINVAL; 215 goto out; 216 } 217 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 218 } else if ((u_int)iovcnt > 0) { 219 iov = aiov; 220 needfree = NULL; 221 } else { 222 error = EINVAL; 223 goto out; 224 } 225 226 auio.uio_iov = iov; 227 auio.uio_iovcnt = iovcnt; 228 auio.uio_rw = UIO_READ; 229 auio.uio_segflg = UIO_USERSPACE; 230 auio.uio_procp = p; 231 error = copyin(iovp, iov, iovlen); 232 if (error) 233 goto done; 234 auio.uio_resid = 0; 235 for (i = 0; i < iovcnt; i++) { 236 auio.uio_resid += iov->iov_len; 237 /* 238 * Reads return ssize_t because -1 is returned on error. 239 * Therefore we must restrict the length to SSIZE_MAX to 240 * avoid garbage return values. 241 */ 242 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 243 error = EINVAL; 244 goto done; 245 } 246 iov++; 247 } 248 #ifdef KTRACE 249 /* 250 * if tracing, save a copy of iovec 251 */ 252 if (KTRPOINT(p, KTR_GENIO)) { 253 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 254 bcopy(auio.uio_iov, ktriov, iovlen); 255 } 256 #endif 257 cnt = auio.uio_resid; 258 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 259 if (error) 260 if (auio.uio_resid != cnt && (error == ERESTART || 261 error == EINTR || error == EWOULDBLOCK)) 262 error = 0; 263 cnt -= auio.uio_resid; 264 #ifdef KTRACE 265 if (ktriov != NULL) { 266 if (error == 0) 267 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 268 error); 269 free(ktriov, M_TEMP); 270 } 271 #endif 272 *retval = cnt; 273 done: 274 if (needfree) 275 free(needfree, M_IOV); 276 out: 277 FRELE(fp); 278 return (error); 279 } 280 281 /* 282 * Write system call 283 */ 284 int 285 sys_write(p, v, retval) 286 struct proc *p; 287 void *v; 288 register_t *retval; 289 { 290 struct sys_write_args /* { 291 syscallarg(int) fd; 292 syscallarg(const void *) buf; 293 syscallarg(size_t) nbyte; 294 } */ *uap = v; 295 int fd = SCARG(uap, fd); 296 struct file *fp; 297 struct filedesc *fdp = p->p_fd; 298 299 if ((fp = fd_getfile(fdp, fd)) == NULL) 300 return (EBADF); 301 if ((fp->f_flag & FWRITE) == 0) 302 return (EBADF); 303 304 FREF(fp); 305 306 /* dofilewrite() will FRELE the descriptor for us */ 307 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 308 &fp->f_offset, retval)); 309 } 310 311 int 312 dofilewrite(p, fd, fp, buf, nbyte, offset, retval) 313 struct proc *p; 314 int fd; 315 struct file *fp; 316 const void *buf; 317 size_t nbyte; 318 off_t *offset; 319 register_t *retval; 320 { 321 struct uio auio; 322 struct iovec aiov; 323 long cnt, error = 0; 324 #ifdef KTRACE 325 struct iovec ktriov; 326 #endif 327 328 aiov.iov_base = (void *)buf; /* XXX kills const */ 329 aiov.iov_len = nbyte; 330 auio.uio_iov = &aiov; 331 auio.uio_iovcnt = 1; 332 auio.uio_resid = nbyte; 333 auio.uio_rw = UIO_WRITE; 334 auio.uio_segflg = UIO_USERSPACE; 335 auio.uio_procp = p; 336 337 /* 338 * Writes return ssize_t because -1 is returned on error. Therefore 339 * we must restrict the length to SSIZE_MAX to avoid garbage return 340 * values. 341 */ 342 if (auio.uio_resid > SSIZE_MAX) { 343 error = EINVAL; 344 goto out; 345 } 346 347 #ifdef KTRACE 348 /* 349 * if tracing, save a copy of iovec 350 */ 351 if (KTRPOINT(p, KTR_GENIO)) 352 ktriov = aiov; 353 #endif 354 cnt = auio.uio_resid; 355 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 356 if (error) { 357 if (auio.uio_resid != cnt && (error == ERESTART || 358 error == EINTR || error == EWOULDBLOCK)) 359 error = 0; 360 if (error == EPIPE) 361 psignal(p, SIGPIPE); 362 } 363 cnt -= auio.uio_resid; 364 #ifdef KTRACE 365 if (KTRPOINT(p, KTR_GENIO) && error == 0) 366 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 367 #endif 368 *retval = cnt; 369 out: 370 FRELE(fp); 371 return (error); 372 } 373 374 /* 375 * Gather write system call 376 */ 377 int 378 sys_writev(p, v, retval) 379 struct proc *p; 380 void *v; 381 register_t *retval; 382 { 383 struct sys_writev_args /* { 384 syscallarg(int) fd; 385 syscallarg(const struct iovec *) iovp; 386 syscallarg(int) iovcnt; 387 } */ *uap = v; 388 int fd = SCARG(uap, fd); 389 struct file *fp; 390 struct filedesc *fdp = p->p_fd; 391 392 if ((fp = fd_getfile(fdp, fd)) == NULL) 393 return (EBADF); 394 if ((fp->f_flag & FWRITE) == 0) 395 return (EBADF); 396 397 FREF(fp); 398 399 /* dofilewritev() will FRELE the descriptor for us */ 400 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 401 &fp->f_offset, retval)); 402 } 403 404 int 405 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval) 406 struct proc *p; 407 int fd; 408 struct file *fp; 409 const struct iovec *iovp; 410 int iovcnt; 411 off_t *offset; 412 register_t *retval; 413 { 414 struct uio auio; 415 struct iovec *iov; 416 struct iovec *needfree; 417 struct iovec aiov[UIO_SMALLIOV]; 418 long i, cnt, error = 0; 419 u_int iovlen; 420 #ifdef KTRACE 421 struct iovec *ktriov = NULL; 422 #endif 423 424 /* note: can't use iovlen until iovcnt is validated */ 425 iovlen = iovcnt * sizeof(struct iovec); 426 if ((u_int)iovcnt > UIO_SMALLIOV) { 427 if ((u_int)iovcnt > IOV_MAX) { 428 error = EINVAL; 429 goto out; 430 } 431 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 432 } else if ((u_int)iovcnt > 0) { 433 iov = aiov; 434 needfree = NULL; 435 } else { 436 error = EINVAL; 437 goto out; 438 } 439 440 auio.uio_iov = iov; 441 auio.uio_iovcnt = iovcnt; 442 auio.uio_rw = UIO_WRITE; 443 auio.uio_segflg = UIO_USERSPACE; 444 auio.uio_procp = p; 445 error = copyin(iovp, iov, iovlen); 446 if (error) 447 goto done; 448 auio.uio_resid = 0; 449 for (i = 0; i < iovcnt; i++) { 450 auio.uio_resid += iov->iov_len; 451 /* 452 * Writes return ssize_t because -1 is returned on error. 453 * Therefore we must restrict the length to SSIZE_MAX to 454 * avoid garbage return values. 455 */ 456 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 457 error = EINVAL; 458 goto done; 459 } 460 iov++; 461 } 462 #ifdef KTRACE 463 /* 464 * if tracing, save a copy of iovec 465 */ 466 if (KTRPOINT(p, KTR_GENIO)) { 467 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 468 bcopy(auio.uio_iov, ktriov, iovlen); 469 } 470 #endif 471 cnt = auio.uio_resid; 472 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 473 if (error) { 474 if (auio.uio_resid != cnt && (error == ERESTART || 475 error == EINTR || error == EWOULDBLOCK)) 476 error = 0; 477 if (error == EPIPE) 478 psignal(p, SIGPIPE); 479 } 480 cnt -= auio.uio_resid; 481 #ifdef KTRACE 482 if (ktriov != NULL) { 483 if (error == 0) 484 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, 485 error); 486 free(ktriov, M_TEMP); 487 } 488 #endif 489 *retval = cnt; 490 done: 491 if (needfree) 492 free(needfree, M_IOV); 493 out: 494 FRELE(fp); 495 return (error); 496 } 497 498 /* 499 * Ioctl system call 500 */ 501 /* ARGSUSED */ 502 int 503 sys_ioctl(p, v, retval) 504 struct proc *p; 505 void *v; 506 register_t *retval; 507 { 508 struct sys_ioctl_args /* { 509 syscallarg(int) fd; 510 syscallarg(u_long) com; 511 syscallarg(void *) data; 512 } */ *uap = v; 513 struct file *fp; 514 struct filedesc *fdp; 515 u_long com; 516 int error; 517 u_int size; 518 caddr_t data, memp; 519 int tmp; 520 #define STK_PARAMS 128 521 char stkbuf[STK_PARAMS]; 522 523 fdp = p->p_fd; 524 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 525 return (EBADF); 526 527 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 528 return (EBADF); 529 530 switch (com = SCARG(uap, com)) { 531 case FIONCLEX: 532 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 533 return (0); 534 case FIOCLEX: 535 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 536 return (0); 537 } 538 539 /* 540 * Interpret high order word to find amount of data to be 541 * copied to/from the user's address space. 542 */ 543 size = IOCPARM_LEN(com); 544 if (size > IOCPARM_MAX) 545 return (ENOTTY); 546 FREF(fp); 547 memp = NULL; 548 if (size > sizeof (stkbuf)) { 549 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 550 data = memp; 551 } else 552 data = stkbuf; 553 if (com&IOC_IN) { 554 if (size) { 555 error = copyin(SCARG(uap, data), data, (u_int)size); 556 if (error) { 557 goto out; 558 } 559 } else 560 *(caddr_t *)data = SCARG(uap, data); 561 } else if ((com&IOC_OUT) && size) 562 /* 563 * Zero the buffer so the user always 564 * gets back something deterministic. 565 */ 566 bzero(data, size); 567 else if (com&IOC_VOID) 568 *(caddr_t *)data = SCARG(uap, data); 569 570 switch (com) { 571 572 case FIONBIO: 573 if ((tmp = *(int *)data) != 0) 574 fp->f_flag |= FNONBLOCK; 575 else 576 fp->f_flag &= ~FNONBLOCK; 577 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 578 break; 579 580 case FIOASYNC: 581 if ((tmp = *(int *)data) != 0) 582 fp->f_flag |= FASYNC; 583 else 584 fp->f_flag &= ~FASYNC; 585 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 586 break; 587 588 case FIOSETOWN: 589 tmp = *(int *)data; 590 if (fp->f_type == DTYPE_SOCKET) { 591 struct socket *so = (struct socket *)fp->f_data; 592 593 so->so_pgid = tmp; 594 so->so_siguid = p->p_cred->p_ruid; 595 so->so_sigeuid = p->p_ucred->cr_uid; 596 error = 0; 597 break; 598 } 599 if (tmp <= 0) { 600 tmp = -tmp; 601 } else { 602 struct proc *p1 = pfind(tmp); 603 if (p1 == 0) { 604 error = ESRCH; 605 break; 606 } 607 tmp = p1->p_pgrp->pg_id; 608 } 609 error = (*fp->f_ops->fo_ioctl) 610 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 611 break; 612 613 case FIOGETOWN: 614 if (fp->f_type == DTYPE_SOCKET) { 615 error = 0; 616 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 617 break; 618 } 619 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 620 *(int *)data = -*(int *)data; 621 break; 622 623 default: 624 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 625 /* 626 * Copy any data to user, size was 627 * already set and checked above. 628 */ 629 if (error == 0 && (com&IOC_OUT) && size) 630 error = copyout(data, SCARG(uap, data), (u_int)size); 631 break; 632 } 633 out: 634 FRELE(fp); 635 if (memp) 636 free(memp, M_IOCTLOPS); 637 return (error); 638 } 639 640 int selwait, nselcoll; 641 642 /* 643 * Select system call. 644 */ 645 int 646 sys_select(struct proc *p, void *v, register_t *retval) 647 { 648 struct sys_select_args /* { 649 syscallarg(int) nd; 650 syscallarg(fd_set *) in; 651 syscallarg(fd_set *) ou; 652 syscallarg(fd_set *) ex; 653 syscallarg(struct timeval *) tv; 654 } */ *uap = v; 655 fd_set bits[6], *pibits[3], *pobits[3]; 656 struct timeval atv, rtv, ttv; 657 int s, ncoll, error = 0, timo; 658 u_int nd, ni; 659 660 nd = SCARG(uap, nd); 661 if (nd > p->p_fd->fd_nfiles) { 662 /* forgiving; slightly wrong */ 663 nd = p->p_fd->fd_nfiles; 664 } 665 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 666 if (nd > FD_SETSIZE) { 667 caddr_t mbits; 668 669 mbits = malloc(ni * 6, M_TEMP, M_WAITOK); 670 bzero(mbits, ni * 6); 671 pibits[0] = (fd_set *)&mbits[ni * 0]; 672 pibits[1] = (fd_set *)&mbits[ni * 1]; 673 pibits[2] = (fd_set *)&mbits[ni * 2]; 674 pobits[0] = (fd_set *)&mbits[ni * 3]; 675 pobits[1] = (fd_set *)&mbits[ni * 4]; 676 pobits[2] = (fd_set *)&mbits[ni * 5]; 677 } else { 678 bzero(bits, sizeof(bits)); 679 pibits[0] = &bits[0]; 680 pibits[1] = &bits[1]; 681 pibits[2] = &bits[2]; 682 pobits[0] = &bits[3]; 683 pobits[1] = &bits[4]; 684 pobits[2] = &bits[5]; 685 } 686 687 #define getbits(name, x) \ 688 if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \ 689 pibits[x], ni))) \ 690 goto done; 691 getbits(in, 0); 692 getbits(ou, 1); 693 getbits(ex, 2); 694 #undef getbits 695 696 if (SCARG(uap, tv)) { 697 error = copyin(SCARG(uap, tv), &atv, sizeof (atv)); 698 if (error) 699 goto done; 700 if (itimerfix(&atv)) { 701 error = EINVAL; 702 goto done; 703 } 704 getmicrouptime(&rtv); 705 timeradd(&atv, &rtv, &atv); 706 } else { 707 atv.tv_sec = 0; 708 atv.tv_usec = 0; 709 } 710 timo = 0; 711 712 retry: 713 ncoll = nselcoll; 714 p->p_flag |= P_SELECT; 715 error = selscan(p, pibits[0], pobits[0], nd, retval); 716 if (error || *retval) 717 goto done; 718 if (SCARG(uap, tv)) { 719 getmicrouptime(&rtv); 720 if (timercmp(&rtv, &atv, >=)) 721 goto done; 722 ttv = atv; 723 timersub(&ttv, &rtv, &ttv); 724 timo = ttv.tv_sec > 24 * 60 * 60 ? 725 24 * 60 * 60 * hz : tvtohz(&ttv); 726 } 727 s = splhigh(); 728 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 729 splx(s); 730 goto retry; 731 } 732 p->p_flag &= ~P_SELECT; 733 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 734 splx(s); 735 if (error == 0) 736 goto retry; 737 done: 738 p->p_flag &= ~P_SELECT; 739 /* select is not restarted after signals... */ 740 if (error == ERESTART) 741 error = EINTR; 742 if (error == EWOULDBLOCK) 743 error = 0; 744 #define putbits(name, x) \ 745 if (SCARG(uap, name) && (error2 = copyout(pobits[x], \ 746 SCARG(uap, name), ni))) \ 747 error = error2; 748 if (error == 0) { 749 int error2; 750 751 putbits(in, 0); 752 putbits(ou, 1); 753 putbits(ex, 2); 754 #undef putbits 755 } 756 757 if (pibits[0] != &bits[0]) 758 free(pibits[0], M_TEMP); 759 return (error); 760 } 761 762 int 763 selscan(p, ibits, obits, nfd, retval) 764 struct proc *p; 765 fd_set *ibits, *obits; 766 int nfd; 767 register_t *retval; 768 { 769 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 770 register struct filedesc *fdp = p->p_fd; 771 register int msk, i, j, fd; 772 register fd_mask bits; 773 struct file *fp; 774 int ni, n = 0; 775 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI }; 776 777 /* 778 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 779 * up to the next byte) otherwise the fd_set's are normal sized. 780 */ 781 ni = sizeof(fd_set); 782 if (nfd > FD_SETSIZE) 783 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 784 785 for (msk = 0; msk < 3; msk++) { 786 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 787 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 788 789 for (i = 0; i < nfd; i += NFDBITS) { 790 bits = pibits->fds_bits[i/NFDBITS]; 791 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 792 bits &= ~(1 << j); 793 if ((fp = fd_getfile(fdp, fd)) == NULL) 794 return (EBADF); 795 FREF(fp); 796 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 797 FD_SET(fd, pobits); 798 n++; 799 } 800 FRELE(fp); 801 } 802 } 803 } 804 *retval = n; 805 return (0); 806 } 807 808 /*ARGSUSED*/ 809 int 810 seltrue(dev, events, p) 811 dev_t dev; 812 int events; 813 struct proc *p; 814 { 815 816 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 817 } 818 819 /* 820 * Record a select request. 821 */ 822 void 823 selrecord(selector, sip) 824 struct proc *selector; 825 struct selinfo *sip; 826 { 827 struct proc *p; 828 pid_t mypid; 829 830 mypid = selector->p_pid; 831 if (sip->si_selpid == mypid) 832 return; 833 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 834 p->p_wchan == (caddr_t)&selwait) 835 sip->si_flags |= SI_COLL; 836 else 837 sip->si_selpid = mypid; 838 } 839 840 /* 841 * Do a wakeup when a selectable event occurs. 842 */ 843 void 844 selwakeup(sip) 845 register struct selinfo *sip; 846 { 847 register struct proc *p; 848 int s; 849 850 if (sip->si_selpid == 0) 851 return; 852 if (sip->si_flags & SI_COLL) { 853 nselcoll++; 854 sip->si_flags &= ~SI_COLL; 855 wakeup(&selwait); 856 } 857 p = pfind(sip->si_selpid); 858 sip->si_selpid = 0; 859 if (p != NULL) { 860 SCHED_LOCK(s); 861 if (p->p_wchan == (caddr_t)&selwait) { 862 if (p->p_stat == SSLEEP) 863 setrunnable(p); 864 else 865 unsleep(p); 866 } else if (p->p_flag & P_SELECT) 867 p->p_flag &= ~P_SELECT; 868 SCHED_UNLOCK(s); 869 } 870 } 871 872 void 873 pollscan(p, pl, nfd, retval) 874 struct proc *p; 875 struct pollfd *pl; 876 u_int nfd; 877 register_t *retval; 878 { 879 struct filedesc *fdp = p->p_fd; 880 struct file *fp; 881 u_int i; 882 int n = 0; 883 884 for (i = 0; i < nfd; i++, pl++) { 885 /* Check the file descriptor. */ 886 if (pl->fd < 0) { 887 pl->revents = 0; 888 continue; 889 } 890 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 891 pl->revents = POLLNVAL; 892 n++; 893 continue; 894 } 895 FREF(fp); 896 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 897 FRELE(fp); 898 if (pl->revents != 0) 899 n++; 900 } 901 *retval = n; 902 } 903 904 /* 905 * We are using the same mechanism as select only we encode/decode args 906 * differently. 907 */ 908 int 909 sys_poll(struct proc *p, void *v, register_t *retval) 910 { 911 struct sys_poll_args /* { 912 syscallarg(struct pollfd *) fds; 913 syscallarg(u_int) nfds; 914 syscallarg(int) timeout; 915 } */ *uap = v; 916 size_t sz; 917 struct pollfd pfds[4], *pl = pfds; 918 int msec = SCARG(uap, timeout); 919 struct timeval atv, rtv, ttv; 920 int timo, ncoll, i, s, error; 921 extern int nselcoll, selwait; 922 u_int nfds = SCARG(uap, nfds); 923 924 /* Standards say no more than MAX_OPEN; this is possibly better. */ 925 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 926 return (EINVAL); 927 928 sz = sizeof(struct pollfd) * nfds; 929 930 /* optimize for the default case, of a small nfds value */ 931 if (sz > sizeof(pfds)) 932 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 933 934 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) 935 goto bad; 936 937 for (i = 0; i < nfds; i++) 938 pl[i].revents = 0; 939 940 if (msec != INFTIM) { 941 atv.tv_sec = msec / 1000; 942 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; 943 944 if (itimerfix(&atv)) { 945 error = EINVAL; 946 goto done; 947 } 948 getmicrouptime(&rtv); 949 timeradd(&atv, &rtv, &atv); 950 } else { 951 atv.tv_sec = 0; 952 atv.tv_usec = 0; 953 } 954 timo = 0; 955 956 retry: 957 ncoll = nselcoll; 958 p->p_flag |= P_SELECT; 959 pollscan(p, pl, nfds, retval); 960 if (*retval) 961 goto done; 962 if (msec != INFTIM) { 963 getmicrouptime(&rtv); 964 if (timercmp(&rtv, &atv, >=)) 965 goto done; 966 ttv = atv; 967 timersub(&ttv, &rtv, &ttv); 968 timo = ttv.tv_sec > 24 * 60 * 60 ? 969 24 * 60 * 60 * hz : tvtohz(&ttv); 970 } 971 s = splhigh(); 972 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 973 splx(s); 974 goto retry; 975 } 976 p->p_flag &= ~P_SELECT; 977 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 978 splx(s); 979 if (error == 0) 980 goto retry; 981 982 done: 983 p->p_flag &= ~P_SELECT; 984 /* 985 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 986 * ignored (since the whole point is to see what would block). 987 */ 988 switch (error) { 989 case ERESTART: 990 error = EINTR; 991 break; 992 case EWOULDBLOCK: 993 case 0: 994 error = copyout(pl, SCARG(uap, fds), sz); 995 break; 996 } 997 bad: 998 if (pl != pfds) 999 free(pl, M_TEMP); 1000 return (error); 1001 } 1002