1 /* $OpenBSD: sys_generic.c,v 1.26 2000/07/07 14:33:20 art Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/resourcevar.h> 52 #include <sys/socketvar.h> 53 #include <sys/signalvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/stat.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #ifdef KTRACE 60 #include <sys/ktrace.h> 61 #endif 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *)); 67 int seltrue __P((dev_t, int, struct proc *)); 68 void pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 69 70 /* 71 * Read system call. 
 */
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/*
	 * Validate the descriptor: in range, open, and opened for
	 * reading.  The FIF_WANTCLOSE check is disabled pending the
	 * file-use reference counting work ("notyet" below).
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common code for read(2): build a single-segment uio over the user
 * buffer and hand it to the file's fo_read op at *offset.  On success
 * *retval holds the number of bytes actually transferred.
 */
int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/* A transfer partially completed before a signal succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Same descriptor validation as sys_read(). */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common code for readv(2): copy the iovec array in from user space
 * (heap-allocating it when larger than UIO_SMALLIOV entries), validate
 * the total length against SSIZE_MAX, then perform the fo_read.
 */
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;		/* heap iovec to release, or NULL */
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt == 0 (the u_int cast catches negative values above) */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/* A transfer partially completed before a signal succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_READ, ktriov, cnt,
			    error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		FREE(needfree, M_IOV);
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Write system call
 */
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Validate the descriptor; must be open for writing. */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common code for write(2): single-segment uio over the user buffer,
 * handed to the file's fo_write op.  Delivers SIGPIPE on EPIPE, per
 * the usual pipe/socket semantics.
 */
int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* A transfer partially completed before a signal succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Validate the descriptor; must be open for writing. */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common code for writev(2): mirror of dofilereadv() for the write
 * direction, including the SIGPIPE-on-EPIPE behaviour of dofilewrite().
 * NB: the IOV_MAX overrun here returns EINVAL directly rather than via
 * "goto out" as dofilereadv() does; both paths are equivalent since
 * nothing has been allocated yet.
 */
int
dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;		/* heap iovec to release, or NULL */
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX)
			return (EINVAL);
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt == 0 (the u_int cast catches negative values above) */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* A transfer partially completed before a signal succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_WRITE, ktriov, cnt,
			    error);
		FREE(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		FREE(needfree, M_IOV);
 out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;		/* argument buffer; memp != NULL if heap */
	int tmp;
#define STK_PARAMS	128		/* small args are staged on the stack */
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/*
	 * Close-on-exec flag manipulation lives entirely in the
	 * descriptor table; handle it without involving the file ops.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			/* zero-size IOC_IN: the "data" IS the argument */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Track the non-blocking flag in f_flag, then tell the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* sockets store the signal target directly */
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		/* otherwise translate pid/-pgid into a TIOCSPGRP */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		/* convert pgrp back into the FIOGETOWN sign convention */
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int	selwait, nselcoll;	/* select(2) sleep channel / collision count */

/*
 * Select system call.
669 */ 670 int 671 sys_select(p, v, retval) 672 register struct proc *p; 673 void *v; 674 register_t *retval; 675 { 676 register struct sys_select_args /* { 677 syscallarg(int) nd; 678 syscallarg(fd_set *) in; 679 syscallarg(fd_set *) ou; 680 syscallarg(fd_set *) ex; 681 syscallarg(struct timeval *) tv; 682 } */ *uap = v; 683 fd_set bits[6], *pibits[3], *pobits[3]; 684 struct timeval atv; 685 int s, ncoll, error = 0, timo; 686 u_int ni; 687 688 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 689 /* forgiving; slightly wrong */ 690 SCARG(uap, nd) = p->p_fd->fd_nfiles; 691 } 692 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 693 if (SCARG(uap, nd) > FD_SETSIZE) { 694 caddr_t mbits; 695 696 if ((mbits = malloc(ni * 6, M_TEMP, M_WAITOK)) == NULL) { 697 error = EINVAL; 698 goto cleanup; 699 } 700 bzero(mbits, ni * 6); 701 pibits[0] = (fd_set *)&mbits[ni * 0]; 702 pibits[1] = (fd_set *)&mbits[ni * 1]; 703 pibits[2] = (fd_set *)&mbits[ni * 2]; 704 pobits[0] = (fd_set *)&mbits[ni * 3]; 705 pobits[1] = (fd_set *)&mbits[ni * 4]; 706 pobits[2] = (fd_set *)&mbits[ni * 5]; 707 } else { 708 bzero((caddr_t)bits, sizeof(bits)); 709 pibits[0] = &bits[0]; 710 pibits[1] = &bits[1]; 711 pibits[2] = &bits[2]; 712 pobits[0] = &bits[3]; 713 pobits[1] = &bits[4]; 714 pobits[2] = &bits[5]; 715 } 716 717 #define getbits(name, x) \ 718 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ 719 (caddr_t)pibits[x], ni))) \ 720 goto done; 721 getbits(in, 0); 722 getbits(ou, 1); 723 getbits(ex, 2); 724 #undef getbits 725 726 if (SCARG(uap, tv)) { 727 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, 728 sizeof (atv)); 729 if (error) 730 goto done; 731 if (itimerfix(&atv)) { 732 error = EINVAL; 733 goto done; 734 } 735 s = splclock(); 736 timeradd(&atv, &time, &atv); 737 splx(s); 738 } else 739 timo = 0; 740 retry: 741 ncoll = nselcoll; 742 p->p_flag |= P_SELECT; 743 error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval); 744 if (error || *retval) 745 goto done; 
746 if (SCARG(uap, tv)) { 747 /* 748 * We have to recalculate the timeout on every retry. 749 */ 750 timo = hzto(&atv); 751 if (timo <= 0) 752 goto done; 753 } 754 s = splhigh(); 755 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 756 splx(s); 757 goto retry; 758 } 759 p->p_flag &= ~P_SELECT; 760 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 761 splx(s); 762 if (error == 0) 763 goto retry; 764 done: 765 p->p_flag &= ~P_SELECT; 766 /* select is not restarted after signals... */ 767 if (error == ERESTART) 768 error = EINTR; 769 if (error == EWOULDBLOCK) 770 error = 0; 771 #define putbits(name, x) \ 772 if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \ 773 (caddr_t)SCARG(uap, name), ni))) \ 774 error = error2; 775 if (error == 0) { 776 int error2; 777 778 putbits(in, 0); 779 putbits(ou, 1); 780 putbits(ex, 2); 781 #undef putbits 782 } 783 784 cleanup: 785 if (pibits[0] != &bits[0]) 786 free(pibits[0], M_TEMP); 787 return (error); 788 } 789 790 int 791 selscan(p, ibits, obits, nfd, retval) 792 struct proc *p; 793 fd_set *ibits, *obits; 794 int nfd; 795 register_t *retval; 796 { 797 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 798 register struct filedesc *fdp = p->p_fd; 799 register int msk, i, j, fd; 800 register fd_mask bits; 801 struct file *fp; 802 int ni, n = 0; 803 static int flag[3] = { FREAD, FWRITE, 0 }; 804 805 /* 806 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 807 * up to the next byte) otherwise the fd_set's are normal sized. 
808 */ 809 ni = sizeof(fd_set); 810 if (nfd > FD_SETSIZE) 811 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 812 813 for (msk = 0; msk < 3; msk++) { 814 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 815 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 816 817 for (i = 0; i < nfd; i += NFDBITS) { 818 bits = pibits->fds_bits[i/NFDBITS]; 819 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 820 bits &= ~(1 << j); 821 fp = fdp->fd_ofiles[fd]; 822 if (fp == NULL) 823 return (EBADF); 824 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 825 FD_SET(fd, pobits); 826 n++; 827 } 828 } 829 } 830 } 831 *retval = n; 832 return (0); 833 } 834 835 /*ARGSUSED*/ 836 int 837 seltrue(dev, flag, p) 838 dev_t dev; 839 int flag; 840 struct proc *p; 841 { 842 843 return (1); 844 } 845 846 /* 847 * Record a select request. 848 */ 849 void 850 selrecord(selector, sip) 851 struct proc *selector; 852 struct selinfo *sip; 853 { 854 struct proc *p; 855 pid_t mypid; 856 857 mypid = selector->p_pid; 858 if (sip->si_selpid == mypid) 859 return; 860 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 861 p->p_wchan == (caddr_t)&selwait) 862 sip->si_flags |= SI_COLL; 863 else 864 sip->si_selpid = mypid; 865 } 866 867 /* 868 * Do a wakeup when a selectable event occurs. 
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		/*
		 * Multiple processes collided on this selinfo; we only
		 * remember one pid, so broadcast to everyone in select.
		 */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			/* asleep in select/poll: make it runnable */
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			/* between scan and tsleep: cancel the sleep */
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

/*
 * Scan an array of pollfds for ready descriptors, filling in revents.
 * *retval counts the entries with any revents set (POLLNVAL included).
 */
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	/* map poll events onto the three fo_select() check classes */
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0)
			continue;	/* negative fd: ignore, per poll(2) */
		if (pl[i].fd >= fdp->fd_nfiles) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}

		fp = fdp->fd_ofiles[pl[i].fd];
		if (fp == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		for (x = msk = 0; msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		if (x)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);		/* INFTIM (-1) = no timeout */
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	/* note: error is 0 from here on unless explicitly set below */
	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	if (msec != -1) {
		/* convert milliseconds to an absolute timeval deadline */
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * Sleep unless a collision happened or selwakeup() already
	 * cleared P_SELECT between our scan and now.
	 */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	/* revents are written back even when interrupted, per poll(2) */
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}