1 /* $OpenBSD: sys_generic.c,v 1.27 2000/09/27 16:13:46 mickey Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/resourcevar.h> 52 #include <sys/socketvar.h> 53 #include <sys/signalvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/stat.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #ifdef KTRACE 60 #include <sys/ktrace.h> 61 #endif 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *)); 67 int seltrue __P((dev_t, int, struct proc *)); 68 void pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 69 70 /* 71 * Read system call. 
 */
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/*
	 * Validate the descriptor: in range, open, and opened for
	 * reading.  The FIF_WANTCLOSE test is inert until the
	 * fine-grained file reference counting (FILE_USE) goes in.
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common code for read(2): wrap the user buffer in a single-element
 * uio and hand it to the file's fo_read method.  On success *retval
 * is the number of bytes actually transferred.
 */
int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/*
		 * A transfer interrupted after moving some bytes is
		 * reported as a short read, not an error.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Same descriptor validation as sys_read(). */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common code for readv(2): copy in the iovec array (using the
 * on-stack aiov[] for small counts, malloc otherwise), validate the
 * total length against SSIZE_MAX, and pass the uio to fo_read.
 */
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;		/* non-NULL iff iov was malloc'd */
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		/* Partial transfer before interruption counts as success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_READ, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Write system call
 */
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Validate the descriptor; must be open for writing. */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

/*
 * Common code for write(2): mirror image of dofileread(), plus the
 * SIGPIPE delivery required when fo_write reports EPIPE.
 */
int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* Short write before interruption counts as success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	/* Validate the descriptor; must be open for writing. */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
#if notyet
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
#endif
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

#if notyet
	FILE_USE(fp);
#endif
	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

/*
 * Common code for writev(2): mirror image of dofilereadv() with
 * SIGPIPE handling on EPIPE.
 */
int
dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;		/* non-NULL iff iov was malloc'd */
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/*
		 * NOTE(review): unlike dofilereadv(), this returns
		 * directly rather than "goto out"; only observable once
		 * the "notyet" FILE_UNUSE code is enabled -- verify then.
		 */
		if ((u_int)iovcnt > IOV_MAX)
			return (EINVAL);
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* Short write before interruption counts as success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p->p_tracep, fd, UIO_WRITE, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
#if notyet
	FILE_UNUSE(fp, p);
#endif
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/*
	 * The close-on-exec flag lives in the descriptor table, not the
	 * file itself, so handle those two requests without calling down.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	/* Use the stack buffer when the argument fits, malloc otherwise. */
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			/* Zero-size IOC_IN: the "pointer" is the datum. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets keep the pgid/credential data directly. */
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		/*
		 * Non-sockets take a process group via TIOCSPGRP;
		 * a negative argument already names a pgrp, a positive
		 * one names a process whose pgrp we look up.
		 */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

/* Shared select/poll state: sleep channel and collision counter. */
int	selwait, nselcoll;

/*
 * Select system call.
667 */ 668 int 669 sys_select(p, v, retval) 670 register struct proc *p; 671 void *v; 672 register_t *retval; 673 { 674 register struct sys_select_args /* { 675 syscallarg(int) nd; 676 syscallarg(fd_set *) in; 677 syscallarg(fd_set *) ou; 678 syscallarg(fd_set *) ex; 679 syscallarg(struct timeval *) tv; 680 } */ *uap = v; 681 fd_set bits[6], *pibits[3], *pobits[3]; 682 struct timeval atv; 683 int s, ncoll, error = 0, timo; 684 u_int ni; 685 686 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 687 /* forgiving; slightly wrong */ 688 SCARG(uap, nd) = p->p_fd->fd_nfiles; 689 } 690 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 691 if (SCARG(uap, nd) > FD_SETSIZE) { 692 caddr_t mbits; 693 694 if ((mbits = malloc(ni * 6, M_TEMP, M_WAITOK)) == NULL) { 695 error = EINVAL; 696 goto cleanup; 697 } 698 bzero(mbits, ni * 6); 699 pibits[0] = (fd_set *)&mbits[ni * 0]; 700 pibits[1] = (fd_set *)&mbits[ni * 1]; 701 pibits[2] = (fd_set *)&mbits[ni * 2]; 702 pobits[0] = (fd_set *)&mbits[ni * 3]; 703 pobits[1] = (fd_set *)&mbits[ni * 4]; 704 pobits[2] = (fd_set *)&mbits[ni * 5]; 705 } else { 706 bzero((caddr_t)bits, sizeof(bits)); 707 pibits[0] = &bits[0]; 708 pibits[1] = &bits[1]; 709 pibits[2] = &bits[2]; 710 pobits[0] = &bits[3]; 711 pobits[1] = &bits[4]; 712 pobits[2] = &bits[5]; 713 } 714 715 #define getbits(name, x) \ 716 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ 717 (caddr_t)pibits[x], ni))) \ 718 goto done; 719 getbits(in, 0); 720 getbits(ou, 1); 721 getbits(ex, 2); 722 #undef getbits 723 724 if (SCARG(uap, tv)) { 725 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, 726 sizeof (atv)); 727 if (error) 728 goto done; 729 if (itimerfix(&atv)) { 730 error = EINVAL; 731 goto done; 732 } 733 s = splclock(); 734 timeradd(&atv, &time, &atv); 735 splx(s); 736 } else 737 timo = 0; 738 retry: 739 ncoll = nselcoll; 740 p->p_flag |= P_SELECT; 741 error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval); 742 if (error || *retval) 743 goto done; 
744 if (SCARG(uap, tv)) { 745 /* 746 * We have to recalculate the timeout on every retry. 747 */ 748 timo = hzto(&atv); 749 if (timo <= 0) 750 goto done; 751 } 752 s = splhigh(); 753 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 754 splx(s); 755 goto retry; 756 } 757 p->p_flag &= ~P_SELECT; 758 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 759 splx(s); 760 if (error == 0) 761 goto retry; 762 done: 763 p->p_flag &= ~P_SELECT; 764 /* select is not restarted after signals... */ 765 if (error == ERESTART) 766 error = EINTR; 767 if (error == EWOULDBLOCK) 768 error = 0; 769 #define putbits(name, x) \ 770 if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \ 771 (caddr_t)SCARG(uap, name), ni))) \ 772 error = error2; 773 if (error == 0) { 774 int error2; 775 776 putbits(in, 0); 777 putbits(ou, 1); 778 putbits(ex, 2); 779 #undef putbits 780 } 781 782 cleanup: 783 if (pibits[0] != &bits[0]) 784 free(pibits[0], M_TEMP); 785 return (error); 786 } 787 788 int 789 selscan(p, ibits, obits, nfd, retval) 790 struct proc *p; 791 fd_set *ibits, *obits; 792 int nfd; 793 register_t *retval; 794 { 795 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 796 register struct filedesc *fdp = p->p_fd; 797 register int msk, i, j, fd; 798 register fd_mask bits; 799 struct file *fp; 800 int ni, n = 0; 801 static int flag[3] = { FREAD, FWRITE, 0 }; 802 803 /* 804 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 805 * up to the next byte) otherwise the fd_set's are normal sized. 
806 */ 807 ni = sizeof(fd_set); 808 if (nfd > FD_SETSIZE) 809 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 810 811 for (msk = 0; msk < 3; msk++) { 812 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 813 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 814 815 for (i = 0; i < nfd; i += NFDBITS) { 816 bits = pibits->fds_bits[i/NFDBITS]; 817 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 818 bits &= ~(1 << j); 819 fp = fdp->fd_ofiles[fd]; 820 if (fp == NULL) 821 return (EBADF); 822 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 823 FD_SET(fd, pobits); 824 n++; 825 } 826 } 827 } 828 } 829 *retval = n; 830 return (0); 831 } 832 833 /*ARGSUSED*/ 834 int 835 seltrue(dev, flag, p) 836 dev_t dev; 837 int flag; 838 struct proc *p; 839 { 840 841 return (1); 842 } 843 844 /* 845 * Record a select request. 846 */ 847 void 848 selrecord(selector, sip) 849 struct proc *selector; 850 struct selinfo *sip; 851 { 852 struct proc *p; 853 pid_t mypid; 854 855 mypid = selector->p_pid; 856 if (sip->si_selpid == mypid) 857 return; 858 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 859 p->p_wchan == (caddr_t)&selwait) 860 sip->si_flags |= SI_COLL; 861 else 862 sip->si_selpid = mypid; 863 } 864 865 /* 866 * Do a wakeup when a selectable event occurs. 
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	/*
	 * On a collision more than one process may be waiting, and
	 * only one pid was recorded; broadcast on the shared channel.
	 */
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			/* Asleep in select/poll: wake it directly. */
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			/* Between scan and sleep: force a rescan. */
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

/*
 * Scan a pollfd array, polling each open descriptor with its
 * fo_select method and filling in revents.  *retval is the count of
 * entries with a non-zero result (including POLLNVAL entries).
 */
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	/* fo_select flag and the pollfd event bits it corresponds to */
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0)
			continue;
		if (pl[i].fd >= fdp->fd_nfiles) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}

		fp = fdp->fd_ofiles[pl[i].fd];
		if (fp == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		for (x = msk = 0; msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		if (x)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	/* msec == -1 (INFTIM) means block forever (timo stays 0). */
	if (msec != -1) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the interval into an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	/*
	 * Recheck for a collision or wakeup under splhigh before
	 * sleeping, same race-closing dance as sys_select().
	 */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	/* revents are copied out even on EINTR/EINVAL paths. */
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}