1 /* $NetBSD: sys_generic.c,v 1.64 2002/08/25 23:15:21 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.64 2002/08/25 23:15:21 thorpej Exp $"); 45 46 #include "opt_ktrace.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/filedesc.h> 51 #include <sys/ioctl.h> 52 #include <sys/file.h> 53 #include <sys/proc.h> 54 #include <sys/socketvar.h> 55 #include <sys/signalvar.h> 56 #include <sys/uio.h> 57 #include <sys/kernel.h> 58 #include <sys/stat.h> 59 #include <sys/malloc.h> 60 #include <sys/poll.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 65 #include <sys/mount.h> 66 #include <sys/syscallargs.h> 67 68 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *)); 69 int pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 70 71 /* 72 * Read system call. 73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct proc *p, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct filedesc *fdp; 86 87 fd = SCARG(uap, fd); 88 fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 93 if ((fp->f_flag & FREAD) == 0) 94 return (EBADF); 95 96 FILE_USE(fp); 97 98 /* dofileread() will unuse the descriptor for us */ 99 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 100 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 101 } 102 103 int 104 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 105 off_t *offset, int flags, register_t *retval) 106 { 107 struct uio auio; 108 struct iovec aiov; 109 size_t cnt; 110 int error; 111 #ifdef KTRACE 112 struct iovec ktriov; 113 #endif 114 error = 0; 115 116 aiov.iov_base = (caddr_t)buf; 117 aiov.iov_len = nbyte; 118 auio.uio_iov = &aiov; 119 auio.uio_iovcnt = 1; 120 auio.uio_resid = nbyte; 121 auio.uio_rw = UIO_READ; 122 auio.uio_segflg = UIO_USERSPACE; 123 auio.uio_procp = p; 124 125 /* 126 * Reads return ssize_t because -1 is returned on error. Therefore 127 * we must restrict the length to SSIZE_MAX to avoid garbage return 128 * values. 129 */ 130 if (auio.uio_resid > SSIZE_MAX) { 131 error = EINVAL; 132 goto out; 133 } 134 135 #ifdef KTRACE 136 /* 137 * if tracing, save a copy of iovec 138 */ 139 if (KTRPOINT(p, KTR_GENIO)) 140 ktriov = aiov; 141 #endif 142 cnt = auio.uio_resid; 143 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 144 if (error) 145 if (auio.uio_resid != cnt && (error == ERESTART || 146 error == EINTR || error == EWOULDBLOCK)) 147 error = 0; 148 cnt -= auio.uio_resid; 149 #ifdef KTRACE 150 if (KTRPOINT(p, KTR_GENIO) && error == 0) 151 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 152 #endif 153 *retval = cnt; 154 out: 155 FILE_UNUSE(fp, p); 156 return (error); 157 } 158 159 /* 160 * Scatter read system call. 161 */ 162 int 163 sys_readv(struct proc *p, void *v, register_t *retval) 164 { 165 struct sys_readv_args /* { 166 syscallarg(int) fd; 167 syscallarg(const struct iovec *) iovp; 168 syscallarg(int) iovcnt; 169 } */ *uap = v; 170 int fd; 171 struct file *fp; 172 struct filedesc *fdp; 173 174 fd = SCARG(uap, fd); 175 fdp = p->p_fd; 176 177 if ((fp = fd_getfile(fdp, fd)) == NULL) 178 return (EBADF); 179 180 if ((fp->f_flag & FREAD) == 0) 181 return (EBADF); 182 183 FILE_USE(fp); 184 185 /* dofilereadv() will unuse the descriptor for us */ 186 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 187 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 188 } 189 190 int 191 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 192 int iovcnt, off_t *offset, int flags, register_t *retval) 193 { 194 struct uio auio; 195 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 196 int i, error; 197 size_t cnt; 198 u_int iovlen; 199 #ifdef KTRACE 200 struct iovec *ktriov; 201 #endif 202 203 error = 0; 204 #ifdef KTRACE 205 ktriov = NULL; 206 #endif 207 /* note: can't use iovlen until iovcnt is validated */ 208 iovlen = iovcnt * sizeof(struct iovec); 209 if ((u_int)iovcnt > UIO_SMALLIOV) { 210 if ((u_int)iovcnt > IOV_MAX) { 211 error = EINVAL; 212 goto out; 213 } 214 iov = malloc(iovlen, M_IOV, M_WAITOK); 215 needfree = iov; 216 } else if ((u_int)iovcnt > 0) { 217 iov = aiov; 218 needfree = NULL; 219 } else { 220 error = EINVAL; 221 goto out; 222 } 223 224 auio.uio_iov = iov; 225 auio.uio_iovcnt = iovcnt; 226 auio.uio_rw = UIO_READ; 227 auio.uio_segflg = UIO_USERSPACE; 228 auio.uio_procp = p; 229 error = copyin(iovp, iov, iovlen); 230 if (error) 231 goto done; 232 auio.uio_resid = 0; 233 for (i = 0; i < iovcnt; i++) { 234 auio.uio_resid += iov->iov_len; 235 /* 236 * Reads return ssize_t because -1 is returned on error. 237 * Therefore we must restrict the length to SSIZE_MAX to 238 * avoid garbage return values. 239 */ 240 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 241 error = EINVAL; 242 goto done; 243 } 244 iov++; 245 } 246 #ifdef KTRACE 247 /* 248 * if tracing, save a copy of iovec 249 */ 250 if (KTRPOINT(p, KTR_GENIO)) { 251 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 252 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 253 } 254 #endif 255 cnt = auio.uio_resid; 256 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 257 if (error) 258 if (auio.uio_resid != cnt && (error == ERESTART || 259 error == EINTR || error == EWOULDBLOCK)) 260 error = 0; 261 cnt -= auio.uio_resid; 262 #ifdef KTRACE 263 if (ktriov != NULL) { 264 if (error == 0) 265 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error); 266 free(ktriov, M_TEMP); 267 } 268 #endif 269 *retval = cnt; 270 done: 271 if (needfree) 272 free(needfree, M_IOV); 273 out: 274 FILE_UNUSE(fp, p); 275 return (error); 276 } 277 278 /* 279 * Write system call 280 */ 281 int 282 sys_write(struct proc *p, void *v, register_t *retval) 283 { 284 struct sys_write_args /* { 285 syscallarg(int) fd; 286 syscallarg(const void *) buf; 287 syscallarg(size_t) nbyte; 288 } */ *uap = v; 289 int fd; 290 struct file *fp; 291 struct filedesc *fdp; 292 293 fd = SCARG(uap, fd); 294 fdp = p->p_fd; 295 296 if ((fp = fd_getfile(fdp, fd)) == NULL) 297 return (EBADF); 298 299 if ((fp->f_flag & FWRITE) == 0) 300 return (EBADF); 301 302 FILE_USE(fp); 303 304 /* dofilewrite() will unuse the descriptor for us */ 305 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 306 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 307 } 308 309 int 310 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 311 size_t nbyte, off_t *offset, int flags, register_t *retval) 312 { 313 struct uio auio; 314 struct iovec aiov; 315 size_t cnt; 316 int error; 317 #ifdef KTRACE 318 struct iovec ktriov; 319 #endif 320 321 error = 0; 322 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 323 aiov.iov_len = nbyte; 324 auio.uio_iov = &aiov; 325 auio.uio_iovcnt = 1; 326 auio.uio_resid = nbyte; 327 auio.uio_rw = UIO_WRITE; 328 auio.uio_segflg = UIO_USERSPACE; 329 auio.uio_procp = p; 330 331 /* 332 * Writes return ssize_t because -1 is returned on error. Therefore 333 * we must restrict the length to SSIZE_MAX to avoid garbage return 334 * values. 335 */ 336 if (auio.uio_resid > SSIZE_MAX) { 337 error = EINVAL; 338 goto out; 339 } 340 341 #ifdef KTRACE 342 /* 343 * if tracing, save a copy of iovec 344 */ 345 if (KTRPOINT(p, KTR_GENIO)) 346 ktriov = aiov; 347 #endif 348 cnt = auio.uio_resid; 349 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 350 if (error) { 351 if (auio.uio_resid != cnt && (error == ERESTART || 352 error == EINTR || error == EWOULDBLOCK)) 353 error = 0; 354 if (error == EPIPE) 355 psignal(p, SIGPIPE); 356 } 357 cnt -= auio.uio_resid; 358 #ifdef KTRACE 359 if (KTRPOINT(p, KTR_GENIO) && error == 0) 360 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 361 #endif 362 *retval = cnt; 363 out: 364 FILE_UNUSE(fp, p); 365 return (error); 366 } 367 368 /* 369 * Gather write system call 370 */ 371 int 372 sys_writev(struct proc *p, void *v, register_t *retval) 373 { 374 struct sys_writev_args /* { 375 syscallarg(int) fd; 376 syscallarg(const struct iovec *) iovp; 377 syscallarg(int) iovcnt; 378 } */ *uap = v; 379 int fd; 380 struct file *fp; 381 struct filedesc *fdp; 382 383 fd = SCARG(uap, fd); 384 fdp = p->p_fd; 385 386 if ((fp = fd_getfile(fdp, fd)) == NULL) 387 return (EBADF); 388 389 if ((fp->f_flag & FWRITE) == 0) 390 return (EBADF); 391 392 FILE_USE(fp); 393 394 /* dofilewritev() will unuse the descriptor for us */ 395 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 396 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 397 } 398 399 int 400 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 401 int iovcnt, off_t *offset, int flags, register_t *retval) 402 { 403 struct uio auio; 404 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 405 int i, error; 406 size_t cnt; 407 u_int iovlen; 408 #ifdef KTRACE 409 struct iovec *ktriov; 410 #endif 411 412 error = 0; 413 #ifdef KTRACE 414 ktriov = NULL; 415 #endif 416 /* note: can't use iovlen until iovcnt is validated */ 417 iovlen = iovcnt * sizeof(struct iovec); 418 if ((u_int)iovcnt > UIO_SMALLIOV) { 419 if ((u_int)iovcnt > IOV_MAX) { 420 error = EINVAL; 421 goto out; 422 } 423 iov = malloc(iovlen, M_IOV, M_WAITOK); 424 needfree = iov; 425 } else if ((u_int)iovcnt > 0) { 426 iov = aiov; 427 needfree = NULL; 428 } else { 429 error = EINVAL; 430 goto out; 431 } 432 433 auio.uio_iov = iov; 434 auio.uio_iovcnt = iovcnt; 435 auio.uio_rw = UIO_WRITE; 436 auio.uio_segflg = UIO_USERSPACE; 437 auio.uio_procp = p; 438 error = copyin(iovp, iov, iovlen); 439 if (error) 440 goto done; 441 auio.uio_resid = 0; 442 for (i = 0; i < iovcnt; i++) { 443 auio.uio_resid += iov->iov_len; 444 /* 445 * Writes return ssize_t because -1 is returned on error. 446 * Therefore we must restrict the length to SSIZE_MAX to 447 * avoid garbage return values. 448 */ 449 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 450 error = EINVAL; 451 goto done; 452 } 453 iov++; 454 } 455 #ifdef KTRACE 456 /* 457 * if tracing, save a copy of iovec 458 */ 459 if (KTRPOINT(p, KTR_GENIO)) { 460 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 461 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 462 } 463 #endif 464 cnt = auio.uio_resid; 465 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 466 if (error) { 467 if (auio.uio_resid != cnt && (error == ERESTART || 468 error == EINTR || error == EWOULDBLOCK)) 469 error = 0; 470 if (error == EPIPE) 471 psignal(p, SIGPIPE); 472 } 473 cnt -= auio.uio_resid; 474 #ifdef KTRACE 475 if (KTRPOINT(p, KTR_GENIO)) 476 if (error == 0) { 477 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 478 free(ktriov, M_TEMP); 479 } 480 #endif 481 *retval = cnt; 482 done: 483 if (needfree) 484 free(needfree, M_IOV); 485 out: 486 FILE_UNUSE(fp, p); 487 return (error); 488 } 489 490 /* 491 * Ioctl system call 492 */ 493 /* ARGSUSED */ 494 int 495 sys_ioctl(struct proc *p, void *v, register_t *retval) 496 { 497 struct sys_ioctl_args /* { 498 syscallarg(int) fd; 499 syscallarg(u_long) com; 500 syscallarg(caddr_t) data; 501 } */ *uap = v; 502 struct file *fp; 503 struct filedesc *fdp; 504 u_long com; 505 int error; 506 u_int size; 507 caddr_t data, memp; 508 int tmp; 509 #define STK_PARAMS 128 510 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 511 512 error = 0; 513 fdp = p->p_fd; 514 515 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 516 return (EBADF); 517 518 FILE_USE(fp); 519 520 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 521 error = EBADF; 522 goto out; 523 } 524 525 switch (com = SCARG(uap, com)) { 526 case FIONCLEX: 527 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 528 goto out; 529 530 case FIOCLEX: 531 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 532 goto out; 533 } 534 535 /* 536 * Interpret high order word to find amount of data to be 537 * copied to/from the user's address space. 538 */ 539 size = IOCPARM_LEN(com); 540 if (size > IOCPARM_MAX) { 541 error = ENOTTY; 542 goto out; 543 } 544 memp = NULL; 545 if (size > sizeof(stkbuf)) { 546 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 547 data = memp; 548 } else 549 data = (caddr_t)stkbuf; 550 if (com&IOC_IN) { 551 if (size) { 552 error = copyin(SCARG(uap, data), data, size); 553 if (error) { 554 if (memp) 555 free(memp, M_IOCTLOPS); 556 goto out; 557 } 558 } else 559 *(caddr_t *)data = SCARG(uap, data); 560 } else if ((com&IOC_OUT) && size) 561 /* 562 * Zero the buffer so the user always 563 * gets back something deterministic. 564 */ 565 memset(data, 0, size); 566 else if (com&IOC_VOID) 567 *(caddr_t *)data = SCARG(uap, data); 568 569 switch (com) { 570 571 case FIONBIO: 572 if ((tmp = *(int *)data) != 0) 573 fp->f_flag |= FNONBLOCK; 574 else 575 fp->f_flag &= ~FNONBLOCK; 576 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 577 break; 578 579 case FIOASYNC: 580 if ((tmp = *(int *)data) != 0) 581 fp->f_flag |= FASYNC; 582 else 583 fp->f_flag &= ~FASYNC; 584 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 585 break; 586 587 case FIOSETOWN: 588 tmp = *(int *)data; 589 if (fp->f_type == DTYPE_SOCKET) { 590 ((struct socket *)fp->f_data)->so_pgid = tmp; 591 error = 0; 592 break; 593 } 594 if (tmp <= 0) { 595 tmp = -tmp; 596 } else { 597 struct proc *p1 = pfind(tmp); 598 if (p1 == 0) { 599 error = ESRCH; 600 break; 601 } 602 tmp = p1->p_pgrp->pg_id; 603 } 604 error = (*fp->f_ops->fo_ioctl) 605 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 606 break; 607 608 case FIOGETOWN: 609 if (fp->f_type == DTYPE_SOCKET) { 610 error = 0; 611 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 612 break; 613 } 614 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 615 if (error == 0) 616 *(int *)data = -*(int *)data; 617 break; 618 619 default: 620 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 621 /* 622 * Copy any data to user, size was 623 * already set and checked above. 624 */ 625 if (error == 0 && (com&IOC_OUT) && size) 626 error = copyout(data, SCARG(uap, data), size); 627 break; 628 } 629 if (memp) 630 free(memp, M_IOCTLOPS); 631 out: 632 FILE_UNUSE(fp, p); 633 switch (error) { 634 case -1: 635 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 636 "pid=%d comm=%s\n", 637 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 638 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 639 p->p_pid, p->p_comm); 640 /* FALLTHROUGH */ 641 case EPASSTHROUGH: 642 error = ENOTTY; 643 /* FALLTHROUGH */ 644 default: 645 return (error); 646 } 647 } 648 649 int selwait, nselcoll; 650 651 /* 652 * Select system call. 653 */ 654 int 655 sys_select(struct proc *p, void *v, register_t *retval) 656 { 657 struct sys_select_args /* { 658 syscallarg(int) nd; 659 syscallarg(fd_set *) in; 660 syscallarg(fd_set *) ou; 661 syscallarg(fd_set *) ex; 662 syscallarg(struct timeval *) tv; 663 } */ *uap = v; 664 caddr_t bits; 665 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 666 sizeof(fd_mask) * 6]; 667 struct timeval atv; 668 int s, ncoll, error, timo; 669 size_t ni; 670 671 error = 0; 672 if (SCARG(uap, nd) < 0) 673 return (EINVAL); 674 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 675 /* forgiving; slightly wrong */ 676 SCARG(uap, nd) = p->p_fd->fd_nfiles; 677 } 678 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 679 if (ni * 6 > sizeof(smallbits)) 680 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 681 else 682 bits = smallbits; 683 684 #define getbits(name, x) \ 685 if (SCARG(uap, name)) { \ 686 error = copyin(SCARG(uap, name), bits + ni * x, ni); \ 687 if (error) \ 688 goto done; \ 689 } else \ 690 memset(bits + ni * x, 0, ni); 691 getbits(in, 0); 692 getbits(ou, 1); 693 getbits(ex, 2); 694 #undef getbits 695 696 if (SCARG(uap, tv)) { 697 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 698 sizeof(atv)); 699 if (error) 700 goto done; 701 if (itimerfix(&atv)) { 702 error = EINVAL; 703 goto done; 704 } 705 s = splclock(); 706 timeradd(&atv, &time, &atv); 707 splx(s); 708 } else 709 timo = 0; 710 retry: 711 ncoll = nselcoll; 712 p->p_flag |= P_SELECT; 713 error = selscan(p, (fd_mask *)(bits + ni * 0), 714 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval); 715 if (error || *retval) 716 goto done; 717 if (SCARG(uap, tv)) { 718 /* 719 * We have to recalculate the timeout on every retry. 720 */ 721 timo = hzto(&atv); 722 if (timo <= 0) 723 goto done; 724 } 725 s = splsched(); 726 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 727 splx(s); 728 goto retry; 729 } 730 p->p_flag &= ~P_SELECT; 731 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 732 splx(s); 733 if (error == 0) 734 goto retry; 735 done: 736 p->p_flag &= ~P_SELECT; 737 /* select is not restarted after signals... */ 738 if (error == ERESTART) 739 error = EINTR; 740 if (error == EWOULDBLOCK) 741 error = 0; 742 if (error == 0) { 743 744 #define putbits(name, x) \ 745 if (SCARG(uap, name)) { \ 746 error = copyout(bits + ni * x, SCARG(uap, name), ni); \ 747 if (error) \ 748 goto out; \ 749 } 750 putbits(in, 3); 751 putbits(ou, 4); 752 putbits(ex, 5); 753 #undef putbits 754 } 755 out: 756 if (ni * 6 > sizeof(smallbits)) 757 free(bits, M_TEMP); 758 return (error); 759 } 760 761 int 762 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd, 763 register_t *retval) 764 { 765 struct filedesc *fdp; 766 int msk, i, j, fd, n; 767 fd_mask ibits, obits; 768 struct file *fp; 769 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 770 POLLWRNORM | POLLHUP | POLLERR, 771 POLLRDBAND }; 772 773 fdp = p->p_fd; 774 n = 0; 775 for (msk = 0; msk < 3; msk++) { 776 for (i = 0; i < nfd; i += NFDBITS) { 777 ibits = *ibitp++; 778 obits = 0; 779 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 780 ibits &= ~(1 << j); 781 if ((fp = fd_getfile(fdp, fd)) == NULL) 782 return (EBADF); 783 FILE_USE(fp); 784 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 785 obits |= (1 << j); 786 n++; 787 } 788 FILE_UNUSE(fp, p); 789 } 790 *obitp++ = obits; 791 } 792 } 793 *retval = n; 794 return (0); 795 } 796 797 /* 798 * Poll system call. 799 */ 800 int 801 sys_poll(struct proc *p, void *v, register_t *retval) 802 { 803 struct sys_poll_args /* { 804 syscallarg(struct pollfd *) fds; 805 syscallarg(u_int) nfds; 806 syscallarg(int) timeout; 807 } */ *uap = v; 808 caddr_t bits; 809 char smallbits[32 * sizeof(struct pollfd)]; 810 struct timeval atv; 811 int s, ncoll, error, timo; 812 size_t ni; 813 814 error = 0; 815 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 816 /* forgiving; slightly wrong */ 817 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 818 } 819 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 820 if (ni > sizeof(smallbits)) 821 bits = malloc(ni, M_TEMP, M_WAITOK); 822 else 823 bits = smallbits; 824 825 error = copyin(SCARG(uap, fds), bits, ni); 826 if (error) 827 goto done; 828 829 if (SCARG(uap, timeout) != INFTIM) { 830 atv.tv_sec = SCARG(uap, timeout) / 1000; 831 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 832 if (itimerfix(&atv)) { 833 error = EINVAL; 834 goto done; 835 } 836 s = splclock(); 837 timeradd(&atv, &time, &atv); 838 splx(s); 839 } else 840 timo = 0; 841 retry: 842 ncoll = nselcoll; 843 p->p_flag |= P_SELECT; 844 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval); 845 if (error || *retval) 846 goto done; 847 if (SCARG(uap, timeout) != INFTIM) { 848 /* 849 * We have to recalculate the timeout on every retry. 850 */ 851 timo = hzto(&atv); 852 if (timo <= 0) 853 goto done; 854 } 855 s = splsched(); 856 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 857 splx(s); 858 goto retry; 859 } 860 p->p_flag &= ~P_SELECT; 861 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 862 splx(s); 863 if (error == 0) 864 goto retry; 865 done: 866 p->p_flag &= ~P_SELECT; 867 /* poll is not restarted after signals... */ 868 if (error == ERESTART) 869 error = EINTR; 870 if (error == EWOULDBLOCK) 871 error = 0; 872 if (error == 0) { 873 error = copyout(bits, SCARG(uap, fds), ni); 874 if (error) 875 goto out; 876 } 877 out: 878 if (ni > sizeof(smallbits)) 879 free(bits, M_TEMP); 880 return (error); 881 } 882 883 int 884 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval) 885 { 886 struct filedesc *fdp; 887 int i, n; 888 struct file *fp; 889 890 fdp = p->p_fd; 891 n = 0; 892 for (i = 0; i < nfd; i++, fds++) { 893 if (fds->fd >= fdp->fd_nfiles) { 894 fds->revents = POLLNVAL; 895 n++; 896 } else if (fds->fd < 0) { 897 fds->revents = 0; 898 } else { 899 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 900 fds->revents = POLLNVAL; 901 n++; 902 } else { 903 FILE_USE(fp); 904 fds->revents = (*fp->f_ops->fo_poll)(fp, 905 fds->events | POLLERR | POLLHUP, p); 906 if (fds->revents != 0) 907 n++; 908 FILE_UNUSE(fp, p); 909 } 910 } 911 } 912 *retval = n; 913 return (0); 914 } 915 916 /*ARGSUSED*/ 917 int 918 seltrue(dev_t dev, int events, struct proc *p) 919 { 920 921 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 922 } 923 924 /* 925 * Record a select request. 926 */ 927 void 928 selrecord(struct proc *selector, struct selinfo *sip) 929 { 930 struct proc *p; 931 pid_t mypid; 932 933 mypid = selector->p_pid; 934 if (sip->si_pid == mypid) 935 return; 936 if (sip->si_pid && (p = pfind(sip->si_pid)) && 937 p->p_wchan == (caddr_t)&selwait) 938 sip->si_flags |= SI_COLL; 939 else { 940 sip->si_flags &= ~SI_COLL; 941 sip->si_pid = mypid; 942 } 943 } 944 945 /* 946 * Do a wakeup when a selectable event occurs. 947 */ 948 void 949 selwakeup(sip) 950 struct selinfo *sip; 951 { 952 struct proc *p; 953 int s; 954 955 if (sip->si_pid == 0) 956 return; 957 if (sip->si_flags & SI_COLL) { 958 nselcoll++; 959 sip->si_flags &= ~SI_COLL; 960 wakeup((caddr_t)&selwait); 961 } 962 p = pfind(sip->si_pid); 963 sip->si_pid = 0; 964 if (p != NULL) { 965 SCHED_LOCK(s); 966 if (p->p_wchan == (caddr_t)&selwait) { 967 if (p->p_stat == SSLEEP) 968 setrunnable(p); 969 else 970 unsleep(p); 971 } else if (p->p_flag & P_SELECT) 972 p->p_flag &= ~P_SELECT; 973 SCHED_UNLOCK(s); 974 } 975 } 976