1 /* $NetBSD: sys_generic.c,v 1.57 2001/06/28 05:45:49 atatat Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 41 */ 42 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/socketvar.h> 52 #include <sys/signalvar.h> 53 #include <sys/uio.h> 54 #include <sys/kernel.h> 55 #include <sys/stat.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 62 #include <sys/mount.h> 63 #include <sys/syscallargs.h> 64 65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *)); 66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 67 68 /* 69 * Read system call. 70 */ 71 /* ARGSUSED */ 72 int 73 sys_read(struct proc *p, void *v, register_t *retval) 74 { 75 struct sys_read_args /* { 76 syscallarg(int) fd; 77 syscallarg(void *) buf; 78 syscallarg(size_t) nbyte; 79 } */ *uap = v; 80 int fd; 81 struct file *fp; 82 struct filedesc *fdp; 83 84 fd = SCARG(uap, fd); 85 fdp = p->p_fd; 86 87 if ((fp = fd_getfile(fdp, fd)) == NULL) 88 return (EBADF); 89 90 if ((fp->f_flag & FREAD) == 0) 91 return (EBADF); 92 93 FILE_USE(fp); 94 95 /* dofileread() will unuse the descriptor for us */ 96 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 97 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 98 } 99 100 int 101 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 102 off_t *offset, int flags, register_t *retval) 103 { 104 struct uio auio; 105 struct iovec aiov; 106 long cnt, error; 107 #ifdef KTRACE 108 struct iovec ktriov; 109 #endif 110 error = 0; 111 112 aiov.iov_base = (caddr_t)buf; 113 aiov.iov_len = nbyte; 114 auio.uio_iov = &aiov; 115 auio.uio_iovcnt = 1; 116 auio.uio_resid = nbyte; 117 auio.uio_rw = UIO_READ; 118 auio.uio_segflg = UIO_USERSPACE; 119 auio.uio_procp = p; 120 121 /* 122 * Reads return ssize_t because -1 is returned on error. Therefore 123 * we must restrict the length to SSIZE_MAX to avoid garbage return 124 * values. 125 */ 126 if (auio.uio_resid > SSIZE_MAX) { 127 error = EINVAL; 128 goto out; 129 } 130 131 #ifdef KTRACE 132 /* 133 * if tracing, save a copy of iovec 134 */ 135 if (KTRPOINT(p, KTR_GENIO)) 136 ktriov = aiov; 137 #endif 138 cnt = auio.uio_resid; 139 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 140 if (error) 141 if (auio.uio_resid != cnt && (error == ERESTART || 142 error == EINTR || error == EWOULDBLOCK)) 143 error = 0; 144 cnt -= auio.uio_resid; 145 #ifdef KTRACE 146 if (KTRPOINT(p, KTR_GENIO) && error == 0) 147 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 148 #endif 149 *retval = cnt; 150 out: 151 FILE_UNUSE(fp, p); 152 return (error); 153 } 154 155 /* 156 * Scatter read system call. 157 */ 158 int 159 sys_readv(struct proc *p, void *v, register_t *retval) 160 { 161 struct sys_readv_args /* { 162 syscallarg(int) fd; 163 syscallarg(const struct iovec *) iovp; 164 syscallarg(int) iovcnt; 165 } */ *uap = v; 166 int fd; 167 struct file *fp; 168 struct filedesc *fdp; 169 170 fd = SCARG(uap, fd); 171 fdp = p->p_fd; 172 173 if ((fp = fd_getfile(fdp, fd)) == NULL) 174 return (EBADF); 175 176 if ((fp->f_flag & FREAD) == 0) 177 return (EBADF); 178 179 FILE_USE(fp); 180 181 /* dofilereadv() will unuse the descriptor for us */ 182 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 183 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 184 } 185 186 int 187 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 188 int iovcnt, off_t *offset, int flags, register_t *retval) 189 { 190 struct uio auio; 191 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 192 long i, cnt, error; 193 u_int iovlen; 194 #ifdef KTRACE 195 struct iovec *ktriov; 196 #endif 197 198 error = 0; 199 #ifdef KTRACE 200 ktriov = NULL; 201 #endif 202 /* note: can't use iovlen until iovcnt is validated */ 203 iovlen = iovcnt * sizeof(struct iovec); 204 if ((u_int)iovcnt > UIO_SMALLIOV) { 205 if ((u_int)iovcnt > IOV_MAX) { 206 error = EINVAL; 207 goto out; 208 } 209 iov = malloc(iovlen, M_IOV, M_WAITOK); 210 needfree = iov; 211 } else if ((u_int)iovcnt > 0) { 212 iov = aiov; 213 needfree = NULL; 214 } else { 215 error = EINVAL; 216 goto out; 217 } 218 219 auio.uio_iov = iov; 220 auio.uio_iovcnt = iovcnt; 221 auio.uio_rw = UIO_READ; 222 auio.uio_segflg = UIO_USERSPACE; 223 auio.uio_procp = p; 224 error = copyin(iovp, iov, iovlen); 225 if (error) 226 goto done; 227 auio.uio_resid = 0; 228 for (i = 0; i < iovcnt; i++) { 229 auio.uio_resid += iov->iov_len; 230 /* 231 * Reads return ssize_t because -1 is returned on error. 232 * Therefore we must restrict the length to SSIZE_MAX to 233 * avoid garbage return values. 234 */ 235 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 236 error = EINVAL; 237 goto done; 238 } 239 iov++; 240 } 241 #ifdef KTRACE 242 /* 243 * if tracing, save a copy of iovec 244 */ 245 if (KTRPOINT(p, KTR_GENIO)) { 246 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 247 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 248 } 249 #endif 250 cnt = auio.uio_resid; 251 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 252 if (error) 253 if (auio.uio_resid != cnt && (error == ERESTART || 254 error == EINTR || error == EWOULDBLOCK)) 255 error = 0; 256 cnt -= auio.uio_resid; 257 #ifdef KTRACE 258 if (KTRPOINT(p, KTR_GENIO)) 259 if (error == 0) { 260 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error); 261 free(ktriov, M_TEMP); 262 } 263 #endif 264 *retval = cnt; 265 done: 266 if (needfree) 267 free(needfree, M_IOV); 268 out: 269 FILE_UNUSE(fp, p); 270 return (error); 271 } 272 273 /* 274 * Write system call 275 */ 276 int 277 sys_write(struct proc *p, void *v, register_t *retval) 278 { 279 struct sys_write_args /* { 280 syscallarg(int) fd; 281 syscallarg(const void *) buf; 282 syscallarg(size_t) nbyte; 283 } */ *uap = v; 284 int fd; 285 struct file *fp; 286 struct filedesc *fdp; 287 288 fd = SCARG(uap, fd); 289 fdp = p->p_fd; 290 291 if ((fp = fd_getfile(fdp, fd)) == NULL) 292 return (EBADF); 293 294 if ((fp->f_flag & FWRITE) == 0) 295 return (EBADF); 296 297 FILE_USE(fp); 298 299 /* dofilewrite() will unuse the descriptor for us */ 300 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 301 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 302 } 303 304 int 305 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 306 size_t nbyte, off_t *offset, int flags, register_t *retval) 307 { 308 struct uio auio; 309 struct iovec aiov; 310 long cnt, error; 311 #ifdef KTRACE 312 struct iovec ktriov; 313 #endif 314 315 error = 0; 316 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 317 aiov.iov_len = nbyte; 318 auio.uio_iov = &aiov; 319 auio.uio_iovcnt = 1; 320 auio.uio_resid = nbyte; 321 auio.uio_rw = UIO_WRITE; 322 auio.uio_segflg = UIO_USERSPACE; 323 auio.uio_procp = p; 324 325 /* 326 * Writes return ssize_t because -1 is returned on error. Therefore 327 * we must restrict the length to SSIZE_MAX to avoid garbage return 328 * values. 329 */ 330 if (auio.uio_resid > SSIZE_MAX) { 331 error = EINVAL; 332 goto out; 333 } 334 335 #ifdef KTRACE 336 /* 337 * if tracing, save a copy of iovec 338 */ 339 if (KTRPOINT(p, KTR_GENIO)) 340 ktriov = aiov; 341 #endif 342 cnt = auio.uio_resid; 343 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 344 if (error) { 345 if (auio.uio_resid != cnt && (error == ERESTART || 346 error == EINTR || error == EWOULDBLOCK)) 347 error = 0; 348 if (error == EPIPE) 349 psignal(p, SIGPIPE); 350 } 351 cnt -= auio.uio_resid; 352 #ifdef KTRACE 353 if (KTRPOINT(p, KTR_GENIO) && error == 0) 354 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 355 #endif 356 *retval = cnt; 357 out: 358 FILE_UNUSE(fp, p); 359 return (error); 360 } 361 362 /* 363 * Gather write system call 364 */ 365 int 366 sys_writev(struct proc *p, void *v, register_t *retval) 367 { 368 struct sys_writev_args /* { 369 syscallarg(int) fd; 370 syscallarg(const struct iovec *) iovp; 371 syscallarg(int) iovcnt; 372 } */ *uap = v; 373 int fd; 374 struct file *fp; 375 struct filedesc *fdp; 376 377 fd = SCARG(uap, fd); 378 fdp = p->p_fd; 379 380 if ((fp = fd_getfile(fdp, fd)) == NULL) 381 return (EBADF); 382 383 if ((fp->f_flag & FWRITE) == 0) 384 return (EBADF); 385 386 FILE_USE(fp); 387 388 /* dofilewritev() will unuse the descriptor for us */ 389 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 390 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 391 } 392 393 int 394 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 395 int iovcnt, off_t *offset, int flags, register_t *retval) 396 { 397 struct uio auio; 398 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 399 long i, cnt, error; 400 u_int iovlen; 401 #ifdef KTRACE 402 struct iovec *ktriov; 403 #endif 404 405 error = 0; 406 #ifdef KTRACE 407 ktriov = NULL; 408 #endif 409 /* note: can't use iovlen until iovcnt is validated */ 410 iovlen = iovcnt * sizeof(struct iovec); 411 if ((u_int)iovcnt > UIO_SMALLIOV) { 412 if ((u_int)iovcnt > IOV_MAX) 413 return (EINVAL); 414 iov = malloc(iovlen, M_IOV, M_WAITOK); 415 needfree = iov; 416 } else if ((u_int)iovcnt > 0) { 417 iov = aiov; 418 needfree = NULL; 419 } else { 420 error = EINVAL; 421 goto out; 422 } 423 424 auio.uio_iov = iov; 425 auio.uio_iovcnt = iovcnt; 426 auio.uio_rw = UIO_WRITE; 427 auio.uio_segflg = UIO_USERSPACE; 428 auio.uio_procp = p; 429 error = copyin(iovp, iov, iovlen); 430 if (error) 431 goto done; 432 auio.uio_resid = 0; 433 for (i = 0; i < iovcnt; i++) { 434 auio.uio_resid += iov->iov_len; 435 /* 436 * Writes return ssize_t because -1 is returned on error. 437 * Therefore we must restrict the length to SSIZE_MAX to 438 * avoid garbage return values. 439 */ 440 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 441 error = EINVAL; 442 goto done; 443 } 444 iov++; 445 } 446 #ifdef KTRACE 447 /* 448 * if tracing, save a copy of iovec 449 */ 450 if (KTRPOINT(p, KTR_GENIO)) { 451 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 452 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 453 } 454 #endif 455 cnt = auio.uio_resid; 456 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 457 if (error) { 458 if (auio.uio_resid != cnt && (error == ERESTART || 459 error == EINTR || error == EWOULDBLOCK)) 460 error = 0; 461 if (error == EPIPE) 462 psignal(p, SIGPIPE); 463 } 464 cnt -= auio.uio_resid; 465 #ifdef KTRACE 466 if (KTRPOINT(p, KTR_GENIO)) 467 if (error == 0) { 468 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 469 free(ktriov, M_TEMP); 470 } 471 #endif 472 *retval = cnt; 473 done: 474 if (needfree) 475 free(needfree, M_IOV); 476 out: 477 FILE_UNUSE(fp, p); 478 return (error); 479 } 480 481 /* 482 * Ioctl system call 483 */ 484 /* ARGSUSED */ 485 int 486 sys_ioctl(struct proc *p, void *v, register_t *retval) 487 { 488 struct sys_ioctl_args /* { 489 syscallarg(int) fd; 490 syscallarg(u_long) com; 491 syscallarg(caddr_t) data; 492 } */ *uap = v; 493 struct file *fp; 494 struct filedesc *fdp; 495 u_long com; 496 int error; 497 u_int size; 498 caddr_t data, memp; 499 int tmp; 500 #define STK_PARAMS 128 501 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 502 503 error = 0; 504 fdp = p->p_fd; 505 506 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 507 return (EBADF); 508 509 FILE_USE(fp); 510 511 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 512 error = EBADF; 513 goto out; 514 } 515 516 switch (com = SCARG(uap, com)) { 517 case FIONCLEX: 518 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 519 goto out; 520 521 case FIOCLEX: 522 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 523 goto out; 524 } 525 526 /* 527 * Interpret high order word to find amount of data to be 528 * copied to/from the user's address space. 529 */ 530 size = IOCPARM_LEN(com); 531 if (size > IOCPARM_MAX) { 532 error = ENOTTY; 533 goto out; 534 } 535 memp = NULL; 536 if (size > sizeof(stkbuf)) { 537 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 538 data = memp; 539 } else 540 data = (caddr_t)stkbuf; 541 if (com&IOC_IN) { 542 if (size) { 543 error = copyin(SCARG(uap, data), data, size); 544 if (error) { 545 if (memp) 546 free(memp, M_IOCTLOPS); 547 goto out; 548 } 549 } else 550 *(caddr_t *)data = SCARG(uap, data); 551 } else if ((com&IOC_OUT) && size) 552 /* 553 * Zero the buffer so the user always 554 * gets back something deterministic. 555 */ 556 memset(data, 0, size); 557 else if (com&IOC_VOID) 558 *(caddr_t *)data = SCARG(uap, data); 559 560 switch (com) { 561 562 case FIONBIO: 563 if ((tmp = *(int *)data) != 0) 564 fp->f_flag |= FNONBLOCK; 565 else 566 fp->f_flag &= ~FNONBLOCK; 567 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 568 break; 569 570 case FIOASYNC: 571 if ((tmp = *(int *)data) != 0) 572 fp->f_flag |= FASYNC; 573 else 574 fp->f_flag &= ~FASYNC; 575 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 576 break; 577 578 case FIOSETOWN: 579 tmp = *(int *)data; 580 if (fp->f_type == DTYPE_SOCKET) { 581 ((struct socket *)fp->f_data)->so_pgid = tmp; 582 error = 0; 583 break; 584 } 585 if (tmp <= 0) { 586 tmp = -tmp; 587 } else { 588 struct proc *p1 = pfind(tmp); 589 if (p1 == 0) { 590 error = ESRCH; 591 break; 592 } 593 tmp = p1->p_pgrp->pg_id; 594 } 595 error = (*fp->f_ops->fo_ioctl) 596 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 597 break; 598 599 case FIOGETOWN: 600 if (fp->f_type == DTYPE_SOCKET) { 601 error = 0; 602 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 603 break; 604 } 605 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 606 if (error == 0) 607 *(int *)data = -*(int *)data; 608 break; 609 610 default: 611 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 612 /* 613 * Copy any data to user, size was 614 * already set and checked above. 615 */ 616 if (error == 0 && (com&IOC_OUT) && size) 617 error = copyout(data, SCARG(uap, data), size); 618 break; 619 } 620 if (memp) 621 free(memp, M_IOCTLOPS); 622 out: 623 FILE_UNUSE(fp, p); 624 return (error); 625 } 626 627 int selwait, nselcoll; 628 629 /* 630 * Select system call. 631 */ 632 int 633 sys_select(struct proc *p, void *v, register_t *retval) 634 { 635 struct sys_select_args /* { 636 syscallarg(int) nd; 637 syscallarg(fd_set *) in; 638 syscallarg(fd_set *) ou; 639 syscallarg(fd_set *) ex; 640 syscallarg(struct timeval *) tv; 641 } */ *uap = v; 642 caddr_t bits; 643 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 644 sizeof(fd_mask) * 6]; 645 struct timeval atv; 646 int s, ncoll, error, timo; 647 size_t ni; 648 649 error = 0; 650 if (SCARG(uap, nd) < 0) 651 return (EINVAL); 652 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 653 /* forgiving; slightly wrong */ 654 SCARG(uap, nd) = p->p_fd->fd_nfiles; 655 } 656 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 657 if (ni * 6 > sizeof(smallbits)) 658 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 659 else 660 bits = smallbits; 661 662 #define getbits(name, x) \ 663 if (SCARG(uap, name)) { \ 664 error = copyin(SCARG(uap, name), bits + ni * x, ni); \ 665 if (error) \ 666 goto done; \ 667 } else \ 668 memset(bits + ni * x, 0, ni); 669 getbits(in, 0); 670 getbits(ou, 1); 671 getbits(ex, 2); 672 #undef getbits 673 674 if (SCARG(uap, tv)) { 675 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 676 sizeof(atv)); 677 if (error) 678 goto done; 679 if (itimerfix(&atv)) { 680 error = EINVAL; 681 goto done; 682 } 683 s = splclock(); 684 timeradd(&atv, &time, &atv); 685 splx(s); 686 } else 687 timo = 0; 688 retry: 689 ncoll = nselcoll; 690 p->p_flag |= P_SELECT; 691 error = selscan(p, (fd_mask *)(bits + ni * 0), 692 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval); 693 if (error || *retval) 694 goto done; 695 if (SCARG(uap, tv)) { 696 /* 697 * We have to recalculate the timeout on every retry. 698 */ 699 timo = hzto(&atv); 700 if (timo <= 0) 701 goto done; 702 } 703 s = splsched(); 704 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 705 splx(s); 706 goto retry; 707 } 708 p->p_flag &= ~P_SELECT; 709 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 710 splx(s); 711 if (error == 0) 712 goto retry; 713 done: 714 p->p_flag &= ~P_SELECT; 715 /* select is not restarted after signals... */ 716 if (error == ERESTART) 717 error = EINTR; 718 if (error == EWOULDBLOCK) 719 error = 0; 720 if (error == 0) { 721 722 #define putbits(name, x) \ 723 if (SCARG(uap, name)) { \ 724 error = copyout(bits + ni * x, SCARG(uap, name), ni); \ 725 if (error) \ 726 goto out; \ 727 } 728 putbits(in, 3); 729 putbits(ou, 4); 730 putbits(ex, 5); 731 #undef putbits 732 } 733 out: 734 if (ni * 6 > sizeof(smallbits)) 735 free(bits, M_TEMP); 736 return (error); 737 } 738 739 int 740 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd, 741 register_t *retval) 742 { 743 struct filedesc *fdp; 744 int msk, i, j, fd, n; 745 fd_mask ibits, obits; 746 struct file *fp; 747 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 748 POLLWRNORM | POLLHUP | POLLERR, 749 POLLRDBAND }; 750 751 fdp = p->p_fd; 752 n = 0; 753 for (msk = 0; msk < 3; msk++) { 754 for (i = 0; i < nfd; i += NFDBITS) { 755 ibits = *ibitp++; 756 obits = 0; 757 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 758 ibits &= ~(1 << j); 759 if ((fp = fd_getfile(fdp, fd)) == NULL) 760 return (EBADF); 761 FILE_USE(fp); 762 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 763 obits |= (1 << j); 764 n++; 765 } 766 FILE_UNUSE(fp, p); 767 } 768 *obitp++ = obits; 769 } 770 } 771 *retval = n; 772 return (0); 773 } 774 775 /* 776 * Poll system call. 777 */ 778 int 779 sys_poll(struct proc *p, void *v, register_t *retval) 780 { 781 struct sys_poll_args /* { 782 syscallarg(struct pollfd *) fds; 783 syscallarg(u_int) nfds; 784 syscallarg(int) timeout; 785 } */ *uap = v; 786 caddr_t bits; 787 char smallbits[32 * sizeof(struct pollfd)]; 788 struct timeval atv; 789 int s, ncoll, error, timo; 790 size_t ni; 791 792 error = 0; 793 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 794 /* forgiving; slightly wrong */ 795 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 796 } 797 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 798 if (ni > sizeof(smallbits)) 799 bits = malloc(ni, M_TEMP, M_WAITOK); 800 else 801 bits = smallbits; 802 803 error = copyin(SCARG(uap, fds), bits, ni); 804 if (error) 805 goto done; 806 807 if (SCARG(uap, timeout) != INFTIM) { 808 atv.tv_sec = SCARG(uap, timeout) / 1000; 809 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 810 if (itimerfix(&atv)) { 811 error = EINVAL; 812 goto done; 813 } 814 s = splclock(); 815 timeradd(&atv, &time, &atv); 816 splx(s); 817 } else 818 timo = 0; 819 retry: 820 ncoll = nselcoll; 821 p->p_flag |= P_SELECT; 822 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval); 823 if (error || *retval) 824 goto done; 825 if (SCARG(uap, timeout) != INFTIM) { 826 /* 827 * We have to recalculate the timeout on every retry. 828 */ 829 timo = hzto(&atv); 830 if (timo <= 0) 831 goto done; 832 } 833 s = splsched(); 834 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 835 splx(s); 836 goto retry; 837 } 838 p->p_flag &= ~P_SELECT; 839 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 840 splx(s); 841 if (error == 0) 842 goto retry; 843 done: 844 p->p_flag &= ~P_SELECT; 845 /* poll is not restarted after signals... */ 846 if (error == ERESTART) 847 error = EINTR; 848 if (error == EWOULDBLOCK) 849 error = 0; 850 if (error == 0) { 851 error = copyout(bits, SCARG(uap, fds), ni); 852 if (error) 853 goto out; 854 } 855 out: 856 if (ni > sizeof(smallbits)) 857 free(bits, M_TEMP); 858 return (error); 859 } 860 861 int 862 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval) 863 { 864 struct filedesc *fdp; 865 int i, n; 866 struct file *fp; 867 868 fdp = p->p_fd; 869 n = 0; 870 for (i = 0; i < nfd; i++, fds++) { 871 if ((u_int)fds->fd >= fdp->fd_nfiles) { 872 fds->revents = POLLNVAL; 873 n++; 874 } else { 875 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 876 fds->revents = POLLNVAL; 877 n++; 878 } else { 879 FILE_USE(fp); 880 fds->revents = (*fp->f_ops->fo_poll)(fp, 881 fds->events | POLLERR | POLLHUP, p); 882 if (fds->revents != 0) 883 n++; 884 FILE_UNUSE(fp, p); 885 } 886 } 887 } 888 *retval = n; 889 return (0); 890 } 891 892 /*ARGSUSED*/ 893 int 894 seltrue(dev_t dev, int events, struct proc *p) 895 { 896 897 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 898 } 899 900 /* 901 * Record a select request. 902 */ 903 void 904 selrecord(struct proc *selector, struct selinfo *sip) 905 { 906 struct proc *p; 907 pid_t mypid; 908 909 mypid = selector->p_pid; 910 if (sip->si_pid == mypid) 911 return; 912 if (sip->si_pid && (p = pfind(sip->si_pid)) && 913 p->p_wchan == (caddr_t)&selwait) 914 sip->si_flags |= SI_COLL; 915 else { 916 sip->si_flags &= ~SI_COLL; 917 sip->si_pid = mypid; 918 } 919 } 920 921 /* 922 * Do a wakeup when a selectable event occurs. 923 */ 924 void 925 selwakeup(sip) 926 struct selinfo *sip; 927 { 928 struct proc *p; 929 int s; 930 931 if (sip->si_pid == 0) 932 return; 933 if (sip->si_flags & SI_COLL) { 934 nselcoll++; 935 sip->si_flags &= ~SI_COLL; 936 wakeup((caddr_t)&selwait); 937 } 938 p = pfind(sip->si_pid); 939 sip->si_pid = 0; 940 if (p != NULL) { 941 SCHED_LOCK(s); 942 if (p->p_wchan == (caddr_t)&selwait) { 943 if (p->p_stat == SSLEEP) 944 setrunnable(p); 945 else 946 unsleep(p); 947 } else if (p->p_flag & P_SELECT) 948 p->p_flag &= ~P_SELECT; 949 SCHED_UNLOCK(s); 950 } 951 } 952