1 /* $NetBSD: sys_generic.c,v 1.60 2001/11/14 18:43:58 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.60 2001/11/14 18:43:58 christos Exp $"); 45 46 #include "opt_ktrace.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/filedesc.h> 51 #include <sys/ioctl.h> 52 #include <sys/file.h> 53 #include <sys/proc.h> 54 #include <sys/socketvar.h> 55 #include <sys/signalvar.h> 56 #include <sys/uio.h> 57 #include <sys/kernel.h> 58 #include <sys/stat.h> 59 #include <sys/malloc.h> 60 #include <sys/poll.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 65 #include <sys/mount.h> 66 #include <sys/syscallargs.h> 67 68 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *)); 69 int pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 70 71 /* 72 * Read system call. 73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct proc *p, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct filedesc *fdp; 86 87 fd = SCARG(uap, fd); 88 fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 93 if ((fp->f_flag & FREAD) == 0) 94 return (EBADF); 95 96 FILE_USE(fp); 97 98 /* dofileread() will unuse the descriptor for us */ 99 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 100 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 101 } 102 103 int 104 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 105 off_t *offset, int flags, register_t *retval) 106 { 107 struct uio auio; 108 struct iovec aiov; 109 long cnt, error; 110 #ifdef KTRACE 111 struct iovec ktriov; 112 #endif 113 error = 0; 114 115 aiov.iov_base = (caddr_t)buf; 116 aiov.iov_len = nbyte; 117 auio.uio_iov = &aiov; 118 auio.uio_iovcnt = 1; 119 auio.uio_resid = nbyte; 120 auio.uio_rw = UIO_READ; 121 auio.uio_segflg = UIO_USERSPACE; 122 auio.uio_procp = p; 123 124 /* 125 * Reads return ssize_t because -1 is returned on error. Therefore 126 * we must restrict the length to SSIZE_MAX to avoid garbage return 127 * values. 128 */ 129 if (auio.uio_resid > SSIZE_MAX) { 130 error = EINVAL; 131 goto out; 132 } 133 134 #ifdef KTRACE 135 /* 136 * if tracing, save a copy of iovec 137 */ 138 if (KTRPOINT(p, KTR_GENIO)) 139 ktriov = aiov; 140 #endif 141 cnt = auio.uio_resid; 142 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 143 if (error) 144 if (auio.uio_resid != cnt && (error == ERESTART || 145 error == EINTR || error == EWOULDBLOCK)) 146 error = 0; 147 cnt -= auio.uio_resid; 148 #ifdef KTRACE 149 if (KTRPOINT(p, KTR_GENIO) && error == 0) 150 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 151 #endif 152 *retval = cnt; 153 out: 154 FILE_UNUSE(fp, p); 155 return (error); 156 } 157 158 /* 159 * Scatter read system call. 160 */ 161 int 162 sys_readv(struct proc *p, void *v, register_t *retval) 163 { 164 struct sys_readv_args /* { 165 syscallarg(int) fd; 166 syscallarg(const struct iovec *) iovp; 167 syscallarg(int) iovcnt; 168 } */ *uap = v; 169 int fd; 170 struct file *fp; 171 struct filedesc *fdp; 172 173 fd = SCARG(uap, fd); 174 fdp = p->p_fd; 175 176 if ((fp = fd_getfile(fdp, fd)) == NULL) 177 return (EBADF); 178 179 if ((fp->f_flag & FREAD) == 0) 180 return (EBADF); 181 182 FILE_USE(fp); 183 184 /* dofilereadv() will unuse the descriptor for us */ 185 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 186 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 187 } 188 189 int 190 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 191 int iovcnt, off_t *offset, int flags, register_t *retval) 192 { 193 struct uio auio; 194 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 195 long i, cnt, error; 196 u_int iovlen; 197 #ifdef KTRACE 198 struct iovec *ktriov; 199 #endif 200 201 error = 0; 202 #ifdef KTRACE 203 ktriov = NULL; 204 #endif 205 /* note: can't use iovlen until iovcnt is validated */ 206 iovlen = iovcnt * sizeof(struct iovec); 207 if ((u_int)iovcnt > UIO_SMALLIOV) { 208 if ((u_int)iovcnt > IOV_MAX) { 209 error = EINVAL; 210 goto out; 211 } 212 iov = malloc(iovlen, M_IOV, M_WAITOK); 213 needfree = iov; 214 } else if ((u_int)iovcnt > 0) { 215 iov = aiov; 216 needfree = NULL; 217 } else { 218 error = EINVAL; 219 goto out; 220 } 221 222 auio.uio_iov = iov; 223 auio.uio_iovcnt = iovcnt; 224 auio.uio_rw = UIO_READ; 225 auio.uio_segflg = UIO_USERSPACE; 226 auio.uio_procp = p; 227 error = copyin(iovp, iov, iovlen); 228 if (error) 229 goto done; 230 auio.uio_resid = 0; 231 for (i = 0; i < iovcnt; i++) { 232 auio.uio_resid += iov->iov_len; 233 /* 234 * Reads return ssize_t because -1 is returned on error. 235 * Therefore we must restrict the length to SSIZE_MAX to 236 * avoid garbage return values. 237 */ 238 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 239 error = EINVAL; 240 goto done; 241 } 242 iov++; 243 } 244 #ifdef KTRACE 245 /* 246 * if tracing, save a copy of iovec 247 */ 248 if (KTRPOINT(p, KTR_GENIO)) { 249 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 250 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 251 } 252 #endif 253 cnt = auio.uio_resid; 254 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 255 if (error) 256 if (auio.uio_resid != cnt && (error == ERESTART || 257 error == EINTR || error == EWOULDBLOCK)) 258 error = 0; 259 cnt -= auio.uio_resid; 260 #ifdef KTRACE 261 if (ktriov != NULL) { 262 if (error == 0) 263 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error); 264 free(ktriov, M_TEMP); 265 } 266 #endif 267 *retval = cnt; 268 done: 269 if (needfree) 270 free(needfree, M_IOV); 271 out: 272 FILE_UNUSE(fp, p); 273 return (error); 274 } 275 276 /* 277 * Write system call 278 */ 279 int 280 sys_write(struct proc *p, void *v, register_t *retval) 281 { 282 struct sys_write_args /* { 283 syscallarg(int) fd; 284 syscallarg(const void *) buf; 285 syscallarg(size_t) nbyte; 286 } */ *uap = v; 287 int fd; 288 struct file *fp; 289 struct filedesc *fdp; 290 291 fd = SCARG(uap, fd); 292 fdp = p->p_fd; 293 294 if ((fp = fd_getfile(fdp, fd)) == NULL) 295 return (EBADF); 296 297 if ((fp->f_flag & FWRITE) == 0) 298 return (EBADF); 299 300 FILE_USE(fp); 301 302 /* dofilewrite() will unuse the descriptor for us */ 303 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 304 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 305 } 306 307 int 308 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 309 size_t nbyte, off_t *offset, int flags, register_t *retval) 310 { 311 struct uio auio; 312 struct iovec aiov; 313 long cnt, error; 314 #ifdef KTRACE 315 struct iovec ktriov; 316 #endif 317 318 error = 0; 319 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 320 aiov.iov_len = nbyte; 321 auio.uio_iov = &aiov; 322 auio.uio_iovcnt = 1; 323 auio.uio_resid = nbyte; 324 auio.uio_rw = UIO_WRITE; 325 auio.uio_segflg = UIO_USERSPACE; 326 auio.uio_procp = p; 327 328 /* 329 * Writes return ssize_t because -1 is returned on error. Therefore 330 * we must restrict the length to SSIZE_MAX to avoid garbage return 331 * values. 332 */ 333 if (auio.uio_resid > SSIZE_MAX) { 334 error = EINVAL; 335 goto out; 336 } 337 338 #ifdef KTRACE 339 /* 340 * if tracing, save a copy of iovec 341 */ 342 if (KTRPOINT(p, KTR_GENIO)) 343 ktriov = aiov; 344 #endif 345 cnt = auio.uio_resid; 346 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 347 if (error) { 348 if (auio.uio_resid != cnt && (error == ERESTART || 349 error == EINTR || error == EWOULDBLOCK)) 350 error = 0; 351 if (error == EPIPE) 352 psignal(p, SIGPIPE); 353 } 354 cnt -= auio.uio_resid; 355 #ifdef KTRACE 356 if (KTRPOINT(p, KTR_GENIO) && error == 0) 357 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 358 #endif 359 *retval = cnt; 360 out: 361 FILE_UNUSE(fp, p); 362 return (error); 363 } 364 365 /* 366 * Gather write system call 367 */ 368 int 369 sys_writev(struct proc *p, void *v, register_t *retval) 370 { 371 struct sys_writev_args /* { 372 syscallarg(int) fd; 373 syscallarg(const struct iovec *) iovp; 374 syscallarg(int) iovcnt; 375 } */ *uap = v; 376 int fd; 377 struct file *fp; 378 struct filedesc *fdp; 379 380 fd = SCARG(uap, fd); 381 fdp = p->p_fd; 382 383 if ((fp = fd_getfile(fdp, fd)) == NULL) 384 return (EBADF); 385 386 if ((fp->f_flag & FWRITE) == 0) 387 return (EBADF); 388 389 FILE_USE(fp); 390 391 /* dofilewritev() will unuse the descriptor for us */ 392 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 393 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 394 } 395 396 int 397 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 398 int iovcnt, off_t *offset, int flags, register_t *retval) 399 { 400 struct uio auio; 401 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 402 long i, cnt, error; 403 u_int iovlen; 404 #ifdef KTRACE 405 struct iovec *ktriov; 406 #endif 407 408 error = 0; 409 #ifdef KTRACE 410 ktriov = NULL; 411 #endif 412 /* note: can't use iovlen until iovcnt is validated */ 413 iovlen = iovcnt * sizeof(struct iovec); 414 if ((u_int)iovcnt > UIO_SMALLIOV) { 415 if ((u_int)iovcnt > IOV_MAX) 416 return (EINVAL); 417 iov = malloc(iovlen, M_IOV, M_WAITOK); 418 needfree = iov; 419 } else if ((u_int)iovcnt > 0) { 420 iov = aiov; 421 needfree = NULL; 422 } else { 423 error = EINVAL; 424 goto out; 425 } 426 427 auio.uio_iov = iov; 428 auio.uio_iovcnt = iovcnt; 429 auio.uio_rw = UIO_WRITE; 430 auio.uio_segflg = UIO_USERSPACE; 431 auio.uio_procp = p; 432 error = copyin(iovp, iov, iovlen); 433 if (error) 434 goto done; 435 auio.uio_resid = 0; 436 for (i = 0; i < iovcnt; i++) { 437 auio.uio_resid += iov->iov_len; 438 /* 439 * Writes return ssize_t because -1 is returned on error. 440 * Therefore we must restrict the length to SSIZE_MAX to 441 * avoid garbage return values. 442 */ 443 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 444 error = EINVAL; 445 goto done; 446 } 447 iov++; 448 } 449 #ifdef KTRACE 450 /* 451 * if tracing, save a copy of iovec 452 */ 453 if (KTRPOINT(p, KTR_GENIO)) { 454 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 455 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 456 } 457 #endif 458 cnt = auio.uio_resid; 459 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 460 if (error) { 461 if (auio.uio_resid != cnt && (error == ERESTART || 462 error == EINTR || error == EWOULDBLOCK)) 463 error = 0; 464 if (error == EPIPE) 465 psignal(p, SIGPIPE); 466 } 467 cnt -= auio.uio_resid; 468 #ifdef KTRACE 469 if (KTRPOINT(p, KTR_GENIO)) 470 if (error == 0) { 471 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 472 free(ktriov, M_TEMP); 473 } 474 #endif 475 *retval = cnt; 476 done: 477 if (needfree) 478 free(needfree, M_IOV); 479 out: 480 FILE_UNUSE(fp, p); 481 return (error); 482 } 483 484 /* 485 * Ioctl system call 486 */ 487 /* ARGSUSED */ 488 int 489 sys_ioctl(struct proc *p, void *v, register_t *retval) 490 { 491 struct sys_ioctl_args /* { 492 syscallarg(int) fd; 493 syscallarg(u_long) com; 494 syscallarg(caddr_t) data; 495 } */ *uap = v; 496 struct file *fp; 497 struct filedesc *fdp; 498 u_long com; 499 int error; 500 u_int size; 501 caddr_t data, memp; 502 int tmp; 503 #define STK_PARAMS 128 504 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 505 506 error = 0; 507 fdp = p->p_fd; 508 509 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 510 return (EBADF); 511 512 FILE_USE(fp); 513 514 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 515 error = EBADF; 516 goto out; 517 } 518 519 switch (com = SCARG(uap, com)) { 520 case FIONCLEX: 521 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 522 goto out; 523 524 case FIOCLEX: 525 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 526 goto out; 527 } 528 529 /* 530 * Interpret high order word to find amount of data to be 531 * copied to/from the user's address space. 532 */ 533 size = IOCPARM_LEN(com); 534 if (size > IOCPARM_MAX) { 535 error = ENOTTY; 536 goto out; 537 } 538 memp = NULL; 539 if (size > sizeof(stkbuf)) { 540 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 541 data = memp; 542 } else 543 data = (caddr_t)stkbuf; 544 if (com&IOC_IN) { 545 if (size) { 546 error = copyin(SCARG(uap, data), data, size); 547 if (error) { 548 if (memp) 549 free(memp, M_IOCTLOPS); 550 goto out; 551 } 552 } else 553 *(caddr_t *)data = SCARG(uap, data); 554 } else if ((com&IOC_OUT) && size) 555 /* 556 * Zero the buffer so the user always 557 * gets back something deterministic. 558 */ 559 memset(data, 0, size); 560 else if (com&IOC_VOID) 561 *(caddr_t *)data = SCARG(uap, data); 562 563 switch (com) { 564 565 case FIONBIO: 566 if ((tmp = *(int *)data) != 0) 567 fp->f_flag |= FNONBLOCK; 568 else 569 fp->f_flag &= ~FNONBLOCK; 570 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 571 break; 572 573 case FIOASYNC: 574 if ((tmp = *(int *)data) != 0) 575 fp->f_flag |= FASYNC; 576 else 577 fp->f_flag &= ~FASYNC; 578 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 579 break; 580 581 case FIOSETOWN: 582 tmp = *(int *)data; 583 if (fp->f_type == DTYPE_SOCKET) { 584 ((struct socket *)fp->f_data)->so_pgid = tmp; 585 error = 0; 586 break; 587 } 588 if (tmp <= 0) { 589 tmp = -tmp; 590 } else { 591 struct proc *p1 = pfind(tmp); 592 if (p1 == 0) { 593 error = ESRCH; 594 break; 595 } 596 tmp = p1->p_pgrp->pg_id; 597 } 598 error = (*fp->f_ops->fo_ioctl) 599 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 600 break; 601 602 case FIOGETOWN: 603 if (fp->f_type == DTYPE_SOCKET) { 604 error = 0; 605 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 606 break; 607 } 608 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 609 if (error == 0) 610 *(int *)data = -*(int *)data; 611 break; 612 613 default: 614 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 615 /* 616 * Copy any data to user, size was 617 * already set and checked above. 618 */ 619 if (error == 0 && (com&IOC_OUT) && size) 620 error = copyout(data, SCARG(uap, data), size); 621 break; 622 } 623 if (memp) 624 free(memp, M_IOCTLOPS); 625 out: 626 FILE_UNUSE(fp, p); 627 return (error); 628 } 629 630 int selwait, nselcoll; 631 632 /* 633 * Select system call. 634 */ 635 int 636 sys_select(struct proc *p, void *v, register_t *retval) 637 { 638 struct sys_select_args /* { 639 syscallarg(int) nd; 640 syscallarg(fd_set *) in; 641 syscallarg(fd_set *) ou; 642 syscallarg(fd_set *) ex; 643 syscallarg(struct timeval *) tv; 644 } */ *uap = v; 645 caddr_t bits; 646 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 647 sizeof(fd_mask) * 6]; 648 struct timeval atv; 649 int s, ncoll, error, timo; 650 size_t ni; 651 652 error = 0; 653 if (SCARG(uap, nd) < 0) 654 return (EINVAL); 655 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 656 /* forgiving; slightly wrong */ 657 SCARG(uap, nd) = p->p_fd->fd_nfiles; 658 } 659 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 660 if (ni * 6 > sizeof(smallbits)) 661 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 662 else 663 bits = smallbits; 664 665 #define getbits(name, x) \ 666 if (SCARG(uap, name)) { \ 667 error = copyin(SCARG(uap, name), bits + ni * x, ni); \ 668 if (error) \ 669 goto done; \ 670 } else \ 671 memset(bits + ni * x, 0, ni); 672 getbits(in, 0); 673 getbits(ou, 1); 674 getbits(ex, 2); 675 #undef getbits 676 677 if (SCARG(uap, tv)) { 678 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 679 sizeof(atv)); 680 if (error) 681 goto done; 682 if (itimerfix(&atv)) { 683 error = EINVAL; 684 goto done; 685 } 686 s = splclock(); 687 timeradd(&atv, &time, &atv); 688 splx(s); 689 } else 690 timo = 0; 691 retry: 692 ncoll = nselcoll; 693 p->p_flag |= P_SELECT; 694 error = selscan(p, (fd_mask *)(bits + ni * 0), 695 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval); 696 if (error || *retval) 697 goto done; 698 if (SCARG(uap, tv)) { 699 /* 700 * We have to recalculate the timeout on every retry. 701 */ 702 timo = hzto(&atv); 703 if (timo <= 0) 704 goto done; 705 } 706 s = splsched(); 707 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 708 splx(s); 709 goto retry; 710 } 711 p->p_flag &= ~P_SELECT; 712 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 713 splx(s); 714 if (error == 0) 715 goto retry; 716 done: 717 p->p_flag &= ~P_SELECT; 718 /* select is not restarted after signals... */ 719 if (error == ERESTART) 720 error = EINTR; 721 if (error == EWOULDBLOCK) 722 error = 0; 723 if (error == 0) { 724 725 #define putbits(name, x) \ 726 if (SCARG(uap, name)) { \ 727 error = copyout(bits + ni * x, SCARG(uap, name), ni); \ 728 if (error) \ 729 goto out; \ 730 } 731 putbits(in, 3); 732 putbits(ou, 4); 733 putbits(ex, 5); 734 #undef putbits 735 } 736 out: 737 if (ni * 6 > sizeof(smallbits)) 738 free(bits, M_TEMP); 739 return (error); 740 } 741 742 int 743 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd, 744 register_t *retval) 745 { 746 struct filedesc *fdp; 747 int msk, i, j, fd, n; 748 fd_mask ibits, obits; 749 struct file *fp; 750 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 751 POLLWRNORM | POLLHUP | POLLERR, 752 POLLRDBAND }; 753 754 fdp = p->p_fd; 755 n = 0; 756 for (msk = 0; msk < 3; msk++) { 757 for (i = 0; i < nfd; i += NFDBITS) { 758 ibits = *ibitp++; 759 obits = 0; 760 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 761 ibits &= ~(1 << j); 762 if ((fp = fd_getfile(fdp, fd)) == NULL) 763 return (EBADF); 764 FILE_USE(fp); 765 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 766 obits |= (1 << j); 767 n++; 768 } 769 FILE_UNUSE(fp, p); 770 } 771 *obitp++ = obits; 772 } 773 } 774 *retval = n; 775 return (0); 776 } 777 778 /* 779 * Poll system call. 780 */ 781 int 782 sys_poll(struct proc *p, void *v, register_t *retval) 783 { 784 struct sys_poll_args /* { 785 syscallarg(struct pollfd *) fds; 786 syscallarg(u_int) nfds; 787 syscallarg(int) timeout; 788 } */ *uap = v; 789 caddr_t bits; 790 char smallbits[32 * sizeof(struct pollfd)]; 791 struct timeval atv; 792 int s, ncoll, error, timo; 793 size_t ni; 794 795 error = 0; 796 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 797 /* forgiving; slightly wrong */ 798 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 799 } 800 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 801 if (ni > sizeof(smallbits)) 802 bits = malloc(ni, M_TEMP, M_WAITOK); 803 else 804 bits = smallbits; 805 806 error = copyin(SCARG(uap, fds), bits, ni); 807 if (error) 808 goto done; 809 810 if (SCARG(uap, timeout) != INFTIM) { 811 atv.tv_sec = SCARG(uap, timeout) / 1000; 812 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 813 if (itimerfix(&atv)) { 814 error = EINVAL; 815 goto done; 816 } 817 s = splclock(); 818 timeradd(&atv, &time, &atv); 819 splx(s); 820 } else 821 timo = 0; 822 retry: 823 ncoll = nselcoll; 824 p->p_flag |= P_SELECT; 825 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval); 826 if (error || *retval) 827 goto done; 828 if (SCARG(uap, timeout) != INFTIM) { 829 /* 830 * We have to recalculate the timeout on every retry. 831 */ 832 timo = hzto(&atv); 833 if (timo <= 0) 834 goto done; 835 } 836 s = splsched(); 837 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 838 splx(s); 839 goto retry; 840 } 841 p->p_flag &= ~P_SELECT; 842 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 843 splx(s); 844 if (error == 0) 845 goto retry; 846 done: 847 p->p_flag &= ~P_SELECT; 848 /* poll is not restarted after signals... */ 849 if (error == ERESTART) 850 error = EINTR; 851 if (error == EWOULDBLOCK) 852 error = 0; 853 if (error == 0) { 854 error = copyout(bits, SCARG(uap, fds), ni); 855 if (error) 856 goto out; 857 } 858 out: 859 if (ni > sizeof(smallbits)) 860 free(bits, M_TEMP); 861 return (error); 862 } 863 864 int 865 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval) 866 { 867 struct filedesc *fdp; 868 int i, n; 869 struct file *fp; 870 871 fdp = p->p_fd; 872 n = 0; 873 for (i = 0; i < nfd; i++, fds++) { 874 if (fds->fd >= fdp->fd_nfiles) { 875 fds->revents = POLLNVAL; 876 n++; 877 } else if (fds->fd < 0) { 878 fds->revents = 0; 879 } else { 880 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 881 fds->revents = POLLNVAL; 882 n++; 883 } else { 884 FILE_USE(fp); 885 fds->revents = (*fp->f_ops->fo_poll)(fp, 886 fds->events | POLLERR | POLLHUP, p); 887 if (fds->revents != 0) 888 n++; 889 FILE_UNUSE(fp, p); 890 } 891 } 892 } 893 *retval = n; 894 return (0); 895 } 896 897 /*ARGSUSED*/ 898 int 899 seltrue(dev_t dev, int events, struct proc *p) 900 { 901 902 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 903 } 904 905 /* 906 * Record a select request. 907 */ 908 void 909 selrecord(struct proc *selector, struct selinfo *sip) 910 { 911 struct proc *p; 912 pid_t mypid; 913 914 mypid = selector->p_pid; 915 if (sip->si_pid == mypid) 916 return; 917 if (sip->si_pid && (p = pfind(sip->si_pid)) && 918 p->p_wchan == (caddr_t)&selwait) 919 sip->si_flags |= SI_COLL; 920 else { 921 sip->si_flags &= ~SI_COLL; 922 sip->si_pid = mypid; 923 } 924 } 925 926 /* 927 * Do a wakeup when a selectable event occurs. 928 */ 929 void 930 selwakeup(sip) 931 struct selinfo *sip; 932 { 933 struct proc *p; 934 int s; 935 936 if (sip->si_pid == 0) 937 return; 938 if (sip->si_flags & SI_COLL) { 939 nselcoll++; 940 sip->si_flags &= ~SI_COLL; 941 wakeup((caddr_t)&selwait); 942 } 943 p = pfind(sip->si_pid); 944 sip->si_pid = 0; 945 if (p != NULL) { 946 SCHED_LOCK(s); 947 if (p->p_wchan == (caddr_t)&selwait) { 948 if (p->p_stat == SSLEEP) 949 setrunnable(p); 950 else 951 unsleep(p); 952 } else if (p->p_flag & P_SELECT) 953 p->p_flag &= ~P_SELECT; 954 SCHED_UNLOCK(s); 955 } 956 } 957