1 /* $NetBSD: sys_generic.c,v 1.81 2004/03/23 13:22:04 junyoung Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.81 2004/03/23 13:22:04 junyoung Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 int selscan(struct proc *, fd_mask *, fd_mask *, int, register_t *); 66 int pollscan(struct proc *, struct pollfd *, int, register_t *); 67 68 /* 69 * Read system call. 70 */ 71 /* ARGSUSED */ 72 int 73 sys_read(struct lwp *l, void *v, register_t *retval) 74 { 75 struct sys_read_args /* { 76 syscallarg(int) fd; 77 syscallarg(void *) buf; 78 syscallarg(size_t) nbyte; 79 } */ *uap = v; 80 int fd; 81 struct file *fp; 82 struct proc *p; 83 struct filedesc *fdp; 84 85 fd = SCARG(uap, fd); 86 p = l->l_proc; 87 fdp = p->p_fd; 88 89 if ((fp = fd_getfile(fdp, fd)) == NULL) 90 return (EBADF); 91 92 if ((fp->f_flag & FREAD) == 0) { 93 simple_unlock(&fp->f_slock); 94 return (EBADF); 95 } 96 97 FILE_USE(fp); 98 99 /* dofileread() will unuse the descriptor for us */ 100 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 101 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 102 } 103 104 int 105 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 106 off_t *offset, int flags, register_t *retval) 107 { 108 struct uio auio; 109 struct iovec aiov; 110 size_t cnt; 111 int error; 112 #ifdef KTRACE 113 struct iovec ktriov = {0}; 114 #endif 115 error = 0; 116 117 aiov.iov_base = (caddr_t)buf; 118 aiov.iov_len = nbyte; 119 auio.uio_iov = &aiov; 120 auio.uio_iovcnt = 1; 121 auio.uio_resid = nbyte; 122 auio.uio_rw = UIO_READ; 123 auio.uio_segflg = UIO_USERSPACE; 124 auio.uio_procp = p; 125 126 /* 127 * Reads return ssize_t because -1 is returned on error. Therefore 128 * we must restrict the length to SSIZE_MAX to avoid garbage return 129 * values. 130 */ 131 if (auio.uio_resid > SSIZE_MAX) { 132 error = EINVAL; 133 goto out; 134 } 135 136 #ifdef KTRACE 137 /* 138 * if tracing, save a copy of iovec 139 */ 140 if (KTRPOINT(p, KTR_GENIO)) 141 ktriov = aiov; 142 #endif 143 cnt = auio.uio_resid; 144 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 145 if (error) 146 if (auio.uio_resid != cnt && (error == ERESTART || 147 error == EINTR || error == EWOULDBLOCK)) 148 error = 0; 149 cnt -= auio.uio_resid; 150 #ifdef KTRACE 151 if (KTRPOINT(p, KTR_GENIO) && error == 0) 152 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 153 #endif 154 *retval = cnt; 155 out: 156 FILE_UNUSE(fp, p); 157 return (error); 158 } 159 160 /* 161 * Scatter read system call. 162 */ 163 int 164 sys_readv(struct lwp *l, void *v, register_t *retval) 165 { 166 struct sys_readv_args /* { 167 syscallarg(int) fd; 168 syscallarg(const struct iovec *) iovp; 169 syscallarg(int) iovcnt; 170 } */ *uap = v; 171 int fd; 172 struct file *fp; 173 struct proc *p; 174 struct filedesc *fdp; 175 176 fd = SCARG(uap, fd); 177 p = l->l_proc; 178 fdp = p->p_fd; 179 180 if ((fp = fd_getfile(fdp, fd)) == NULL) 181 return (EBADF); 182 183 if ((fp->f_flag & FREAD) == 0) { 184 simple_unlock(&fp->f_slock); 185 return (EBADF); 186 } 187 188 FILE_USE(fp); 189 190 /* dofilereadv() will unuse the descriptor for us */ 191 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 192 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 193 } 194 195 int 196 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 197 int iovcnt, off_t *offset, int flags, register_t *retval) 198 { 199 struct uio auio; 200 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 201 int i, error; 202 size_t cnt; 203 u_int iovlen; 204 #ifdef KTRACE 205 struct iovec *ktriov; 206 #endif 207 208 error = 0; 209 #ifdef KTRACE 210 ktriov = NULL; 211 #endif 212 /* note: can't use iovlen until iovcnt is validated */ 213 iovlen = iovcnt * sizeof(struct iovec); 214 if ((u_int)iovcnt > UIO_SMALLIOV) { 215 if ((u_int)iovcnt > IOV_MAX) { 216 error = EINVAL; 217 goto out; 218 } 219 iov = malloc(iovlen, M_IOV, M_WAITOK); 220 needfree = iov; 221 } else if ((u_int)iovcnt > 0) { 222 iov = aiov; 223 needfree = NULL; 224 } else { 225 error = EINVAL; 226 goto out; 227 } 228 229 auio.uio_iov = iov; 230 auio.uio_iovcnt = iovcnt; 231 auio.uio_rw = UIO_READ; 232 auio.uio_segflg = UIO_USERSPACE; 233 auio.uio_procp = p; 234 error = copyin(iovp, iov, iovlen); 235 if (error) 236 goto done; 237 auio.uio_resid = 0; 238 for (i = 0; i < iovcnt; i++) { 239 auio.uio_resid += iov->iov_len; 240 /* 241 * Reads return ssize_t because -1 is returned on error. 242 * Therefore we must restrict the length to SSIZE_MAX to 243 * avoid garbage return values. 244 */ 245 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 246 error = EINVAL; 247 goto done; 248 } 249 iov++; 250 } 251 #ifdef KTRACE 252 /* 253 * if tracing, save a copy of iovec 254 */ 255 if (KTRPOINT(p, KTR_GENIO)) { 256 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 257 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 258 } 259 #endif 260 cnt = auio.uio_resid; 261 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 262 if (error) 263 if (auio.uio_resid != cnt && (error == ERESTART || 264 error == EINTR || error == EWOULDBLOCK)) 265 error = 0; 266 cnt -= auio.uio_resid; 267 #ifdef KTRACE 268 if (ktriov != NULL) { 269 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 270 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error); 271 free(ktriov, M_TEMP); 272 } 273 #endif 274 *retval = cnt; 275 done: 276 if (needfree) 277 free(needfree, M_IOV); 278 out: 279 FILE_UNUSE(fp, p); 280 return (error); 281 } 282 283 /* 284 * Write system call 285 */ 286 int 287 sys_write(struct lwp *l, void *v, register_t *retval) 288 { 289 struct sys_write_args /* { 290 syscallarg(int) fd; 291 syscallarg(const void *) buf; 292 syscallarg(size_t) nbyte; 293 } */ *uap = v; 294 int fd; 295 struct file *fp; 296 struct proc *p; 297 struct filedesc *fdp; 298 299 fd = SCARG(uap, fd); 300 p = l->l_proc; 301 fdp = p->p_fd; 302 303 if ((fp = fd_getfile(fdp, fd)) == NULL) 304 return (EBADF); 305 306 if ((fp->f_flag & FWRITE) == 0) { 307 simple_unlock(&fp->f_slock); 308 return (EBADF); 309 } 310 311 FILE_USE(fp); 312 313 /* dofilewrite() will unuse the descriptor for us */ 314 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 315 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 316 } 317 318 int 319 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 320 size_t nbyte, off_t *offset, int flags, register_t *retval) 321 { 322 struct uio auio; 323 struct iovec aiov; 324 size_t cnt; 325 int error; 326 #ifdef KTRACE 327 struct iovec ktriov = {0}; 328 #endif 329 330 error = 0; 331 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 332 aiov.iov_len = nbyte; 333 auio.uio_iov = &aiov; 334 auio.uio_iovcnt = 1; 335 auio.uio_resid = nbyte; 336 auio.uio_rw = UIO_WRITE; 337 auio.uio_segflg = UIO_USERSPACE; 338 auio.uio_procp = p; 339 340 /* 341 * Writes return ssize_t because -1 is returned on error. Therefore 342 * we must restrict the length to SSIZE_MAX to avoid garbage return 343 * values. 344 */ 345 if (auio.uio_resid > SSIZE_MAX) { 346 error = EINVAL; 347 goto out; 348 } 349 350 #ifdef KTRACE 351 /* 352 * if tracing, save a copy of iovec 353 */ 354 if (KTRPOINT(p, KTR_GENIO)) 355 ktriov = aiov; 356 #endif 357 cnt = auio.uio_resid; 358 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 359 if (error) { 360 if (auio.uio_resid != cnt && (error == ERESTART || 361 error == EINTR || error == EWOULDBLOCK)) 362 error = 0; 363 if (error == EPIPE) 364 psignal(p, SIGPIPE); 365 } 366 cnt -= auio.uio_resid; 367 #ifdef KTRACE 368 if (KTRPOINT(p, KTR_GENIO) && error == 0) 369 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 370 #endif 371 *retval = cnt; 372 out: 373 FILE_UNUSE(fp, p); 374 return (error); 375 } 376 377 /* 378 * Gather write system call 379 */ 380 int 381 sys_writev(struct lwp *l, void *v, register_t *retval) 382 { 383 struct sys_writev_args /* { 384 syscallarg(int) fd; 385 syscallarg(const struct iovec *) iovp; 386 syscallarg(int) iovcnt; 387 } */ *uap = v; 388 int fd; 389 struct file *fp; 390 struct proc *p; 391 struct filedesc *fdp; 392 393 fd = SCARG(uap, fd); 394 p = l->l_proc; 395 fdp = p->p_fd; 396 397 if ((fp = fd_getfile(fdp, fd)) == NULL) 398 return (EBADF); 399 400 if ((fp->f_flag & FWRITE) == 0) { 401 simple_unlock(&fp->f_slock); 402 return (EBADF); 403 } 404 405 FILE_USE(fp); 406 407 /* dofilewritev() will unuse the descriptor for us */ 408 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 409 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 410 } 411 412 int 413 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 414 int iovcnt, off_t *offset, int flags, register_t *retval) 415 { 416 struct uio auio; 417 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 418 int i, error; 419 size_t cnt; 420 u_int iovlen; 421 #ifdef KTRACE 422 struct iovec *ktriov; 423 #endif 424 425 error = 0; 426 #ifdef KTRACE 427 ktriov = NULL; 428 #endif 429 /* note: can't use iovlen until iovcnt is validated */ 430 iovlen = iovcnt * sizeof(struct iovec); 431 if ((u_int)iovcnt > UIO_SMALLIOV) { 432 if ((u_int)iovcnt > IOV_MAX) { 433 error = EINVAL; 434 goto out; 435 } 436 iov = malloc(iovlen, M_IOV, M_WAITOK); 437 needfree = iov; 438 } else if ((u_int)iovcnt > 0) { 439 iov = aiov; 440 needfree = NULL; 441 } else { 442 error = EINVAL; 443 goto out; 444 } 445 446 auio.uio_iov = iov; 447 auio.uio_iovcnt = iovcnt; 448 auio.uio_rw = UIO_WRITE; 449 auio.uio_segflg = UIO_USERSPACE; 450 auio.uio_procp = p; 451 error = copyin(iovp, iov, iovlen); 452 if (error) 453 goto done; 454 auio.uio_resid = 0; 455 for (i = 0; i < iovcnt; i++) { 456 auio.uio_resid += iov->iov_len; 457 /* 458 * Writes return ssize_t because -1 is returned on error. 459 * Therefore we must restrict the length to SSIZE_MAX to 460 * avoid garbage return values. 461 */ 462 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 463 error = EINVAL; 464 goto done; 465 } 466 iov++; 467 } 468 #ifdef KTRACE 469 /* 470 * if tracing, save a copy of iovec 471 */ 472 if (KTRPOINT(p, KTR_GENIO)) { 473 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 474 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 475 } 476 #endif 477 cnt = auio.uio_resid; 478 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 479 if (error) { 480 if (auio.uio_resid != cnt && (error == ERESTART || 481 error == EINTR || error == EWOULDBLOCK)) 482 error = 0; 483 if (error == EPIPE) 484 psignal(p, SIGPIPE); 485 } 486 cnt -= auio.uio_resid; 487 #ifdef KTRACE 488 if (ktriov != NULL) { 489 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 490 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 491 free(ktriov, M_TEMP); 492 } 493 #endif 494 *retval = cnt; 495 done: 496 if (needfree) 497 free(needfree, M_IOV); 498 out: 499 FILE_UNUSE(fp, p); 500 return (error); 501 } 502 503 /* 504 * Ioctl system call 505 */ 506 /* ARGSUSED */ 507 int 508 sys_ioctl(struct lwp *l, void *v, register_t *retval) 509 { 510 struct sys_ioctl_args /* { 511 syscallarg(int) fd; 512 syscallarg(u_long) com; 513 syscallarg(caddr_t) data; 514 } */ *uap = v; 515 struct file *fp; 516 struct proc *p; 517 struct filedesc *fdp; 518 u_long com; 519 int error; 520 u_int size; 521 caddr_t data, memp; 522 #define STK_PARAMS 128 523 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 524 525 error = 0; 526 p = l->l_proc; 527 fdp = p->p_fd; 528 529 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 530 return (EBADF); 531 532 FILE_USE(fp); 533 534 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 535 error = EBADF; 536 com = 0; 537 goto out; 538 } 539 540 switch (com = SCARG(uap, com)) { 541 case FIONCLEX: 542 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 543 goto out; 544 545 case FIOCLEX: 546 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 547 goto out; 548 } 549 550 /* 551 * Interpret high order word to find amount of data to be 552 * copied to/from the user's address space. 553 */ 554 size = IOCPARM_LEN(com); 555 if (size > IOCPARM_MAX) { 556 error = ENOTTY; 557 goto out; 558 } 559 memp = NULL; 560 if (size > sizeof(stkbuf)) { 561 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 562 data = memp; 563 } else 564 data = (caddr_t)stkbuf; 565 if (com&IOC_IN) { 566 if (size) { 567 error = copyin(SCARG(uap, data), data, size); 568 if (error) { 569 if (memp) 570 free(memp, M_IOCTLOPS); 571 goto out; 572 } 573 #ifdef KTRACE 574 if (KTRPOINT(p, KTR_GENIO)) { 575 struct iovec iov; 576 iov.iov_base = SCARG(uap, data); 577 iov.iov_len = size; 578 ktrgenio(p, SCARG(uap, fd), UIO_WRITE, &iov, 579 size, 0); 580 } 581 #endif 582 } else 583 *(caddr_t *)data = SCARG(uap, data); 584 } else if ((com&IOC_OUT) && size) 585 /* 586 * Zero the buffer so the user always 587 * gets back something deterministic. 588 */ 589 memset(data, 0, size); 590 else if (com&IOC_VOID) 591 *(caddr_t *)data = SCARG(uap, data); 592 593 switch (com) { 594 595 case FIONBIO: 596 if (*(int *)data != 0) 597 fp->f_flag |= FNONBLOCK; 598 else 599 fp->f_flag &= ~FNONBLOCK; 600 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, p); 601 break; 602 603 case FIOASYNC: 604 if (*(int *)data != 0) 605 fp->f_flag |= FASYNC; 606 else 607 fp->f_flag &= ~FASYNC; 608 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, p); 609 break; 610 611 default: 612 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 613 /* 614 * Copy any data to user, size was 615 * already set and checked above. 616 */ 617 if (error == 0 && (com&IOC_OUT) && size) { 618 error = copyout(data, SCARG(uap, data), size); 619 #ifdef KTRACE 620 if (KTRPOINT(p, KTR_GENIO)) { 621 struct iovec iov; 622 iov.iov_base = SCARG(uap, data); 623 iov.iov_len = size; 624 ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov, 625 size, error); 626 } 627 #endif 628 } 629 break; 630 } 631 if (memp) 632 free(memp, M_IOCTLOPS); 633 out: 634 FILE_UNUSE(fp, p); 635 switch (error) { 636 case -1: 637 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 638 "pid=%d comm=%s\n", 639 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 640 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 641 p->p_pid, p->p_comm); 642 /* FALLTHROUGH */ 643 case EPASSTHROUGH: 644 error = ENOTTY; 645 /* FALLTHROUGH */ 646 default: 647 return (error); 648 } 649 } 650 651 int selwait, nselcoll; 652 653 /* 654 * Select system call. 655 */ 656 int 657 sys_select(struct lwp *l, void *v, register_t *retval) 658 { 659 struct sys_select_args /* { 660 syscallarg(int) nd; 661 syscallarg(fd_set *) in; 662 syscallarg(fd_set *) ou; 663 syscallarg(fd_set *) ex; 664 syscallarg(struct timeval *) tv; 665 } */ *uap = v; 666 struct proc *p; 667 caddr_t bits; 668 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 669 sizeof(fd_mask) * 6]; 670 struct timeval atv; 671 int s, ncoll, error, timo; 672 size_t ni; 673 674 error = 0; 675 p = l->l_proc; 676 if (SCARG(uap, nd) < 0) 677 return (EINVAL); 678 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 679 /* forgiving; slightly wrong */ 680 SCARG(uap, nd) = p->p_fd->fd_nfiles; 681 } 682 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 683 if (ni * 6 > sizeof(smallbits)) 684 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 685 else 686 bits = smallbits; 687 688 #define getbits(name, x) \ 689 if (SCARG(uap, name)) { \ 690 error = copyin(SCARG(uap, name), bits + ni * x, ni); \ 691 if (error) \ 692 goto done; \ 693 } else \ 694 memset(bits + ni * x, 0, ni); 695 getbits(in, 0); 696 getbits(ou, 1); 697 getbits(ex, 2); 698 #undef getbits 699 700 timo = 0; 701 if (SCARG(uap, tv)) { 702 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 703 sizeof(atv)); 704 if (error) 705 goto done; 706 if (itimerfix(&atv)) { 707 error = EINVAL; 708 goto done; 709 } 710 s = splclock(); 711 timeradd(&atv, &time, &atv); 712 splx(s); 713 } 714 715 retry: 716 ncoll = nselcoll; 717 l->l_flag |= L_SELECT; 718 error = selscan(p, (fd_mask *)(bits + ni * 0), 719 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval); 720 if (error || *retval) 721 goto done; 722 if (SCARG(uap, tv)) { 723 /* 724 * We have to recalculate the timeout on every retry. 725 */ 726 timo = hzto(&atv); 727 if (timo <= 0) 728 goto done; 729 } 730 s = splsched(); 731 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 732 splx(s); 733 goto retry; 734 } 735 l->l_flag &= ~L_SELECT; 736 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 737 splx(s); 738 if (error == 0) 739 goto retry; 740 done: 741 l->l_flag &= ~L_SELECT; 742 /* select is not restarted after signals... */ 743 if (error == ERESTART) 744 error = EINTR; 745 if (error == EWOULDBLOCK) 746 error = 0; 747 if (error == 0) { 748 749 #define putbits(name, x) \ 750 if (SCARG(uap, name)) { \ 751 error = copyout(bits + ni * x, SCARG(uap, name), ni); \ 752 if (error) \ 753 goto out; \ 754 } 755 putbits(in, 3); 756 putbits(ou, 4); 757 putbits(ex, 5); 758 #undef putbits 759 } 760 out: 761 if (ni * 6 > sizeof(smallbits)) 762 free(bits, M_TEMP); 763 return (error); 764 } 765 766 int 767 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd, 768 register_t *retval) 769 { 770 struct filedesc *fdp; 771 int msk, i, j, fd, n; 772 fd_mask ibits, obits; 773 struct file *fp; 774 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 775 POLLWRNORM | POLLHUP | POLLERR, 776 POLLRDBAND }; 777 778 fdp = p->p_fd; 779 n = 0; 780 for (msk = 0; msk < 3; msk++) { 781 for (i = 0; i < nfd; i += NFDBITS) { 782 ibits = *ibitp++; 783 obits = 0; 784 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 785 ibits &= ~(1 << j); 786 if ((fp = fd_getfile(fdp, fd)) == NULL) 787 return (EBADF); 788 FILE_USE(fp); 789 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 790 obits |= (1 << j); 791 n++; 792 } 793 FILE_UNUSE(fp, p); 794 } 795 *obitp++ = obits; 796 } 797 } 798 *retval = n; 799 return (0); 800 } 801 802 /* 803 * Poll system call. 804 */ 805 int 806 sys_poll(struct lwp *l, void *v, register_t *retval) 807 { 808 struct sys_poll_args /* { 809 syscallarg(struct pollfd *) fds; 810 syscallarg(u_int) nfds; 811 syscallarg(int) timeout; 812 } */ *uap = v; 813 struct proc *p; 814 caddr_t bits; 815 char smallbits[32 * sizeof(struct pollfd)]; 816 struct timeval atv; 817 int s, ncoll, error, timo; 818 size_t ni; 819 820 error = 0; 821 p = l->l_proc; 822 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 823 /* forgiving; slightly wrong */ 824 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 825 } 826 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 827 if (ni > sizeof(smallbits)) 828 bits = malloc(ni, M_TEMP, M_WAITOK); 829 else 830 bits = smallbits; 831 832 error = copyin(SCARG(uap, fds), bits, ni); 833 if (error) 834 goto done; 835 836 timo = 0; 837 if (SCARG(uap, timeout) != INFTIM) { 838 atv.tv_sec = SCARG(uap, timeout) / 1000; 839 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 840 if (itimerfix(&atv)) { 841 error = EINVAL; 842 goto done; 843 } 844 s = splclock(); 845 timeradd(&atv, &time, &atv); 846 splx(s); 847 } 848 849 retry: 850 ncoll = nselcoll; 851 l->l_flag |= L_SELECT; 852 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval); 853 if (error || *retval) 854 goto done; 855 if (SCARG(uap, timeout) != INFTIM) { 856 /* 857 * We have to recalculate the timeout on every retry. 858 */ 859 timo = hzto(&atv); 860 if (timo <= 0) 861 goto done; 862 } 863 s = splsched(); 864 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 865 splx(s); 866 goto retry; 867 } 868 l->l_flag &= ~L_SELECT; 869 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 870 splx(s); 871 if (error == 0) 872 goto retry; 873 done: 874 l->l_flag &= ~L_SELECT; 875 /* poll is not restarted after signals... */ 876 if (error == ERESTART) 877 error = EINTR; 878 if (error == EWOULDBLOCK) 879 error = 0; 880 if (error == 0) { 881 error = copyout(bits, SCARG(uap, fds), ni); 882 if (error) 883 goto out; 884 } 885 out: 886 if (ni > sizeof(smallbits)) 887 free(bits, M_TEMP); 888 return (error); 889 } 890 891 int 892 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval) 893 { 894 struct filedesc *fdp; 895 int i, n; 896 struct file *fp; 897 898 fdp = p->p_fd; 899 n = 0; 900 for (i = 0; i < nfd; i++, fds++) { 901 if (fds->fd >= fdp->fd_nfiles) { 902 fds->revents = POLLNVAL; 903 n++; 904 } else if (fds->fd < 0) { 905 fds->revents = 0; 906 } else { 907 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 908 fds->revents = POLLNVAL; 909 n++; 910 } else { 911 FILE_USE(fp); 912 fds->revents = (*fp->f_ops->fo_poll)(fp, 913 fds->events | POLLERR | POLLHUP, p); 914 if (fds->revents != 0) 915 n++; 916 FILE_UNUSE(fp, p); 917 } 918 } 919 } 920 *retval = n; 921 return (0); 922 } 923 924 /*ARGSUSED*/ 925 int 926 seltrue(dev_t dev, int events, struct proc *p) 927 { 928 929 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 930 } 931 932 /* 933 * Record a select request. 934 */ 935 void 936 selrecord(struct proc *selector, struct selinfo *sip) 937 { 938 struct lwp *l; 939 struct proc *p; 940 pid_t mypid; 941 942 mypid = selector->p_pid; 943 if (sip->sel_pid == mypid) 944 return; 945 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 946 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 947 if (l->l_wchan == (caddr_t)&selwait) { 948 sip->sel_collision = 1; 949 return; 950 } 951 } 952 } 953 954 sip->sel_pid = mypid; 955 } 956 957 /* 958 * Do a wakeup when a selectable event occurs. 959 */ 960 void 961 selwakeup(sip) 962 struct selinfo *sip; 963 { 964 struct lwp *l; 965 struct proc *p; 966 int s; 967 968 if (sip->sel_pid == 0) 969 return; 970 if (sip->sel_collision) { 971 sip->sel_pid = 0; 972 nselcoll++; 973 sip->sel_collision = 0; 974 wakeup((caddr_t)&selwait); 975 return; 976 } 977 p = pfind(sip->sel_pid); 978 sip->sel_pid = 0; 979 if (p != NULL) { 980 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 981 SCHED_LOCK(s); 982 if (l->l_wchan == (caddr_t)&selwait) { 983 if (l->l_stat == LSSLEEP) 984 setrunnable(l); 985 else 986 unsleep(l); 987 } else if (l->l_flag & L_SELECT) 988 l->l_flag &= ~L_SELECT; 989 SCHED_UNLOCK(s); 990 } 991 } 992 } 993