1 /* $NetBSD: sys_generic.c,v 1.92 2006/09/03 06:34:34 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.92 2006/09/03 06:34:34 christos Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *); 68 int pollscan(struct lwp *, struct pollfd *, int, register_t *); 69 70 71 /* 72 * Read system call. 
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int)		fd;
		syscallarg(void *)	buf;
		syscallarg(size_t)	nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* fd_getfile() leaves fp->f_slock held; drop it if fd not readable */
	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common read path: build a single-element uio over the user buffer and
 * dispatch to the file's fo_read method.  Consumes the FILE_USE()
 * reference on fp on every return path.  On success *retval holds the
 * byte count actually transferred.
 */
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec	ktriov = { .iov_base = NULL, };
#endif
	p = l->l_proc;

	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/*
		 * NOTE(review): this jumps to "out", which calls
		 * uvmspace_free(vm) even though vm may not have been
		 * set -- confirm proc_vmspace_getref()'s contract on
		 * failure.
		 */
		goto out;
	}

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/* a partial transfer suppresses interrupt/restart errors */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	struct filedesc	*fdp;
	struct file	*fp;
	struct proc	*p;
	int		fd;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* fd_getfile() leaves fp->f_slock held; drop it if fd not readable */
	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common readv path: copy in the user iovec array (heap-allocated when
 * longer than UIO_SMALLIOV entries), validate each length and the running
 * total against SSIZE_MAX, then dispatch to fo_read.  Consumes the
 * FILE_USE() reference on fp on every return path.
 */
int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio	auio;
	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace	*vm;
	int		i, error;
	size_t		cnt;
	u_int		iovlen;
#ifdef KTRACE
	struct iovec	*ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* NOTE(review): "out" frees vm, which may be unset here */
		goto out;
	}

#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* small counts use the on-stack aiov array */
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/* a partial transfer suppresses interrupt/restart errors */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int)			fd;
		syscallarg(const void *)	buf;
		syscallarg(size_t)		nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* fd_getfile() leaves fp->f_slock held; drop it if fd not writable */
	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common write path: mirror of dofileread() for UIO_WRITE; additionally
 * posts SIGPIPE on EPIPE.  Consumes the FILE_USE() reference on fp on
 * every return path.
 */
int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec	ktriov = { .iov_base = NULL, };
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* NOTE(review): "out" frees vm, which may be unset here */
		goto out;
	}
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* a partial transfer suppresses interrupt/restart errors */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* writes to a broken pipe also raise SIGPIPE */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* fd_getfile() leaves fp->f_slock held; drop it if fd not writable */
	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common writev path: mirror of dofilereadv() for UIO_WRITE; additionally
 * posts SIGPIPE on EPIPE.  Consumes the FILE_USE() reference on fp on
 * every return path.
 */
int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio	auio;
	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace	*vm;
	int		i, error;
	size_t		cnt;
	u_int		iovlen;
#ifdef KTRACE
	struct iovec	*ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* NOTE(review): "out" frees vm, which may be unset here */
		goto out;
	}
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* small counts use the on-stack aiov array */
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* a partial transfer suppresses interrupt/restart errors */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* writes to a broken pipe also raise SIGPIPE */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 *
 Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(caddr_t)	data;
	} */ *uap = v;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;
	u_long		com;
	int		error;
	u_int		size;
	caddr_t		data, memp;
#define	STK_PARAMS	128
	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		/* com is printed in the error switch below; keep it defined */
		com = 0;
		goto out;
	}

	/* close-on-exec requests are handled here, never passed down */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	/* arguments up to sizeof(stkbuf) live on the stack, else malloc */
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* zero-size IOC_IN: pass the user pointer itself */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* keep f_flag in sync before telling the driver */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	switch (error) {
	case -1:
		/* -1 from a driver is a bug: log it, then map to ENOTTY */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		/* EPASSTHROUGH means "not mine"; user sees ENOTTY */
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

/* sleep channel for select()/poll(), and global collision counter */
int	selwait, nselcoll;

/*
 * Select system call.
684 */ 685 int 686 sys_pselect(struct lwp *l, void *v, register_t *retval) 687 { 688 struct sys_pselect_args /* { 689 syscallarg(int) nd; 690 syscallarg(fd_set *) in; 691 syscallarg(fd_set *) ou; 692 syscallarg(fd_set *) ex; 693 syscallarg(const struct timespec *) ts; 694 syscallarg(sigset_t *) mask; 695 } */ * const uap = v; 696 struct timespec ats; 697 struct timeval atv, *tv = NULL; 698 sigset_t amask, *mask = NULL; 699 int error; 700 701 if (SCARG(uap, ts)) { 702 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 703 if (error) 704 return error; 705 atv.tv_sec = ats.tv_sec; 706 atv.tv_usec = ats.tv_nsec / 1000; 707 tv = &atv; 708 } 709 if (SCARG(uap, mask) != NULL) { 710 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 711 if (error) 712 return error; 713 mask = &amask; 714 } 715 716 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 717 SCARG(uap, ou), SCARG(uap, ex), tv, mask); 718 } 719 720 int 721 inittimeleft(struct timeval *tv, struct timeval *sleeptv) 722 { 723 if (itimerfix(tv)) 724 return -1; 725 getmicrouptime(sleeptv); 726 return 0; 727 } 728 729 int 730 gettimeleft(struct timeval *tv, struct timeval *sleeptv) 731 { 732 /* 733 * We have to recalculate the timeout on every retry. 
734 */ 735 struct timeval slepttv; 736 /* 737 * reduce tv by elapsed time 738 * based on monotonic time scale 739 */ 740 getmicrouptime(&slepttv); 741 timeradd(tv, sleeptv, tv); 742 timersub(tv, &slepttv, tv); 743 *sleeptv = slepttv; 744 return tvtohz(tv); 745 } 746 747 int 748 sys_select(struct lwp *l, void *v, register_t *retval) 749 { 750 struct sys_select_args /* { 751 syscallarg(int) nd; 752 syscallarg(fd_set *) in; 753 syscallarg(fd_set *) ou; 754 syscallarg(fd_set *) ex; 755 syscallarg(struct timeval *) tv; 756 } */ * const uap = v; 757 struct timeval atv, *tv = NULL; 758 int error; 759 760 if (SCARG(uap, tv)) { 761 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 762 sizeof(atv)); 763 if (error) 764 return error; 765 tv = &atv; 766 } 767 768 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 769 SCARG(uap, ou), SCARG(uap, ex), tv, NULL); 770 } 771 772 int 773 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in, 774 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask) 775 { 776 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 777 sizeof(fd_mask) * 6]; 778 struct proc * const p = l->l_proc; 779 caddr_t bits; 780 int s, ncoll, error, timo; 781 size_t ni; 782 sigset_t oldmask; 783 struct timeval sleeptv; 784 785 error = 0; 786 if (nd < 0) 787 return (EINVAL); 788 if (nd > p->p_fd->fd_nfiles) { 789 /* forgiving; slightly wrong */ 790 nd = p->p_fd->fd_nfiles; 791 } 792 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 793 if (ni * 6 > sizeof(smallbits)) 794 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 795 else 796 bits = smallbits; 797 798 #define getbits(name, x) \ 799 if (u_ ## name) { \ 800 error = copyin(u_ ## name, bits + ni * x, ni); \ 801 if (error) \ 802 goto done; \ 803 } else \ 804 memset(bits + ni * x, 0, ni); 805 getbits(in, 0); 806 getbits(ou, 1); 807 getbits(ex, 2); 808 #undef getbits 809 810 timo = 0; 811 if (tv && inittimeleft(tv, &sleeptv) == -1) { 812 error = EINVAL; 813 goto done; 814 } 815 816 if (mask) 817 
(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 818 819 retry: 820 ncoll = nselcoll; 821 l->l_flag |= L_SELECT; 822 error = selscan(l, (fd_mask *)(bits + ni * 0), 823 (fd_mask *)(bits + ni * 3), nd, retval); 824 if (error || *retval) 825 goto done; 826 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0) 827 goto done; 828 s = splsched(); 829 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 830 splx(s); 831 goto retry; 832 } 833 l->l_flag &= ~L_SELECT; 834 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 835 splx(s); 836 if (error == 0) 837 goto retry; 838 done: 839 if (mask) 840 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 841 l->l_flag &= ~L_SELECT; 842 /* select is not restarted after signals... */ 843 if (error == ERESTART) 844 error = EINTR; 845 if (error == EWOULDBLOCK) 846 error = 0; 847 if (error == 0) { 848 849 #define putbits(name, x) \ 850 if (u_ ## name) { \ 851 error = copyout(bits + ni * x, u_ ## name, ni); \ 852 if (error) \ 853 goto out; \ 854 } 855 putbits(in, 3); 856 putbits(ou, 4); 857 putbits(ex, 5); 858 #undef putbits 859 } 860 out: 861 if (ni * 6 > sizeof(smallbits)) 862 free(bits, M_TEMP); 863 return (error); 864 } 865 866 int 867 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd, 868 register_t *retval) 869 { 870 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 871 POLLWRNORM | POLLHUP | POLLERR, 872 POLLRDBAND }; 873 struct proc *p = l->l_proc; 874 struct filedesc *fdp; 875 int msk, i, j, fd, n; 876 fd_mask ibits, obits; 877 struct file *fp; 878 879 fdp = p->p_fd; 880 n = 0; 881 for (msk = 0; msk < 3; msk++) { 882 for (i = 0; i < nfd; i += NFDBITS) { 883 ibits = *ibitp++; 884 obits = 0; 885 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 886 ibits &= ~(1 << j); 887 if ((fp = fd_getfile(fdp, fd)) == NULL) 888 return (EBADF); 889 FILE_USE(fp); 890 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) { 891 obits |= (1 << j); 892 n++; 893 } 894 FILE_UNUSE(fp, l); 895 } 896 *obitp++ = obits; 
897 } 898 } 899 *retval = n; 900 return (0); 901 } 902 903 /* 904 * Poll system call. 905 */ 906 int 907 sys_poll(struct lwp *l, void *v, register_t *retval) 908 { 909 struct sys_poll_args /* { 910 syscallarg(struct pollfd *) fds; 911 syscallarg(u_int) nfds; 912 syscallarg(int) timeout; 913 } */ * const uap = v; 914 struct timeval atv, *tv = NULL; 915 916 if (SCARG(uap, timeout) != INFTIM) { 917 atv.tv_sec = SCARG(uap, timeout) / 1000; 918 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 919 tv = &atv; 920 } 921 922 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 923 tv, NULL); 924 } 925 926 /* 927 * Poll system call. 928 */ 929 int 930 sys_pollts(struct lwp *l, void *v, register_t *retval) 931 { 932 struct sys_pollts_args /* { 933 syscallarg(struct pollfd *) fds; 934 syscallarg(u_int) nfds; 935 syscallarg(const struct timespec *) ts; 936 syscallarg(const sigset_t *) mask; 937 } */ * const uap = v; 938 struct timespec ats; 939 struct timeval atv, *tv = NULL; 940 sigset_t amask, *mask = NULL; 941 int error; 942 943 if (SCARG(uap, ts)) { 944 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 945 if (error) 946 return error; 947 atv.tv_sec = ats.tv_sec; 948 atv.tv_usec = ats.tv_nsec / 1000; 949 tv = &atv; 950 } 951 if (SCARG(uap, mask)) { 952 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 953 if (error) 954 return error; 955 mask = &amask; 956 } 957 958 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 959 tv, mask); 960 } 961 962 int 963 pollcommon(struct lwp *l, register_t *retval, 964 struct pollfd *u_fds, u_int nfds, 965 struct timeval *tv, sigset_t *mask) 966 { 967 char smallbits[32 * sizeof(struct pollfd)]; 968 struct proc * const p = l->l_proc; 969 caddr_t bits; 970 sigset_t oldmask; 971 int s, ncoll, error, timo; 972 size_t ni; 973 struct timeval sleeptv; 974 975 if (nfds > p->p_fd->fd_nfiles) { 976 /* forgiving; slightly wrong */ 977 nfds = p->p_fd->fd_nfiles; 978 } 979 ni = nfds * sizeof(struct pollfd); 980 if (ni > 
sizeof(smallbits)) 981 bits = malloc(ni, M_TEMP, M_WAITOK); 982 else 983 bits = smallbits; 984 985 error = copyin(u_fds, bits, ni); 986 if (error) 987 goto done; 988 989 timo = 0; 990 if (tv && inittimeleft(tv, &sleeptv) == -1) { 991 error = EINVAL; 992 goto done; 993 } 994 995 if (mask != NULL) 996 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 997 998 retry: 999 ncoll = nselcoll; 1000 l->l_flag |= L_SELECT; 1001 error = pollscan(l, (struct pollfd *)bits, nfds, retval); 1002 if (error || *retval) 1003 goto done; 1004 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0) 1005 goto done; 1006 s = splsched(); 1007 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 1008 splx(s); 1009 goto retry; 1010 } 1011 l->l_flag &= ~L_SELECT; 1012 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 1013 splx(s); 1014 if (error == 0) 1015 goto retry; 1016 done: 1017 if (mask != NULL) 1018 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 1019 l->l_flag &= ~L_SELECT; 1020 /* poll is not restarted after signals... 
*/ 1021 if (error == ERESTART) 1022 error = EINTR; 1023 if (error == EWOULDBLOCK) 1024 error = 0; 1025 if (error == 0) { 1026 error = copyout(bits, u_fds, ni); 1027 if (error) 1028 goto out; 1029 } 1030 out: 1031 if (ni > sizeof(smallbits)) 1032 free(bits, M_TEMP); 1033 return (error); 1034 } 1035 1036 int 1037 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval) 1038 { 1039 struct proc *p = l->l_proc; 1040 struct filedesc *fdp; 1041 int i, n; 1042 struct file *fp; 1043 1044 fdp = p->p_fd; 1045 n = 0; 1046 for (i = 0; i < nfd; i++, fds++) { 1047 if (fds->fd >= fdp->fd_nfiles) { 1048 fds->revents = POLLNVAL; 1049 n++; 1050 } else if (fds->fd < 0) { 1051 fds->revents = 0; 1052 } else { 1053 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 1054 fds->revents = POLLNVAL; 1055 n++; 1056 } else { 1057 FILE_USE(fp); 1058 fds->revents = (*fp->f_ops->fo_poll)(fp, 1059 fds->events | POLLERR | POLLHUP, l); 1060 if (fds->revents != 0) 1061 n++; 1062 FILE_UNUSE(fp, l); 1063 } 1064 } 1065 } 1066 *retval = n; 1067 return (0); 1068 } 1069 1070 /*ARGSUSED*/ 1071 int 1072 seltrue(dev_t dev, int events, struct lwp *l) 1073 { 1074 1075 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1076 } 1077 1078 /* 1079 * Record a select request. 1080 */ 1081 void 1082 selrecord(struct lwp *selector, struct selinfo *sip) 1083 { 1084 struct lwp *l; 1085 struct proc *p; 1086 pid_t mypid; 1087 1088 mypid = selector->l_proc->p_pid; 1089 if (sip->sel_pid == mypid) 1090 return; 1091 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 1092 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1093 if (l->l_wchan == (caddr_t)&selwait) { 1094 sip->sel_collision = 1; 1095 return; 1096 } 1097 } 1098 } 1099 1100 sip->sel_pid = mypid; 1101 } 1102 1103 /* 1104 * Do a wakeup when a selectable event occurs. 
1105 */ 1106 void 1107 selwakeup(sip) 1108 struct selinfo *sip; 1109 { 1110 struct lwp *l; 1111 struct proc *p; 1112 int s; 1113 1114 if (sip->sel_pid == 0) 1115 return; 1116 if (sip->sel_collision) { 1117 sip->sel_pid = 0; 1118 nselcoll++; 1119 sip->sel_collision = 0; 1120 wakeup((caddr_t)&selwait); 1121 return; 1122 } 1123 p = pfind(sip->sel_pid); 1124 sip->sel_pid = 0; 1125 if (p != NULL) { 1126 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1127 SCHED_LOCK(s); 1128 if (l->l_wchan == (caddr_t)&selwait) { 1129 if (l->l_stat == LSSLEEP) 1130 setrunnable(l); 1131 else 1132 unsleep(l); 1133 } else if (l->l_flag & L_SELECT) 1134 l->l_flag &= ~L_SELECT; 1135 SCHED_UNLOCK(s); 1136 } 1137 } 1138 } 1139