1 /* $NetBSD: sys_generic.c,v 1.105 2007/08/27 16:23:16 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1982, 1986, 1989, 1993 41 * The Regents of the University of California. All rights reserved. 42 * (c) UNIX System Laboratories, Inc. 43 * All or some portions of this file are derived from material licensed 44 * to the University of California by American Telephone and Telegraph 45 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 46 * the permission of UNIX System Laboratories, Inc. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 * 72 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 73 */ 74 75 /* 76 * System calls relating to files. 77 */ 78 79 #include <sys/cdefs.h> 80 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.105 2007/08/27 16:23:16 dsl Exp $"); 81 82 #include <sys/param.h> 83 #include <sys/systm.h> 84 #include <sys/filedesc.h> 85 #include <sys/ioctl.h> 86 #include <sys/file.h> 87 #include <sys/proc.h> 88 #include <sys/socketvar.h> 89 #include <sys/signalvar.h> 90 #include <sys/uio.h> 91 #include <sys/kernel.h> 92 #include <sys/stat.h> 93 #include <sys/kmem.h> 94 #include <sys/poll.h> 95 #include <sys/vnode.h> 96 #include <sys/mount.h> 97 #include <sys/syscallargs.h> 98 #include <sys/ktrace.h> 99 100 #include <uvm/uvm_extern.h> 101 102 /* Flags for lwp::l_selflag. */ 103 #define SEL_RESET 0 /* awoken, interrupted, or not yet polling */ 104 #define SEL_SCANNING 1 /* polling descriptors */ 105 #define SEL_BLOCKING 2 /* about to block on select_cv */ 106 107 static int selscan(lwp_t *, fd_mask *, fd_mask *, int, register_t *); 108 static int pollscan(lwp_t *, struct pollfd *, int, register_t *); 109 static void selclear(void); 110 111 /* Global state for select()/poll(). */ 112 kmutex_t select_lock; 113 kcondvar_t select_cv; 114 int nselcoll; 115 116 /* 117 * Read system call. 
118 */ 119 /* ARGSUSED */ 120 int 121 sys_read(lwp_t *l, void *v, register_t *retval) 122 { 123 struct sys_read_args /* { 124 syscallarg(int) fd; 125 syscallarg(void *) buf; 126 syscallarg(size_t) nbyte; 127 } */ *uap = v; 128 int fd; 129 struct file *fp; 130 proc_t *p; 131 struct filedesc *fdp; 132 133 fd = SCARG(uap, fd); 134 p = l->l_proc; 135 fdp = p->p_fd; 136 137 if ((fp = fd_getfile(fdp, fd)) == NULL) 138 return (EBADF); 139 140 if ((fp->f_flag & FREAD) == 0) { 141 simple_unlock(&fp->f_slock); 142 return (EBADF); 143 } 144 145 FILE_USE(fp); 146 147 /* dofileread() will unuse the descriptor for us */ 148 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 149 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 150 } 151 152 int 153 dofileread(lwp_t *l, int fd, struct file *fp, void *buf, size_t nbyte, 154 off_t *offset, int flags, register_t *retval) 155 { 156 struct iovec aiov; 157 struct uio auio; 158 proc_t *p; 159 struct vmspace *vm; 160 size_t cnt; 161 int error; 162 p = l->l_proc; 163 164 error = proc_vmspace_getref(p, &vm); 165 if (error) { 166 goto out; 167 } 168 169 aiov.iov_base = (void *)buf; 170 aiov.iov_len = nbyte; 171 auio.uio_iov = &aiov; 172 auio.uio_iovcnt = 1; 173 auio.uio_resid = nbyte; 174 auio.uio_rw = UIO_READ; 175 auio.uio_vmspace = vm; 176 177 /* 178 * Reads return ssize_t because -1 is returned on error. Therefore 179 * we must restrict the length to SSIZE_MAX to avoid garbage return 180 * values. 181 */ 182 if (auio.uio_resid > SSIZE_MAX) { 183 error = EINVAL; 184 goto out; 185 } 186 187 cnt = auio.uio_resid; 188 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 189 if (error) 190 if (auio.uio_resid != cnt && (error == ERESTART || 191 error == EINTR || error == EWOULDBLOCK)) 192 error = 0; 193 cnt -= auio.uio_resid; 194 ktrgenio(fd, UIO_READ, buf, cnt, error); 195 *retval = cnt; 196 out: 197 FILE_UNUSE(fp, l); 198 uvmspace_free(vm); 199 return (error); 200 } 201 202 /* 203 * Scatter read system call. 
204 */ 205 int 206 sys_readv(lwp_t *l, void *v, register_t *retval) 207 { 208 struct sys_readv_args /* { 209 syscallarg(int) fd; 210 syscallarg(const struct iovec *) iovp; 211 syscallarg(int) iovcnt; 212 } */ *uap = v; 213 214 return do_filereadv(l, SCARG(uap, fd), SCARG(uap, iovp), 215 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 216 } 217 218 int 219 do_filereadv(struct lwp *l, int fd, const struct iovec *iovp, int iovcnt, 220 off_t *offset, int flags, register_t *retval) 221 { 222 struct proc *p; 223 struct uio auio; 224 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 225 struct vmspace *vm; 226 int i, error; 227 size_t cnt; 228 u_int iovlen; 229 struct file *fp; 230 struct filedesc *fdp; 231 struct iovec *ktriov = NULL; 232 233 if (iovcnt == 0) 234 return EINVAL; 235 236 p = l->l_proc; 237 fdp = p->p_fd; 238 239 if ((fp = fd_getfile(fdp, fd)) == NULL) 240 return EBADF; 241 242 if ((fp->f_flag & FREAD) == 0) { 243 simple_unlock(&fp->f_slock); 244 return EBADF; 245 } 246 247 FILE_USE(fp); 248 249 if (offset == NULL) 250 offset = &fp->f_offset; 251 else { 252 struct vnode *vp = fp->f_data; 253 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 254 error = ESPIPE; 255 goto out; 256 } 257 /* 258 * Test that the device is seekable ? 259 * XXX This works because no file systems actually 260 * XXX take any action on the seek operation. 
261 */ 262 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 263 if (error != 0) 264 goto out; 265 } 266 267 error = proc_vmspace_getref(p, &vm); 268 if (error) 269 goto out; 270 271 iovlen = iovcnt * sizeof(struct iovec); 272 if (flags & FOF_IOV_SYSSPACE) 273 iov = __UNCONST(iovp); 274 else { 275 iov = aiov; 276 if ((u_int)iovcnt > UIO_SMALLIOV) { 277 if ((u_int)iovcnt > IOV_MAX) { 278 error = EINVAL; 279 goto out; 280 } 281 iov = kmem_alloc(iovlen, KM_SLEEP); 282 if (iov == NULL) { 283 error = ENOMEM; 284 goto out; 285 } 286 needfree = iov; 287 } 288 error = copyin(iovp, iov, iovlen); 289 if (error) 290 goto done; 291 } 292 293 auio.uio_iov = iov; 294 auio.uio_iovcnt = iovcnt; 295 auio.uio_rw = UIO_READ; 296 auio.uio_vmspace = vm; 297 298 auio.uio_resid = 0; 299 for (i = 0; i < iovcnt; i++, iov++) { 300 auio.uio_resid += iov->iov_len; 301 /* 302 * Reads return ssize_t because -1 is returned on error. 303 * Therefore we must restrict the length to SSIZE_MAX to 304 * avoid garbage return values. 
305 */ 306 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 307 error = EINVAL; 308 goto done; 309 } 310 } 311 312 /* 313 * if tracing, save a copy of iovec 314 */ 315 if (ktrpoint(KTR_GENIO)) { 316 ktriov = kmem_alloc(iovlen, KM_SLEEP); 317 if (ktriov != NULL) 318 memcpy(ktriov, auio.uio_iov, iovlen); 319 } 320 321 cnt = auio.uio_resid; 322 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 323 if (error) 324 if (auio.uio_resid != cnt && (error == ERESTART || 325 error == EINTR || error == EWOULDBLOCK)) 326 error = 0; 327 cnt -= auio.uio_resid; 328 *retval = cnt; 329 330 if (ktriov != NULL) { 331 ktrgeniov(fd, UIO_READ, ktriov, cnt, error); 332 kmem_free(ktriov, iovlen); 333 } 334 335 done: 336 if (needfree) 337 kmem_free(needfree, iovlen); 338 out: 339 FILE_UNUSE(fp, l); 340 uvmspace_free(vm); 341 return (error); 342 } 343 344 /* 345 * Write system call 346 */ 347 int 348 sys_write(lwp_t *l, void *v, register_t *retval) 349 { 350 struct sys_write_args /* { 351 syscallarg(int) fd; 352 syscallarg(const void *) buf; 353 syscallarg(size_t) nbyte; 354 } */ *uap = v; 355 int fd; 356 struct file *fp; 357 proc_t *p; 358 struct filedesc *fdp; 359 360 fd = SCARG(uap, fd); 361 p = l->l_proc; 362 fdp = p->p_fd; 363 364 if ((fp = fd_getfile(fdp, fd)) == NULL) 365 return (EBADF); 366 367 if ((fp->f_flag & FWRITE) == 0) { 368 simple_unlock(&fp->f_slock); 369 return (EBADF); 370 } 371 372 FILE_USE(fp); 373 374 /* dofilewrite() will unuse the descriptor for us */ 375 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 376 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 377 } 378 379 int 380 dofilewrite(lwp_t *l, int fd, struct file *fp, const void *buf, 381 size_t nbyte, off_t *offset, int flags, register_t *retval) 382 { 383 struct iovec aiov; 384 struct uio auio; 385 proc_t *p; 386 struct vmspace *vm; 387 size_t cnt; 388 int error; 389 390 p = l->l_proc; 391 error = proc_vmspace_getref(p, &vm); 392 if (error) { 393 goto out; 394 } 395 
aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 396 aiov.iov_len = nbyte; 397 auio.uio_iov = &aiov; 398 auio.uio_iovcnt = 1; 399 auio.uio_resid = nbyte; 400 auio.uio_rw = UIO_WRITE; 401 auio.uio_vmspace = vm; 402 403 /* 404 * Writes return ssize_t because -1 is returned on error. Therefore 405 * we must restrict the length to SSIZE_MAX to avoid garbage return 406 * values. 407 */ 408 if (auio.uio_resid > SSIZE_MAX) { 409 error = EINVAL; 410 goto out; 411 } 412 413 cnt = auio.uio_resid; 414 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 415 if (error) { 416 if (auio.uio_resid != cnt && (error == ERESTART || 417 error == EINTR || error == EWOULDBLOCK)) 418 error = 0; 419 if (error == EPIPE) { 420 mutex_enter(&proclist_mutex); 421 psignal(p, SIGPIPE); 422 mutex_exit(&proclist_mutex); 423 } 424 } 425 cnt -= auio.uio_resid; 426 ktrgenio(fd, UIO_WRITE, buf, cnt, error); 427 *retval = cnt; 428 out: 429 FILE_UNUSE(fp, l); 430 uvmspace_free(vm); 431 return (error); 432 } 433 434 /* 435 * Gather write system call 436 */ 437 int 438 sys_writev(lwp_t *l, void *v, register_t *retval) 439 { 440 struct sys_writev_args /* { 441 syscallarg(int) fd; 442 syscallarg(const struct iovec *) iovp; 443 syscallarg(int) iovcnt; 444 } */ *uap = v; 445 446 return do_filewritev(l, SCARG(uap, fd), SCARG(uap, iovp), 447 SCARG(uap, iovcnt), NULL, FOF_UPDATE_OFFSET, retval); 448 } 449 450 int 451 do_filewritev(struct lwp *l, int fd, const struct iovec *iovp, int iovcnt, 452 off_t *offset, int flags, register_t *retval) 453 { 454 struct proc *p; 455 struct uio auio; 456 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 457 struct vmspace *vm; 458 int i, error; 459 size_t cnt; 460 u_int iovlen; 461 struct file *fp; 462 struct filedesc *fdp; 463 struct iovec *ktriov = NULL; 464 465 if (iovcnt == 0) 466 return EINVAL; 467 468 p = l->l_proc; 469 fdp = p->p_fd; 470 471 if ((fp = fd_getfile(fdp, fd)) == NULL) 472 return EBADF; 473 474 if ((fp->f_flag & FWRITE) == 
0) { 475 simple_unlock(&fp->f_slock); 476 return EBADF; 477 } 478 479 FILE_USE(fp); 480 481 if (offset == NULL) 482 offset = &fp->f_offset; 483 else { 484 struct vnode *vp = fp->f_data; 485 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 486 error = ESPIPE; 487 goto out; 488 } 489 /* 490 * Test that the device is seekable ? 491 * XXX This works because no file systems actually 492 * XXX take any action on the seek operation. 493 */ 494 error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); 495 if (error != 0) 496 goto out; 497 } 498 499 error = proc_vmspace_getref(p, &vm); 500 if (error) 501 goto out; 502 503 iovlen = iovcnt * sizeof(struct iovec); 504 if (flags & FOF_IOV_SYSSPACE) 505 iov = __UNCONST(iovp); 506 else { 507 iov = aiov; 508 if ((u_int)iovcnt > UIO_SMALLIOV) { 509 if ((u_int)iovcnt > IOV_MAX) { 510 error = EINVAL; 511 goto out; 512 } 513 iov = kmem_alloc(iovlen, KM_SLEEP); 514 if (iov == NULL) { 515 error = ENOMEM; 516 goto out; 517 } 518 needfree = iov; 519 } 520 error = copyin(iovp, iov, iovlen); 521 if (error) 522 goto done; 523 } 524 525 auio.uio_iov = iov; 526 auio.uio_iovcnt = iovcnt; 527 auio.uio_rw = UIO_WRITE; 528 auio.uio_vmspace = vm; 529 530 auio.uio_resid = 0; 531 for (i = 0; i < iovcnt; i++, iov++) { 532 auio.uio_resid += iov->iov_len; 533 /* 534 * Writes return ssize_t because -1 is returned on error. 535 * Therefore we must restrict the length to SSIZE_MAX to 536 * avoid garbage return values. 
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
	}

	/*
	 * if tracing, save a copy of iovec
	 */
	if (ktrpoint(KTR_GENIO)) {
		ktriov = kmem_alloc(iovlen, KM_SLEEP);
		if (ktriov != NULL)
			memcpy(ktriov, auio.uio_iov, iovlen);
	}

	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer before interruption counts as success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* A failed write to a pipe also raises SIGPIPE. */
		if (error == EPIPE) {
			mutex_enter(&proclist_mutex);
			psignal(p, SIGPIPE);
			mutex_exit(&proclist_mutex);
		}
	}
	cnt -= auio.uio_resid;
	*retval = cnt;

	if (ktriov != NULL) {
		ktrgeniov(fd, UIO_WRITE, ktriov, cnt, error);
		kmem_free(ktriov, iovlen);
	}

 done:
	if (needfree)
		kmem_free(needfree, iovlen);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(void *)	data;
	} */ *uap = v;
	struct file	*fp;
	proc_t		*p;
	struct filedesc	*fdp;
	u_long		com;
	int		error;
	u_int		size;
	void		*data, *memp;
#define	STK_PARAMS	128
	/* Small argument buffers are copied via the stack, large ones
	 * via kmem_alloc() below. */
	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* so the diagnostic switch below is sane */
		goto out;
	}

	/*
	 * FIONCLEX/FIOCLEX operate on the descriptor table, not the
	 * file itself, so handle them before dispatching to fo_ioctl.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = kmem_alloc(size, KM_SLEEP);
		data = memp;
	} else
		data = (void *)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					kmem_free(memp, size);
				goto out;
			}
			ktrgenio(SCARG(uap, fd), UIO_WRITE, SCARG(uap, data),
			    size, 0);
		} else
			/* Zero-size IOC_IN: pass the user pointer itself. */
			*(void **)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(void **)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag in sync before informing the file itself. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
			ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, data),
			    size, error);
		}
		break;
	}
	if (memp)
		kmem_free(memp, size);
 out:
	FILE_UNUSE(fp, l);
	/*
	 * -1 from a driver is a bug (diagnosed below); both it and
	 * EPASSTHROUGH are mapped to ENOTTY for the caller.
	 */
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

/*
 * Select system call.
 */
int
sys_pselect(lwp_t *l, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int)				nd;
		syscallarg(fd_set *)			in;
		syscallarg(fd_set *)			ou;
		syscallarg(fd_set *)			ex;
		syscallarg(const struct timespec *)	ts;
		syscallarg(sigset_t *)			mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	/* Convert the optional timespec to the timeval selcommon() uses. */
	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, mask);
}

/*
 * Validate a timeout and record the monotonic start time in *sleeptv.
 * Returns -1 if the timeout is invalid.
 */
int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	if (itimerfix(tv))
		return -1;
	getmicrouptime(sleeptv);
	return 0;
}

int
gettimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	/*
	 * We have to recalculate the timeout on every retry.
	 */
	struct timeval slepttv;
	/*
	 * reduce tv by elapsed time
	 * based on monotonic time scale
	 */
	getmicrouptime(&slepttv);
	timeradd(tv, sleeptv, tv);
	timersub(tv, &slepttv, tv);
	*sleeptv = slepttv;
	return tvtohz(tv);
}

int
sys_select(lwp_t *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */ * const uap = v;
	struct timeval atv, *tv = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (void *)&atv,
			sizeof(atv));
		if (error)
			return error;
		tv = &atv;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
}

/*
 * Common code for select(2) and pselect(2).  The three input sets are
 * copied into one buffer holding six bitmaps: input sets at slots 0-2,
 * output sets at slots 3-5.  Blocks on select_cv until a scanned
 * descriptor is ready, the timeout expires, or a signal arrives.
 */
int
selcommon(lwp_t *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	proc_t		* const p = l->l_proc;
	char		*bits;
	int		ncoll, error, timo;
	size_t		ni;
	sigset_t	oldmask;
	struct timeval	sleeptv;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = kmem_alloc(ni * 6, KM_SLEEP);
	else
		bits = smallbits;

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/* pselect(): install the temporary signal mask for the wait. */
	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

	mutex_enter(&select_lock);
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		/*
		 * Scan with select_lock dropped.  l_selflag and nselcoll
		 * let us detect a wakeup or collision that happened while
		 * we were scanning, in which case we must re-scan rather
		 * than block.
		 */
		l->l_selflag = SEL_SCANNING;
		ncoll = nselcoll;
		mutex_exit(&select_lock);

		error = selscan(l, (fd_mask *)(bits + ni * 0),
		    (fd_mask *)(bits + ni * 3), nd, retval);

		mutex_enter(&select_lock);
		if (error || *retval)
			break;
		if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
			break;
		if (l->l_selflag != SEL_SCANNING || ncoll != nselcoll)
			continue;
		l->l_selflag = SEL_BLOCKING;
		error = cv_timedwait_sig(&select_cv, &select_lock, timo);
		if (error != 0)
			break;
	}
	selclear();
	mutex_exit(&select_lock);

	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}

 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0 && u_in != NULL)
		error = copyout(bits + ni * 3, u_in, ni);
	if (error == 0 && u_ou != NULL)
		error = copyout(bits + ni * 4, u_ou, ni);
	if (error == 0 && u_ex != NULL)
		error = copyout(bits + ni * 5, u_ex, ni);
	if (bits != smallbits)
		kmem_free(bits, ni * 6);
	return (error);
}

/*
 * Poll each descriptor named in the input bitmaps; set the matching bit
 * in the output bitmaps for each ready descriptor and count them into
 * *retval.  Slot order is read / write / except.
 */
int
selscan(lwp_t *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
	proc_t *p = l->l_proc;
	struct filedesc	*fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Visit each set bit (= each selected fd). */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, l);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(lwp_t *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */ * const uap = v;
	struct timeval	atv, *tv = NULL;

	/* Convert the millisecond timeout; INFTIM means wait forever. */
	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		tv = &atv;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
		tv, NULL);
}

/*
 * Poll system call.
 */
int
sys_pollts(lwp_t *l, void *v, register_t *retval)
{
	struct sys_pollts_args /* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
		tv, mask);
}

/*
 * Common code for poll(2) and pollts(2).  Copies the pollfd array in,
 * scans it, and blocks on select_cv until a descriptor is ready, the
 * timeout expires, or a signal arrives.  Shares the SEL_SCANNING /
 * SEL_BLOCKING collision protocol with selcommon().
 */
int
pollcommon(lwp_t *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	char		smallbits[32 * sizeof(struct pollfd)];
	proc_t		* const p = l->l_proc;
	void *		bits;
	sigset_t	oldmask;
	int		ncoll, error, timo;
	size_t		ni;
	struct timeval	sleeptv;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = kmem_alloc(ni, KM_SLEEP);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/* pollts(): install the temporary signal mask for the wait. */
	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

	mutex_enter(&select_lock);
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		/*
		 * Scan with select_lock dropped; re-scan instead of
		 * blocking if a wakeup or collision happened meanwhile.
		 */
		ncoll = nselcoll;
		l->l_selflag = SEL_SCANNING;
		mutex_exit(&select_lock);

		error = pollscan(l, (struct pollfd *)bits, nfds, retval);

		mutex_enter(&select_lock);
		if (error || *retval)
			break;
		if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
			break;
		if (l->l_selflag != SEL_SCANNING || nselcoll != ncoll)
			continue;
		l->l_selflag = SEL_BLOCKING;
		error = cv_timedwait_sig(&select_cv, &select_lock, timo);
		if (error != 0)
			break;
	}
	selclear();
	mutex_exit(&select_lock);

	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}
 done:
	/* poll is not restarted after signals...
	 */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		error = copyout(bits, u_fds, ni);
	if (bits != smallbits)
		kmem_free(bits, ni);
	return (error);
}

/*
 * Poll each pollfd entry once, storing the result in its revents
 * field.  The count of entries with non-zero revents goes in *retval.
 */
int
pollscan(lwp_t *l, struct pollfd *fds, int nfd, register_t *retval)
{
	proc_t *p = l->l_proc;
	struct filedesc	*fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			/* Out-of-range descriptor: flag, don't fail. */
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			/* Negative fd: ignore this entry, per poll(2). */
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				/* POLLERR/POLLHUP are always reportable. */
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, l);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, l);
			}
		}
	}
	*retval = n;
	return (0);
}

/*
 * Trivial poll routine for devices that are always ready.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, lwp_t *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(lwp_t *selector, struct selinfo *sip)
{

	mutex_enter(&select_lock);
	if (sip->sel_lwp == NULL) {
		/* First named waiter, although there may be more. */
		sip->sel_lwp = selector;
		SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
	} else if (sip->sel_lwp != selector) {
		/* Multiple waiters. */
		sip->sel_collision = true;
	}
	mutex_exit(&select_lock);
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	lwp_t *l;

	mutex_enter(&select_lock);
	if (sip->sel_collision) {
		/* Multiple waiters - just notify everybody. */
		nselcoll++;
		sip->sel_collision = false;
		cv_broadcast(&select_cv);
	} else if (sip->sel_lwp != NULL) {
		/* Only one LWP waiting. */
		l = sip->sel_lwp;
		if (l->l_selflag == SEL_BLOCKING) {
			/*
			 * If it's sleeping, wake it up. If not, it's
			 * already awake but hasn't yet removed itself
			 * from the selector. We reset the state below
			 * so that we only attempt to do this once.
			 */
			lwp_lock(l);
			if (l->l_wchan == &select_cv) {
				/* lwp_unsleep() releases the LWP lock. */
				lwp_unsleep(l);
			} else
				lwp_unlock(l);
		} else {
			/*
			 * Not yet asleep. Reset its state below so that
			 * it will go around again.
			 */
		}
		/* Forces the scanner out of SEL_SCANNING: it will re-scan. */
		l->l_selflag = SEL_RESET;
	}
	mutex_exit(&select_lock);
}

/*
 * Wake any select/poll waiters and deliver the hint to kqueue.
 */
void
selnotify(struct selinfo *sip, long knhint)
{

	selwakeup(sip);
	KNOTE(&sip->sel_klist, knhint);
}

/*
 * Remove an LWP from all objects that it is waiting for.
 */
static void
selclear(void)
{
	struct selinfo *sip;
	lwp_t *l = curlwp;

	KASSERT(mutex_owned(&select_lock));

	SLIST_FOREACH(sip, &l->l_selwait, sel_chain) {
		KASSERT(sip->sel_lwp == l);
		sip->sel_lwp = NULL;
	}
}

/*
 * Initialize the select/poll system calls.
 */
void
selsysinit(void)
{

	mutex_init(&select_lock, MUTEX_DRIVER, IPL_VM);
	cv_init(&select_cv, "select");
}