/*	$NetBSD: sys_generic.c,v 1.94 2006/10/13 16:53:36 dogcow Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.94 2006/10/13 16:53:36 dogcow Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *); 68 int pollscan(struct lwp *, struct pollfd *, int, register_t *); 69 70 71 /* 72 * Read system call. 
73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct lwp *l, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct proc *p; 86 struct filedesc *fdp; 87 88 fd = SCARG(uap, fd); 89 p = l->l_proc; 90 fdp = p->p_fd; 91 92 if ((fp = fd_getfile(fdp, fd)) == NULL) 93 return (EBADF); 94 95 if ((fp->f_flag & FREAD) == 0) { 96 simple_unlock(&fp->f_slock); 97 return (EBADF); 98 } 99 100 FILE_USE(fp); 101 102 /* dofileread() will unuse the descriptor for us */ 103 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 104 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 105 } 106 107 int 108 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte, 109 off_t *offset, int flags, register_t *retval) 110 { 111 struct iovec aiov; 112 struct uio auio; 113 struct proc *p; 114 struct vmspace *vm; 115 size_t cnt; 116 int error; 117 #ifdef KTRACE 118 struct iovec ktriov = { .iov_base = NULL, }; 119 #else 120 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */ 121 #endif 122 p = l->l_proc; 123 124 error = proc_vmspace_getref(p, &vm); 125 if (error) { 126 goto out; 127 } 128 129 aiov.iov_base = (caddr_t)buf; 130 aiov.iov_len = nbyte; 131 auio.uio_iov = &aiov; 132 auio.uio_iovcnt = 1; 133 auio.uio_resid = nbyte; 134 auio.uio_rw = UIO_READ; 135 auio.uio_vmspace = vm; 136 137 /* 138 * Reads return ssize_t because -1 is returned on error. Therefore 139 * we must restrict the length to SSIZE_MAX to avoid garbage return 140 * values. 
141 */ 142 if (auio.uio_resid > SSIZE_MAX) { 143 error = EINVAL; 144 goto out; 145 } 146 147 #ifdef KTRACE 148 /* 149 * if tracing, save a copy of iovec 150 */ 151 if (KTRPOINT(p, KTR_GENIO)) 152 ktriov = aiov; 153 #endif 154 cnt = auio.uio_resid; 155 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 156 if (error) 157 if (auio.uio_resid != cnt && (error == ERESTART || 158 error == EINTR || error == EWOULDBLOCK)) 159 error = 0; 160 cnt -= auio.uio_resid; 161 #ifdef KTRACE 162 if (KTRPOINT(p, KTR_GENIO) && error == 0) 163 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error); 164 #endif 165 *retval = cnt; 166 out: 167 FILE_UNUSE(fp, l); 168 uvmspace_free(vm); 169 return (error); 170 } 171 172 /* 173 * Scatter read system call. 174 */ 175 int 176 sys_readv(struct lwp *l, void *v, register_t *retval) 177 { 178 struct sys_readv_args /* { 179 syscallarg(int) fd; 180 syscallarg(const struct iovec *) iovp; 181 syscallarg(int) iovcnt; 182 } */ *uap = v; 183 struct filedesc *fdp; 184 struct file *fp; 185 struct proc *p; 186 int fd; 187 188 fd = SCARG(uap, fd); 189 p = l->l_proc; 190 fdp = p->p_fd; 191 192 if ((fp = fd_getfile(fdp, fd)) == NULL) 193 return (EBADF); 194 195 if ((fp->f_flag & FREAD) == 0) { 196 simple_unlock(&fp->f_slock); 197 return (EBADF); 198 } 199 200 FILE_USE(fp); 201 202 /* dofilereadv() will unuse the descriptor for us */ 203 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 204 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 205 } 206 207 int 208 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 209 int iovcnt, off_t *offset, int flags, register_t *retval) 210 { 211 struct proc *p; 212 struct uio auio; 213 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 214 struct vmspace *vm; 215 int i, error; 216 size_t cnt; 217 u_int iovlen; 218 #ifdef KTRACE 219 struct iovec *ktriov; 220 #else 221 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */ 222 #endif 223 224 p = l->l_proc; 225 error 
= proc_vmspace_getref(p, &vm); 226 if (error) { 227 goto out; 228 } 229 230 #ifdef KTRACE 231 ktriov = NULL; 232 #endif 233 /* note: can't use iovlen until iovcnt is validated */ 234 iovlen = iovcnt * sizeof(struct iovec); 235 if ((u_int)iovcnt > UIO_SMALLIOV) { 236 if ((u_int)iovcnt > IOV_MAX) { 237 error = EINVAL; 238 goto out; 239 } 240 iov = malloc(iovlen, M_IOV, M_WAITOK); 241 needfree = iov; 242 } else if ((u_int)iovcnt > 0) { 243 iov = aiov; 244 needfree = NULL; 245 } else { 246 error = EINVAL; 247 goto out; 248 } 249 250 auio.uio_iov = iov; 251 auio.uio_iovcnt = iovcnt; 252 auio.uio_rw = UIO_READ; 253 auio.uio_vmspace = vm; 254 error = copyin(iovp, iov, iovlen); 255 if (error) 256 goto done; 257 auio.uio_resid = 0; 258 for (i = 0; i < iovcnt; i++) { 259 auio.uio_resid += iov->iov_len; 260 /* 261 * Reads return ssize_t because -1 is returned on error. 262 * Therefore we must restrict the length to SSIZE_MAX to 263 * avoid garbage return values. 264 */ 265 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 266 error = EINVAL; 267 goto done; 268 } 269 iov++; 270 } 271 #ifdef KTRACE 272 /* 273 * if tracing, save a copy of iovec 274 */ 275 if (KTRPOINT(p, KTR_GENIO)) { 276 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 277 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 278 } 279 #endif 280 cnt = auio.uio_resid; 281 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 282 if (error) 283 if (auio.uio_resid != cnt && (error == ERESTART || 284 error == EINTR || error == EWOULDBLOCK)) 285 error = 0; 286 cnt -= auio.uio_resid; 287 #ifdef KTRACE 288 if (ktriov != NULL) { 289 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 290 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error); 291 free(ktriov, M_TEMP); 292 } 293 #endif 294 *retval = cnt; 295 done: 296 if (needfree) 297 free(needfree, M_IOV); 298 out: 299 FILE_UNUSE(fp, l); 300 uvmspace_free(vm); 301 return (error); 302 } 303 304 /* 305 * Write system call 306 */ 307 int 308 sys_write(struct lwp 
*l, void *v, register_t *retval) 309 { 310 struct sys_write_args /* { 311 syscallarg(int) fd; 312 syscallarg(const void *) buf; 313 syscallarg(size_t) nbyte; 314 } */ *uap = v; 315 int fd; 316 struct file *fp; 317 struct proc *p; 318 struct filedesc *fdp; 319 320 fd = SCARG(uap, fd); 321 p = l->l_proc; 322 fdp = p->p_fd; 323 324 if ((fp = fd_getfile(fdp, fd)) == NULL) 325 return (EBADF); 326 327 if ((fp->f_flag & FWRITE) == 0) { 328 simple_unlock(&fp->f_slock); 329 return (EBADF); 330 } 331 332 FILE_USE(fp); 333 334 /* dofilewrite() will unuse the descriptor for us */ 335 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 336 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 337 } 338 339 int 340 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf, 341 size_t nbyte, off_t *offset, int flags, register_t *retval) 342 { 343 struct iovec aiov; 344 struct uio auio; 345 struct proc *p; 346 struct vmspace *vm; 347 size_t cnt; 348 int error; 349 #ifdef KTRACE 350 struct iovec ktriov = { .iov_base = NULL, }; 351 #else 352 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */ 353 #endif 354 355 p = l->l_proc; 356 error = proc_vmspace_getref(p, &vm); 357 if (error) { 358 goto out; 359 } 360 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 361 aiov.iov_len = nbyte; 362 auio.uio_iov = &aiov; 363 auio.uio_iovcnt = 1; 364 auio.uio_resid = nbyte; 365 auio.uio_rw = UIO_WRITE; 366 auio.uio_vmspace = vm; 367 368 /* 369 * Writes return ssize_t because -1 is returned on error. Therefore 370 * we must restrict the length to SSIZE_MAX to avoid garbage return 371 * values. 
372 */ 373 if (auio.uio_resid > SSIZE_MAX) { 374 error = EINVAL; 375 goto out; 376 } 377 378 #ifdef KTRACE 379 /* 380 * if tracing, save a copy of iovec 381 */ 382 if (KTRPOINT(p, KTR_GENIO)) 383 ktriov = aiov; 384 #endif 385 cnt = auio.uio_resid; 386 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 387 if (error) { 388 if (auio.uio_resid != cnt && (error == ERESTART || 389 error == EINTR || error == EWOULDBLOCK)) 390 error = 0; 391 if (error == EPIPE) 392 psignal(p, SIGPIPE); 393 } 394 cnt -= auio.uio_resid; 395 #ifdef KTRACE 396 if (KTRPOINT(p, KTR_GENIO) && error == 0) 397 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error); 398 #endif 399 *retval = cnt; 400 out: 401 FILE_UNUSE(fp, l); 402 uvmspace_free(vm); 403 return (error); 404 } 405 406 /* 407 * Gather write system call 408 */ 409 int 410 sys_writev(struct lwp *l, void *v, register_t *retval) 411 { 412 struct sys_writev_args /* { 413 syscallarg(int) fd; 414 syscallarg(const struct iovec *) iovp; 415 syscallarg(int) iovcnt; 416 } */ *uap = v; 417 int fd; 418 struct file *fp; 419 struct proc *p; 420 struct filedesc *fdp; 421 422 fd = SCARG(uap, fd); 423 p = l->l_proc; 424 fdp = p->p_fd; 425 426 if ((fp = fd_getfile(fdp, fd)) == NULL) 427 return (EBADF); 428 429 if ((fp->f_flag & FWRITE) == 0) { 430 simple_unlock(&fp->f_slock); 431 return (EBADF); 432 } 433 434 FILE_USE(fp); 435 436 /* dofilewritev() will unuse the descriptor for us */ 437 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 438 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 439 } 440 441 int 442 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 443 int iovcnt, off_t *offset, int flags, register_t *retval) 444 { 445 struct proc *p; 446 struct uio auio; 447 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 448 struct vmspace *vm; 449 int i, error; 450 size_t cnt; 451 u_int iovlen; 452 #ifdef KTRACE 453 struct iovec *ktriov; 454 #else 455 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* 
shut up -Wunused */ 456 #endif 457 458 p = l->l_proc; 459 error = proc_vmspace_getref(p, &vm); 460 if (error) { 461 goto out; 462 } 463 #ifdef KTRACE 464 ktriov = NULL; 465 #endif 466 /* note: can't use iovlen until iovcnt is validated */ 467 iovlen = iovcnt * sizeof(struct iovec); 468 if ((u_int)iovcnt > UIO_SMALLIOV) { 469 if ((u_int)iovcnt > IOV_MAX) { 470 error = EINVAL; 471 goto out; 472 } 473 iov = malloc(iovlen, M_IOV, M_WAITOK); 474 needfree = iov; 475 } else if ((u_int)iovcnt > 0) { 476 iov = aiov; 477 needfree = NULL; 478 } else { 479 error = EINVAL; 480 goto out; 481 } 482 483 auio.uio_iov = iov; 484 auio.uio_iovcnt = iovcnt; 485 auio.uio_rw = UIO_WRITE; 486 auio.uio_vmspace = vm; 487 error = copyin(iovp, iov, iovlen); 488 if (error) 489 goto done; 490 auio.uio_resid = 0; 491 for (i = 0; i < iovcnt; i++) { 492 auio.uio_resid += iov->iov_len; 493 /* 494 * Writes return ssize_t because -1 is returned on error. 495 * Therefore we must restrict the length to SSIZE_MAX to 496 * avoid garbage return values. 
497 */ 498 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 499 error = EINVAL; 500 goto done; 501 } 502 iov++; 503 } 504 #ifdef KTRACE 505 /* 506 * if tracing, save a copy of iovec 507 */ 508 if (KTRPOINT(p, KTR_GENIO)) { 509 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 510 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 511 } 512 #endif 513 cnt = auio.uio_resid; 514 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 515 if (error) { 516 if (auio.uio_resid != cnt && (error == ERESTART || 517 error == EINTR || error == EWOULDBLOCK)) 518 error = 0; 519 if (error == EPIPE) 520 psignal(p, SIGPIPE); 521 } 522 cnt -= auio.uio_resid; 523 #ifdef KTRACE 524 if (ktriov != NULL) { 525 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 526 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error); 527 free(ktriov, M_TEMP); 528 } 529 #endif 530 *retval = cnt; 531 done: 532 if (needfree) 533 free(needfree, M_IOV); 534 out: 535 FILE_UNUSE(fp, l); 536 uvmspace_free(vm); 537 return (error); 538 } 539 540 /* 541 * Ioctl system call 542 */ 543 /* ARGSUSED */ 544 int 545 sys_ioctl(struct lwp *l, void *v, register_t *retval __unused) 546 { 547 struct sys_ioctl_args /* { 548 syscallarg(int) fd; 549 syscallarg(u_long) com; 550 syscallarg(caddr_t) data; 551 } */ *uap = v; 552 struct file *fp; 553 struct proc *p; 554 struct filedesc *fdp; 555 u_long com; 556 int error; 557 u_int size; 558 caddr_t data, memp; 559 #define STK_PARAMS 128 560 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 561 562 error = 0; 563 p = l->l_proc; 564 fdp = p->p_fd; 565 566 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 567 return (EBADF); 568 569 FILE_USE(fp); 570 571 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 572 error = EBADF; 573 com = 0; 574 goto out; 575 } 576 577 switch (com = SCARG(uap, com)) { 578 case FIONCLEX: 579 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 580 goto out; 581 582 case FIOCLEX: 583 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 584 goto out; 585 } 586 587 /* 
588 * Interpret high order word to find amount of data to be 589 * copied to/from the user's address space. 590 */ 591 size = IOCPARM_LEN(com); 592 if (size > IOCPARM_MAX) { 593 error = ENOTTY; 594 goto out; 595 } 596 memp = NULL; 597 if (size > sizeof(stkbuf)) { 598 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 599 data = memp; 600 } else 601 data = (caddr_t)stkbuf; 602 if (com&IOC_IN) { 603 if (size) { 604 error = copyin(SCARG(uap, data), data, size); 605 if (error) { 606 if (memp) 607 free(memp, M_IOCTLOPS); 608 goto out; 609 } 610 #ifdef KTRACE 611 if (KTRPOINT(p, KTR_GENIO)) { 612 struct iovec iov; 613 iov.iov_base = SCARG(uap, data); 614 iov.iov_len = size; 615 ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov, 616 size, 0); 617 } 618 #endif 619 } else 620 *(caddr_t *)data = SCARG(uap, data); 621 } else if ((com&IOC_OUT) && size) 622 /* 623 * Zero the buffer so the user always 624 * gets back something deterministic. 625 */ 626 memset(data, 0, size); 627 else if (com&IOC_VOID) 628 *(caddr_t *)data = SCARG(uap, data); 629 630 switch (com) { 631 632 case FIONBIO: 633 if (*(int *)data != 0) 634 fp->f_flag |= FNONBLOCK; 635 else 636 fp->f_flag &= ~FNONBLOCK; 637 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l); 638 break; 639 640 case FIOASYNC: 641 if (*(int *)data != 0) 642 fp->f_flag |= FASYNC; 643 else 644 fp->f_flag &= ~FASYNC; 645 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l); 646 break; 647 648 default: 649 error = (*fp->f_ops->fo_ioctl)(fp, com, data, l); 650 /* 651 * Copy any data to user, size was 652 * already set and checked above. 
653 */ 654 if (error == 0 && (com&IOC_OUT) && size) { 655 error = copyout(data, SCARG(uap, data), size); 656 #ifdef KTRACE 657 if (KTRPOINT(p, KTR_GENIO)) { 658 struct iovec iov; 659 iov.iov_base = SCARG(uap, data); 660 iov.iov_len = size; 661 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, 662 size, error); 663 } 664 #endif 665 } 666 break; 667 } 668 if (memp) 669 free(memp, M_IOCTLOPS); 670 out: 671 FILE_UNUSE(fp, l); 672 switch (error) { 673 case -1: 674 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 675 "pid=%d comm=%s\n", 676 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 677 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 678 p->p_pid, p->p_comm); 679 /* FALLTHROUGH */ 680 case EPASSTHROUGH: 681 error = ENOTTY; 682 /* FALLTHROUGH */ 683 default: 684 return (error); 685 } 686 } 687 688 int selwait, nselcoll; 689 690 /* 691 * Select system call. 692 */ 693 int 694 sys_pselect(struct lwp *l, void *v, register_t *retval) 695 { 696 struct sys_pselect_args /* { 697 syscallarg(int) nd; 698 syscallarg(fd_set *) in; 699 syscallarg(fd_set *) ou; 700 syscallarg(fd_set *) ex; 701 syscallarg(const struct timespec *) ts; 702 syscallarg(sigset_t *) mask; 703 } */ * const uap = v; 704 struct timespec ats; 705 struct timeval atv, *tv = NULL; 706 sigset_t amask, *mask = NULL; 707 int error; 708 709 if (SCARG(uap, ts)) { 710 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 711 if (error) 712 return error; 713 atv.tv_sec = ats.tv_sec; 714 atv.tv_usec = ats.tv_nsec / 1000; 715 tv = &atv; 716 } 717 if (SCARG(uap, mask) != NULL) { 718 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 719 if (error) 720 return error; 721 mask = &amask; 722 } 723 724 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 725 SCARG(uap, ou), SCARG(uap, ex), tv, mask); 726 } 727 728 int 729 inittimeleft(struct timeval *tv, struct timeval *sleeptv) 730 { 731 if (itimerfix(tv)) 732 return -1; 733 getmicrouptime(sleeptv); 734 return 0; 735 } 736 737 int 738 
gettimeleft(struct timeval *tv, struct timeval *sleeptv) 739 { 740 /* 741 * We have to recalculate the timeout on every retry. 742 */ 743 struct timeval slepttv; 744 /* 745 * reduce tv by elapsed time 746 * based on monotonic time scale 747 */ 748 getmicrouptime(&slepttv); 749 timeradd(tv, sleeptv, tv); 750 timersub(tv, &slepttv, tv); 751 *sleeptv = slepttv; 752 return tvtohz(tv); 753 } 754 755 int 756 sys_select(struct lwp *l, void *v, register_t *retval) 757 { 758 struct sys_select_args /* { 759 syscallarg(int) nd; 760 syscallarg(fd_set *) in; 761 syscallarg(fd_set *) ou; 762 syscallarg(fd_set *) ex; 763 syscallarg(struct timeval *) tv; 764 } */ * const uap = v; 765 struct timeval atv, *tv = NULL; 766 int error; 767 768 if (SCARG(uap, tv)) { 769 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 770 sizeof(atv)); 771 if (error) 772 return error; 773 tv = &atv; 774 } 775 776 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 777 SCARG(uap, ou), SCARG(uap, ex), tv, NULL); 778 } 779 780 int 781 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in, 782 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask) 783 { 784 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 785 sizeof(fd_mask) * 6]; 786 struct proc * const p = l->l_proc; 787 caddr_t bits; 788 int s, ncoll, error, timo; 789 size_t ni; 790 sigset_t oldmask; 791 struct timeval sleeptv; 792 793 error = 0; 794 if (nd < 0) 795 return (EINVAL); 796 if (nd > p->p_fd->fd_nfiles) { 797 /* forgiving; slightly wrong */ 798 nd = p->p_fd->fd_nfiles; 799 } 800 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 801 if (ni * 6 > sizeof(smallbits)) 802 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 803 else 804 bits = smallbits; 805 806 #define getbits(name, x) \ 807 if (u_ ## name) { \ 808 error = copyin(u_ ## name, bits + ni * x, ni); \ 809 if (error) \ 810 goto done; \ 811 } else \ 812 memset(bits + ni * x, 0, ni); 813 getbits(in, 0); 814 getbits(ou, 1); 815 getbits(ex, 2); 816 #undef getbits 817 818 timo = 0; 819 
if (tv && inittimeleft(tv, &sleeptv) == -1) { 820 error = EINVAL; 821 goto done; 822 } 823 824 if (mask) 825 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 826 827 retry: 828 ncoll = nselcoll; 829 l->l_flag |= L_SELECT; 830 error = selscan(l, (fd_mask *)(bits + ni * 0), 831 (fd_mask *)(bits + ni * 3), nd, retval); 832 if (error || *retval) 833 goto done; 834 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0) 835 goto done; 836 s = splsched(); 837 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 838 splx(s); 839 goto retry; 840 } 841 l->l_flag &= ~L_SELECT; 842 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 843 splx(s); 844 if (error == 0) 845 goto retry; 846 done: 847 if (mask) 848 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 849 l->l_flag &= ~L_SELECT; 850 /* select is not restarted after signals... */ 851 if (error == ERESTART) 852 error = EINTR; 853 if (error == EWOULDBLOCK) 854 error = 0; 855 if (error == 0) { 856 857 #define putbits(name, x) \ 858 if (u_ ## name) { \ 859 error = copyout(bits + ni * x, u_ ## name, ni); \ 860 if (error) \ 861 goto out; \ 862 } 863 putbits(in, 3); 864 putbits(ou, 4); 865 putbits(ex, 5); 866 #undef putbits 867 } 868 out: 869 if (ni * 6 > sizeof(smallbits)) 870 free(bits, M_TEMP); 871 return (error); 872 } 873 874 int 875 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd, 876 register_t *retval) 877 { 878 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 879 POLLWRNORM | POLLHUP | POLLERR, 880 POLLRDBAND }; 881 struct proc *p = l->l_proc; 882 struct filedesc *fdp; 883 int msk, i, j, fd, n; 884 fd_mask ibits, obits; 885 struct file *fp; 886 887 fdp = p->p_fd; 888 n = 0; 889 for (msk = 0; msk < 3; msk++) { 890 for (i = 0; i < nfd; i += NFDBITS) { 891 ibits = *ibitp++; 892 obits = 0; 893 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 894 ibits &= ~(1 << j); 895 if ((fp = fd_getfile(fdp, fd)) == NULL) 896 return (EBADF); 897 FILE_USE(fp); 898 if 
((*fp->f_ops->fo_poll)(fp, flag[msk], l)) { 899 obits |= (1 << j); 900 n++; 901 } 902 FILE_UNUSE(fp, l); 903 } 904 *obitp++ = obits; 905 } 906 } 907 *retval = n; 908 return (0); 909 } 910 911 /* 912 * Poll system call. 913 */ 914 int 915 sys_poll(struct lwp *l, void *v, register_t *retval) 916 { 917 struct sys_poll_args /* { 918 syscallarg(struct pollfd *) fds; 919 syscallarg(u_int) nfds; 920 syscallarg(int) timeout; 921 } */ * const uap = v; 922 struct timeval atv, *tv = NULL; 923 924 if (SCARG(uap, timeout) != INFTIM) { 925 atv.tv_sec = SCARG(uap, timeout) / 1000; 926 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 927 tv = &atv; 928 } 929 930 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 931 tv, NULL); 932 } 933 934 /* 935 * Poll system call. 936 */ 937 int 938 sys_pollts(struct lwp *l, void *v, register_t *retval) 939 { 940 struct sys_pollts_args /* { 941 syscallarg(struct pollfd *) fds; 942 syscallarg(u_int) nfds; 943 syscallarg(const struct timespec *) ts; 944 syscallarg(const sigset_t *) mask; 945 } */ * const uap = v; 946 struct timespec ats; 947 struct timeval atv, *tv = NULL; 948 sigset_t amask, *mask = NULL; 949 int error; 950 951 if (SCARG(uap, ts)) { 952 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 953 if (error) 954 return error; 955 atv.tv_sec = ats.tv_sec; 956 atv.tv_usec = ats.tv_nsec / 1000; 957 tv = &atv; 958 } 959 if (SCARG(uap, mask)) { 960 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 961 if (error) 962 return error; 963 mask = &amask; 964 } 965 966 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 967 tv, mask); 968 } 969 970 int 971 pollcommon(struct lwp *l, register_t *retval, 972 struct pollfd *u_fds, u_int nfds, 973 struct timeval *tv, sigset_t *mask) 974 { 975 char smallbits[32 * sizeof(struct pollfd)]; 976 struct proc * const p = l->l_proc; 977 caddr_t bits; 978 sigset_t oldmask; 979 int s, ncoll, error, timo; 980 size_t ni; 981 struct timeval sleeptv; 982 983 if (nfds > 
p->p_fd->fd_nfiles) { 984 /* forgiving; slightly wrong */ 985 nfds = p->p_fd->fd_nfiles; 986 } 987 ni = nfds * sizeof(struct pollfd); 988 if (ni > sizeof(smallbits)) 989 bits = malloc(ni, M_TEMP, M_WAITOK); 990 else 991 bits = smallbits; 992 993 error = copyin(u_fds, bits, ni); 994 if (error) 995 goto done; 996 997 timo = 0; 998 if (tv && inittimeleft(tv, &sleeptv) == -1) { 999 error = EINVAL; 1000 goto done; 1001 } 1002 1003 if (mask != NULL) 1004 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 1005 1006 retry: 1007 ncoll = nselcoll; 1008 l->l_flag |= L_SELECT; 1009 error = pollscan(l, (struct pollfd *)bits, nfds, retval); 1010 if (error || *retval) 1011 goto done; 1012 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0) 1013 goto done; 1014 s = splsched(); 1015 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 1016 splx(s); 1017 goto retry; 1018 } 1019 l->l_flag &= ~L_SELECT; 1020 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 1021 splx(s); 1022 if (error == 0) 1023 goto retry; 1024 done: 1025 if (mask != NULL) 1026 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 1027 l->l_flag &= ~L_SELECT; 1028 /* poll is not restarted after signals... 
*/ 1029 if (error == ERESTART) 1030 error = EINTR; 1031 if (error == EWOULDBLOCK) 1032 error = 0; 1033 if (error == 0) { 1034 error = copyout(bits, u_fds, ni); 1035 if (error) 1036 goto out; 1037 } 1038 out: 1039 if (ni > sizeof(smallbits)) 1040 free(bits, M_TEMP); 1041 return (error); 1042 } 1043 1044 int 1045 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval) 1046 { 1047 struct proc *p = l->l_proc; 1048 struct filedesc *fdp; 1049 int i, n; 1050 struct file *fp; 1051 1052 fdp = p->p_fd; 1053 n = 0; 1054 for (i = 0; i < nfd; i++, fds++) { 1055 if (fds->fd >= fdp->fd_nfiles) { 1056 fds->revents = POLLNVAL; 1057 n++; 1058 } else if (fds->fd < 0) { 1059 fds->revents = 0; 1060 } else { 1061 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 1062 fds->revents = POLLNVAL; 1063 n++; 1064 } else { 1065 FILE_USE(fp); 1066 fds->revents = (*fp->f_ops->fo_poll)(fp, 1067 fds->events | POLLERR | POLLHUP, l); 1068 if (fds->revents != 0) 1069 n++; 1070 FILE_UNUSE(fp, l); 1071 } 1072 } 1073 } 1074 *retval = n; 1075 return (0); 1076 } 1077 1078 /*ARGSUSED*/ 1079 int 1080 seltrue(dev_t dev __unused, int events, struct lwp *l __unused) 1081 { 1082 1083 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1084 } 1085 1086 /* 1087 * Record a select request. 1088 */ 1089 void 1090 selrecord(struct lwp *selector, struct selinfo *sip) 1091 { 1092 struct lwp *l; 1093 struct proc *p; 1094 pid_t mypid; 1095 1096 mypid = selector->l_proc->p_pid; 1097 if (sip->sel_pid == mypid) 1098 return; 1099 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 1100 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1101 if (l->l_wchan == (caddr_t)&selwait) { 1102 sip->sel_collision = 1; 1103 return; 1104 } 1105 } 1106 } 1107 1108 sip->sel_pid = mypid; 1109 } 1110 1111 /* 1112 * Do a wakeup when a selectable event occurs. 
1113 */ 1114 void 1115 selwakeup(sip) 1116 struct selinfo *sip; 1117 { 1118 struct lwp *l; 1119 struct proc *p; 1120 int s; 1121 1122 if (sip->sel_pid == 0) 1123 return; 1124 if (sip->sel_collision) { 1125 sip->sel_pid = 0; 1126 nselcoll++; 1127 sip->sel_collision = 0; 1128 wakeup((caddr_t)&selwait); 1129 return; 1130 } 1131 p = pfind(sip->sel_pid); 1132 sip->sel_pid = 0; 1133 if (p != NULL) { 1134 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1135 SCHED_LOCK(s); 1136 if (l->l_wchan == (caddr_t)&selwait) { 1137 if (l->l_stat == LSSLEEP) 1138 setrunnable(l); 1139 else 1140 unsleep(l); 1141 } else if (l->l_flag & L_SELECT) 1142 l->l_flag &= ~L_SELECT; 1143 SCHED_UNLOCK(s); 1144 } 1145 } 1146 } 1147