1 /* $NetBSD: sys_generic.c,v 1.86 2006/06/07 22:33:40 kardel Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.86 2006/06/07 22:33:40 kardel Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *); 68 int pollscan(struct lwp *, struct pollfd *, int, register_t *); 69 70 71 /* 72 * Read system call. 73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct lwp *l, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct proc *p; 86 struct filedesc *fdp; 87 88 fd = SCARG(uap, fd); 89 p = l->l_proc; 90 fdp = p->p_fd; 91 92 if ((fp = fd_getfile(fdp, fd)) == NULL) 93 return (EBADF); 94 95 if ((fp->f_flag & FREAD) == 0) { 96 simple_unlock(&fp->f_slock); 97 return (EBADF); 98 } 99 100 FILE_USE(fp); 101 102 /* dofileread() will unuse the descriptor for us */ 103 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 104 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 105 } 106 107 int 108 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte, 109 off_t *offset, int flags, register_t *retval) 110 { 111 struct iovec aiov; 112 struct uio auio; 113 struct proc *p; 114 struct vmspace *vm; 115 size_t cnt; 116 int error; 117 #ifdef KTRACE 118 struct iovec ktriov = {0}; 119 #endif 120 p = l->l_proc; 121 122 error = proc_vmspace_getref(p, &vm); 123 if (error) { 124 goto out; 125 } 126 127 aiov.iov_base = (caddr_t)buf; 128 aiov.iov_len = nbyte; 129 auio.uio_iov = &aiov; 130 auio.uio_iovcnt = 1; 131 auio.uio_resid = nbyte; 132 auio.uio_rw = UIO_READ; 133 auio.uio_vmspace = vm; 134 135 /* 136 * Reads return ssize_t because -1 is returned on error. Therefore 137 * we must restrict the length to SSIZE_MAX to avoid garbage return 138 * values. 139 */ 140 if (auio.uio_resid > SSIZE_MAX) { 141 error = EINVAL; 142 goto out; 143 } 144 145 #ifdef KTRACE 146 /* 147 * if tracing, save a copy of iovec 148 */ 149 if (KTRPOINT(p, KTR_GENIO)) 150 ktriov = aiov; 151 #endif 152 cnt = auio.uio_resid; 153 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 154 if (error) 155 if (auio.uio_resid != cnt && (error == ERESTART || 156 error == EINTR || error == EWOULDBLOCK)) 157 error = 0; 158 cnt -= auio.uio_resid; 159 #ifdef KTRACE 160 if (KTRPOINT(p, KTR_GENIO) && error == 0) 161 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error); 162 #endif 163 *retval = cnt; 164 out: 165 FILE_UNUSE(fp, l); 166 uvmspace_free(vm); 167 return (error); 168 } 169 170 /* 171 * Scatter read system call. 172 */ 173 int 174 sys_readv(struct lwp *l, void *v, register_t *retval) 175 { 176 struct sys_readv_args /* { 177 syscallarg(int) fd; 178 syscallarg(const struct iovec *) iovp; 179 syscallarg(int) iovcnt; 180 } */ *uap = v; 181 struct filedesc *fdp; 182 struct file *fp; 183 struct proc *p; 184 int fd; 185 186 fd = SCARG(uap, fd); 187 p = l->l_proc; 188 fdp = p->p_fd; 189 190 if ((fp = fd_getfile(fdp, fd)) == NULL) 191 return (EBADF); 192 193 if ((fp->f_flag & FREAD) == 0) { 194 simple_unlock(&fp->f_slock); 195 return (EBADF); 196 } 197 198 FILE_USE(fp); 199 200 /* dofilereadv() will unuse the descriptor for us */ 201 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 202 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 203 } 204 205 int 206 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 207 int iovcnt, off_t *offset, int flags, register_t *retval) 208 { 209 struct proc *p; 210 struct uio auio; 211 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 212 struct vmspace *vm; 213 int i, error; 214 size_t cnt; 215 u_int iovlen; 216 #ifdef KTRACE 217 struct iovec *ktriov; 218 #endif 219 220 p = l->l_proc; 221 error = proc_vmspace_getref(p, &vm); 222 if (error) { 223 goto out; 224 } 225 226 #ifdef KTRACE 227 ktriov = NULL; 228 #endif 229 /* note: can't use iovlen until iovcnt is validated */ 230 iovlen = iovcnt * sizeof(struct iovec); 231 if ((u_int)iovcnt > UIO_SMALLIOV) { 232 if ((u_int)iovcnt > IOV_MAX) { 233 error = EINVAL; 234 goto out; 235 } 236 iov = malloc(iovlen, M_IOV, M_WAITOK); 237 needfree = iov; 238 } else if ((u_int)iovcnt > 0) { 239 iov = aiov; 240 needfree = NULL; 241 } else { 242 error = EINVAL; 243 goto out; 244 } 245 246 auio.uio_iov = iov; 247 auio.uio_iovcnt = iovcnt; 248 auio.uio_rw = UIO_READ; 249 auio.uio_vmspace = vm; 250 error = copyin(iovp, iov, iovlen); 251 if (error) 252 goto done; 253 auio.uio_resid = 0; 254 for (i = 0; i < iovcnt; i++) { 255 auio.uio_resid += iov->iov_len; 256 /* 257 * Reads return ssize_t because -1 is returned on error. 258 * Therefore we must restrict the length to SSIZE_MAX to 259 * avoid garbage return values. 260 */ 261 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 262 error = EINVAL; 263 goto done; 264 } 265 iov++; 266 } 267 #ifdef KTRACE 268 /* 269 * if tracing, save a copy of iovec 270 */ 271 if (KTRPOINT(p, KTR_GENIO)) { 272 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 273 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 274 } 275 #endif 276 cnt = auio.uio_resid; 277 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 278 if (error) 279 if (auio.uio_resid != cnt && (error == ERESTART || 280 error == EINTR || error == EWOULDBLOCK)) 281 error = 0; 282 cnt -= auio.uio_resid; 283 #ifdef KTRACE 284 if (ktriov != NULL) { 285 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 286 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error); 287 free(ktriov, M_TEMP); 288 } 289 #endif 290 *retval = cnt; 291 done: 292 if (needfree) 293 free(needfree, M_IOV); 294 out: 295 FILE_UNUSE(fp, l); 296 uvmspace_free(vm); 297 return (error); 298 } 299 300 /* 301 * Write system call 302 */ 303 int 304 sys_write(struct lwp *l, void *v, register_t *retval) 305 { 306 struct sys_write_args /* { 307 syscallarg(int) fd; 308 syscallarg(const void *) buf; 309 syscallarg(size_t) nbyte; 310 } */ *uap = v; 311 int fd; 312 struct file *fp; 313 struct proc *p; 314 struct filedesc *fdp; 315 316 fd = SCARG(uap, fd); 317 p = l->l_proc; 318 fdp = p->p_fd; 319 320 if ((fp = fd_getfile(fdp, fd)) == NULL) 321 return (EBADF); 322 323 if ((fp->f_flag & FWRITE) == 0) { 324 simple_unlock(&fp->f_slock); 325 return (EBADF); 326 } 327 328 FILE_USE(fp); 329 330 /* dofilewrite() will unuse the descriptor for us */ 331 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 332 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 333 } 334 335 int 336 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf, 337 size_t nbyte, off_t *offset, int flags, register_t *retval) 338 { 339 struct iovec aiov; 340 struct uio auio; 341 struct proc *p; 342 struct vmspace *vm; 343 size_t cnt; 344 int error; 345 #ifdef KTRACE 346 struct iovec ktriov = {0}; 347 #endif 348 349 p = l->l_proc; 350 error = proc_vmspace_getref(p, &vm); 351 if (error) { 352 goto out; 353 } 354 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 355 aiov.iov_len = nbyte; 356 auio.uio_iov = &aiov; 357 auio.uio_iovcnt = 1; 358 auio.uio_resid = nbyte; 359 auio.uio_rw = UIO_WRITE; 360 auio.uio_vmspace = vm; 361 362 /* 363 * Writes return ssize_t because -1 is returned on error. Therefore 364 * we must restrict the length to SSIZE_MAX to avoid garbage return 365 * values. 366 */ 367 if (auio.uio_resid > SSIZE_MAX) { 368 error = EINVAL; 369 goto out; 370 } 371 372 #ifdef KTRACE 373 /* 374 * if tracing, save a copy of iovec 375 */ 376 if (KTRPOINT(p, KTR_GENIO)) 377 ktriov = aiov; 378 #endif 379 cnt = auio.uio_resid; 380 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 381 if (error) { 382 if (auio.uio_resid != cnt && (error == ERESTART || 383 error == EINTR || error == EWOULDBLOCK)) 384 error = 0; 385 if (error == EPIPE) 386 psignal(p, SIGPIPE); 387 } 388 cnt -= auio.uio_resid; 389 #ifdef KTRACE 390 if (KTRPOINT(p, KTR_GENIO) && error == 0) 391 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error); 392 #endif 393 *retval = cnt; 394 out: 395 FILE_UNUSE(fp, l); 396 uvmspace_free(vm); 397 return (error); 398 } 399 400 /* 401 * Gather write system call 402 */ 403 int 404 sys_writev(struct lwp *l, void *v, register_t *retval) 405 { 406 struct sys_writev_args /* { 407 syscallarg(int) fd; 408 syscallarg(const struct iovec *) iovp; 409 syscallarg(int) iovcnt; 410 } */ *uap = v; 411 int fd; 412 struct file *fp; 413 struct proc *p; 414 struct filedesc *fdp; 415 416 fd = SCARG(uap, fd); 417 p = l->l_proc; 418 fdp = p->p_fd; 419 420 if ((fp = fd_getfile(fdp, fd)) == NULL) 421 return (EBADF); 422 423 if ((fp->f_flag & FWRITE) == 0) { 424 simple_unlock(&fp->f_slock); 425 return (EBADF); 426 } 427 428 FILE_USE(fp); 429 430 /* dofilewritev() will unuse the descriptor for us */ 431 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 432 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 433 } 434 435 int 436 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 437 int iovcnt, off_t *offset, int flags, register_t *retval) 438 { 439 struct proc *p; 440 struct uio auio; 441 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 442 struct vmspace *vm; 443 int i, error; 444 size_t cnt; 445 u_int iovlen; 446 #ifdef KTRACE 447 struct iovec *ktriov; 448 #endif 449 450 p = l->l_proc; 451 error = proc_vmspace_getref(p, &vm); 452 if (error) { 453 goto out; 454 } 455 #ifdef KTRACE 456 ktriov = NULL; 457 #endif 458 /* note: can't use iovlen until iovcnt is validated */ 459 iovlen = iovcnt * sizeof(struct iovec); 460 if ((u_int)iovcnt > UIO_SMALLIOV) { 461 if ((u_int)iovcnt > IOV_MAX) { 462 error = EINVAL; 463 goto out; 464 } 465 iov = malloc(iovlen, M_IOV, M_WAITOK); 466 needfree = iov; 467 } else if ((u_int)iovcnt > 0) { 468 iov = aiov; 469 needfree = NULL; 470 } else { 471 error = EINVAL; 472 goto out; 473 } 474 475 auio.uio_iov = iov; 476 auio.uio_iovcnt = iovcnt; 477 auio.uio_rw = UIO_WRITE; 478 auio.uio_vmspace = vm; 479 error = copyin(iovp, iov, iovlen); 480 if (error) 481 goto done; 482 auio.uio_resid = 0; 483 for (i = 0; i < iovcnt; i++) { 484 auio.uio_resid += iov->iov_len; 485 /* 486 * Writes return ssize_t because -1 is returned on error. 487 * Therefore we must restrict the length to SSIZE_MAX to 488 * avoid garbage return values. 489 */ 490 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 491 error = EINVAL; 492 goto done; 493 } 494 iov++; 495 } 496 #ifdef KTRACE 497 /* 498 * if tracing, save a copy of iovec 499 */ 500 if (KTRPOINT(p, KTR_GENIO)) { 501 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 502 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 503 } 504 #endif 505 cnt = auio.uio_resid; 506 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 507 if (error) { 508 if (auio.uio_resid != cnt && (error == ERESTART || 509 error == EINTR || error == EWOULDBLOCK)) 510 error = 0; 511 if (error == EPIPE) 512 psignal(p, SIGPIPE); 513 } 514 cnt -= auio.uio_resid; 515 #ifdef KTRACE 516 if (ktriov != NULL) { 517 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 518 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error); 519 free(ktriov, M_TEMP); 520 } 521 #endif 522 *retval = cnt; 523 done: 524 if (needfree) 525 free(needfree, M_IOV); 526 out: 527 FILE_UNUSE(fp, l); 528 uvmspace_free(vm); 529 return (error); 530 } 531 532 /* 533 * Ioctl system call 534 */ 535 /* ARGSUSED */ 536 int 537 sys_ioctl(struct lwp *l, void *v, register_t *retval) 538 { 539 struct sys_ioctl_args /* { 540 syscallarg(int) fd; 541 syscallarg(u_long) com; 542 syscallarg(caddr_t) data; 543 } */ *uap = v; 544 struct file *fp; 545 struct proc *p; 546 struct filedesc *fdp; 547 u_long com; 548 int error; 549 u_int size; 550 caddr_t data, memp; 551 #define STK_PARAMS 128 552 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 553 554 error = 0; 555 p = l->l_proc; 556 fdp = p->p_fd; 557 558 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 559 return (EBADF); 560 561 FILE_USE(fp); 562 563 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 564 error = EBADF; 565 com = 0; 566 goto out; 567 } 568 569 switch (com = SCARG(uap, com)) { 570 case FIONCLEX: 571 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 572 goto out; 573 574 case FIOCLEX: 575 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 576 goto out; 577 } 578 579 /* 580 * Interpret high order word to find amount of data to be 581 * copied to/from the user's address space. 582 */ 583 size = IOCPARM_LEN(com); 584 if (size > IOCPARM_MAX) { 585 error = ENOTTY; 586 goto out; 587 } 588 memp = NULL; 589 if (size > sizeof(stkbuf)) { 590 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 591 data = memp; 592 } else 593 data = (caddr_t)stkbuf; 594 if (com&IOC_IN) { 595 if (size) { 596 error = copyin(SCARG(uap, data), data, size); 597 if (error) { 598 if (memp) 599 free(memp, M_IOCTLOPS); 600 goto out; 601 } 602 #ifdef KTRACE 603 if (KTRPOINT(p, KTR_GENIO)) { 604 struct iovec iov; 605 iov.iov_base = SCARG(uap, data); 606 iov.iov_len = size; 607 ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov, 608 size, 0); 609 } 610 #endif 611 } else 612 *(caddr_t *)data = SCARG(uap, data); 613 } else if ((com&IOC_OUT) && size) 614 /* 615 * Zero the buffer so the user always 616 * gets back something deterministic. 617 */ 618 memset(data, 0, size); 619 else if (com&IOC_VOID) 620 *(caddr_t *)data = SCARG(uap, data); 621 622 switch (com) { 623 624 case FIONBIO: 625 if (*(int *)data != 0) 626 fp->f_flag |= FNONBLOCK; 627 else 628 fp->f_flag &= ~FNONBLOCK; 629 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l); 630 break; 631 632 case FIOASYNC: 633 if (*(int *)data != 0) 634 fp->f_flag |= FASYNC; 635 else 636 fp->f_flag &= ~FASYNC; 637 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l); 638 break; 639 640 default: 641 error = (*fp->f_ops->fo_ioctl)(fp, com, data, l); 642 /* 643 * Copy any data to user, size was 644 * already set and checked above. 645 */ 646 if (error == 0 && (com&IOC_OUT) && size) { 647 error = copyout(data, SCARG(uap, data), size); 648 #ifdef KTRACE 649 if (KTRPOINT(p, KTR_GENIO)) { 650 struct iovec iov; 651 iov.iov_base = SCARG(uap, data); 652 iov.iov_len = size; 653 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, 654 size, error); 655 } 656 #endif 657 } 658 break; 659 } 660 if (memp) 661 free(memp, M_IOCTLOPS); 662 out: 663 FILE_UNUSE(fp, l); 664 switch (error) { 665 case -1: 666 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 667 "pid=%d comm=%s\n", 668 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 669 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 670 p->p_pid, p->p_comm); 671 /* FALLTHROUGH */ 672 case EPASSTHROUGH: 673 error = ENOTTY; 674 /* FALLTHROUGH */ 675 default: 676 return (error); 677 } 678 } 679 680 int selwait, nselcoll; 681 682 /* 683 * Select system call. 684 */ 685 int 686 sys_pselect(struct lwp *l, void *v, register_t *retval) 687 { 688 struct sys_pselect_args /* { 689 syscallarg(int) nd; 690 syscallarg(fd_set *) in; 691 syscallarg(fd_set *) ou; 692 syscallarg(fd_set *) ex; 693 syscallarg(const struct timespec *) ts; 694 syscallarg(sigset_t *) mask; 695 } */ * const uap = v; 696 struct timespec ats; 697 struct timeval atv, *tv = NULL; 698 sigset_t amask, *mask = NULL; 699 int error; 700 701 if (SCARG(uap, ts)) { 702 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 703 if (error) 704 return error; 705 atv.tv_sec = ats.tv_sec; 706 atv.tv_usec = ats.tv_nsec / 1000; 707 tv = &atv; 708 } 709 if (SCARG(uap, mask) != NULL) { 710 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 711 if (error) 712 return error; 713 mask = &amask; 714 } 715 716 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 717 SCARG(uap, ou), SCARG(uap, ex), tv, mask); 718 } 719 720 int 721 sys_select(struct lwp *l, void *v, register_t *retval) 722 { 723 struct sys_select_args /* { 724 syscallarg(int) nd; 725 syscallarg(fd_set *) in; 726 syscallarg(fd_set *) ou; 727 syscallarg(fd_set *) ex; 728 syscallarg(struct timeval *) tv; 729 } */ * const uap = v; 730 struct timeval atv, *tv = NULL; 731 int error; 732 733 if (SCARG(uap, tv)) { 734 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 735 sizeof(atv)); 736 if (error) 737 return error; 738 tv = &atv; 739 } 740 741 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 742 SCARG(uap, ou), SCARG(uap, ex), tv, NULL); 743 } 744 745 int 746 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in, 747 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask) 748 { 749 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 750 sizeof(fd_mask) * 6]; 751 struct proc * const p = l->l_proc; 752 caddr_t bits; 753 int s, ncoll, error, timo; 754 size_t ni; 755 sigset_t oldmask; 756 757 error = 0; 758 if (nd < 0) 759 return (EINVAL); 760 if (nd > p->p_fd->fd_nfiles) { 761 /* forgiving; slightly wrong */ 762 nd = p->p_fd->fd_nfiles; 763 } 764 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 765 if (ni * 6 > sizeof(smallbits)) 766 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 767 else 768 bits = smallbits; 769 770 #define getbits(name, x) \ 771 if (u_ ## name) { \ 772 error = copyin(u_ ## name, bits + ni * x, ni); \ 773 if (error) \ 774 goto done; \ 775 } else \ 776 memset(bits + ni * x, 0, ni); 777 getbits(in, 0); 778 getbits(ou, 1); 779 getbits(ex, 2); 780 #undef getbits 781 782 timo = 0; 783 if (tv && itimerfix(tv)) { 784 error = EINVAL; 785 goto done; 786 } 787 if (mask) 788 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 789 790 retry: 791 ncoll = nselcoll; 792 l->l_flag |= L_SELECT; 793 error = selscan(l, (fd_mask *)(bits + ni * 0), 794 (fd_mask *)(bits + ni * 3), nd, retval); 795 if (error || *retval) 796 goto done; 797 if (tv) { 798 /* 799 * We have to recalculate the timeout on every retry. 800 */ 801 timo = tvtohz(tv); 802 if (timo <= 0) 803 goto done; 804 } 805 s = splsched(); 806 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 807 splx(s); 808 goto retry; 809 } 810 l->l_flag &= ~L_SELECT; 811 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 812 splx(s); 813 if (error == 0) 814 goto retry; 815 done: 816 if (mask) 817 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 818 l->l_flag &= ~L_SELECT; 819 /* select is not restarted after signals... */ 820 if (error == ERESTART) 821 error = EINTR; 822 if (error == EWOULDBLOCK) 823 error = 0; 824 if (error == 0) { 825 826 #define putbits(name, x) \ 827 if (u_ ## name) { \ 828 error = copyout(bits + ni * x, u_ ## name, ni); \ 829 if (error) \ 830 goto out; \ 831 } 832 putbits(in, 3); 833 putbits(ou, 4); 834 putbits(ex, 5); 835 #undef putbits 836 } 837 out: 838 if (ni * 6 > sizeof(smallbits)) 839 free(bits, M_TEMP); 840 return (error); 841 } 842 843 int 844 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd, 845 register_t *retval) 846 { 847 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 848 POLLWRNORM | POLLHUP | POLLERR, 849 POLLRDBAND }; 850 struct proc *p = l->l_proc; 851 struct filedesc *fdp; 852 int msk, i, j, fd, n; 853 fd_mask ibits, obits; 854 struct file *fp; 855 856 fdp = p->p_fd; 857 n = 0; 858 for (msk = 0; msk < 3; msk++) { 859 for (i = 0; i < nfd; i += NFDBITS) { 860 ibits = *ibitp++; 861 obits = 0; 862 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 863 ibits &= ~(1 << j); 864 if ((fp = fd_getfile(fdp, fd)) == NULL) 865 return (EBADF); 866 FILE_USE(fp); 867 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) { 868 obits |= (1 << j); 869 n++; 870 } 871 FILE_UNUSE(fp, l); 872 } 873 *obitp++ = obits; 874 } 875 } 876 *retval = n; 877 return (0); 878 } 879 880 /* 881 * Poll system call. 882 */ 883 int 884 sys_poll(struct lwp *l, void *v, register_t *retval) 885 { 886 struct sys_poll_args /* { 887 syscallarg(struct pollfd *) fds; 888 syscallarg(u_int) nfds; 889 syscallarg(int) timeout; 890 } */ * const uap = v; 891 struct timeval atv, *tv = NULL; 892 893 if (SCARG(uap, timeout) != INFTIM) { 894 atv.tv_sec = SCARG(uap, timeout) / 1000; 895 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 896 tv = &atv; 897 } 898 899 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 900 tv, NULL); 901 } 902 903 /* 904 * Poll system call. 905 */ 906 int 907 sys_pollts(struct lwp *l, void *v, register_t *retval) 908 { 909 struct sys_pollts_args /* { 910 syscallarg(struct pollfd *) fds; 911 syscallarg(u_int) nfds; 912 syscallarg(const struct timespec *) ts; 913 syscallarg(const sigset_t *) mask; 914 } */ * const uap = v; 915 struct timespec ats; 916 struct timeval atv, *tv = NULL; 917 sigset_t amask, *mask = NULL; 918 int error; 919 920 if (SCARG(uap, ts)) { 921 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 922 if (error) 923 return error; 924 atv.tv_sec = ats.tv_sec; 925 atv.tv_usec = ats.tv_nsec / 1000; 926 tv = &atv; 927 } 928 if (SCARG(uap, mask)) { 929 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 930 if (error) 931 return error; 932 mask = &amask; 933 } 934 935 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 936 tv, mask); 937 } 938 939 int 940 pollcommon(struct lwp *l, register_t *retval, 941 struct pollfd *u_fds, u_int nfds, 942 struct timeval *tv, sigset_t *mask) 943 { 944 char smallbits[32 * sizeof(struct pollfd)]; 945 struct proc * const p = l->l_proc; 946 caddr_t bits; 947 sigset_t oldmask; 948 int s, ncoll, error, timo; 949 size_t ni; 950 951 if (nfds > p->p_fd->fd_nfiles) { 952 /* forgiving; slightly wrong */ 953 nfds = p->p_fd->fd_nfiles; 954 } 955 ni = nfds * sizeof(struct pollfd); 956 if (ni > sizeof(smallbits)) 957 bits = malloc(ni, M_TEMP, M_WAITOK); 958 else 959 bits = smallbits; 960 961 error = copyin(u_fds, bits, ni); 962 if (error) 963 goto done; 964 965 timo = 0; 966 if (tv && itimerfix(tv)) { 967 error = EINVAL; 968 goto done; 969 } 970 if (mask != NULL) 971 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 972 973 retry: 974 ncoll = nselcoll; 975 l->l_flag |= L_SELECT; 976 error = pollscan(l, (struct pollfd *)bits, nfds, retval); 977 if (error || *retval) 978 goto done; 979 if (tv) { 980 /* 981 * We have to recalculate the timeout on every retry. 982 */ 983 timo = tvtohz(tv); 984 if (timo <= 0) 985 goto done; 986 } 987 s = splsched(); 988 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 989 splx(s); 990 goto retry; 991 } 992 l->l_flag &= ~L_SELECT; 993 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 994 splx(s); 995 if (error == 0) 996 goto retry; 997 done: 998 if (mask != NULL) 999 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 1000 l->l_flag &= ~L_SELECT; 1001 /* poll is not restarted after signals... */ 1002 if (error == ERESTART) 1003 error = EINTR; 1004 if (error == EWOULDBLOCK) 1005 error = 0; 1006 if (error == 0) { 1007 error = copyout(bits, u_fds, ni); 1008 if (error) 1009 goto out; 1010 } 1011 out: 1012 if (ni > sizeof(smallbits)) 1013 free(bits, M_TEMP); 1014 return (error); 1015 } 1016 1017 int 1018 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval) 1019 { 1020 struct proc *p = l->l_proc; 1021 struct filedesc *fdp; 1022 int i, n; 1023 struct file *fp; 1024 1025 fdp = p->p_fd; 1026 n = 0; 1027 for (i = 0; i < nfd; i++, fds++) { 1028 if (fds->fd >= fdp->fd_nfiles) { 1029 fds->revents = POLLNVAL; 1030 n++; 1031 } else if (fds->fd < 0) { 1032 fds->revents = 0; 1033 } else { 1034 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 1035 fds->revents = POLLNVAL; 1036 n++; 1037 } else { 1038 FILE_USE(fp); 1039 fds->revents = (*fp->f_ops->fo_poll)(fp, 1040 fds->events | POLLERR | POLLHUP, l); 1041 if (fds->revents != 0) 1042 n++; 1043 FILE_UNUSE(fp, l); 1044 } 1045 } 1046 } 1047 *retval = n; 1048 return (0); 1049 } 1050 1051 /*ARGSUSED*/ 1052 int 1053 seltrue(dev_t dev, int events, struct lwp *l) 1054 { 1055 1056 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1057 } 1058 1059 /* 1060 * Record a select request. 1061 */ 1062 void 1063 selrecord(struct lwp *selector, struct selinfo *sip) 1064 { 1065 struct lwp *l; 1066 struct proc *p; 1067 pid_t mypid; 1068 1069 mypid = selector->l_proc->p_pid; 1070 if (sip->sel_pid == mypid) 1071 return; 1072 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 1073 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1074 if (l->l_wchan == (caddr_t)&selwait) { 1075 sip->sel_collision = 1; 1076 return; 1077 } 1078 } 1079 } 1080 1081 sip->sel_pid = mypid; 1082 } 1083 1084 /* 1085 * Do a wakeup when a selectable event occurs. 1086 */ 1087 void 1088 selwakeup(sip) 1089 struct selinfo *sip; 1090 { 1091 struct lwp *l; 1092 struct proc *p; 1093 int s; 1094 1095 if (sip->sel_pid == 0) 1096 return; 1097 if (sip->sel_collision) { 1098 sip->sel_pid = 0; 1099 nselcoll++; 1100 sip->sel_collision = 0; 1101 wakeup((caddr_t)&selwait); 1102 return; 1103 } 1104 p = pfind(sip->sel_pid); 1105 sip->sel_pid = 0; 1106 if (p != NULL) { 1107 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1108 SCHED_LOCK(s); 1109 if (l->l_wchan == (caddr_t)&selwait) { 1110 if (l->l_stat == LSSLEEP) 1111 setrunnable(l); 1112 else 1113 unsleep(l); 1114 } else if (l->l_flag & L_SELECT) 1115 l->l_flag &= ~L_SELECT; 1116 SCHED_UNLOCK(s); 1117 } 1118 } 1119 } 1120