1 /* $NetBSD: sys_generic.c,v 1.85 2006/03/01 12:38:21 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.85 2006/03/01 12:38:21 yamt Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *); 68 int pollscan(struct lwp *, struct pollfd *, int, register_t *); 69 70 71 /* 72 * Read system call. 
73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct lwp *l, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct proc *p; 86 struct filedesc *fdp; 87 88 fd = SCARG(uap, fd); 89 p = l->l_proc; 90 fdp = p->p_fd; 91 92 if ((fp = fd_getfile(fdp, fd)) == NULL) 93 return (EBADF); 94 95 if ((fp->f_flag & FREAD) == 0) { 96 simple_unlock(&fp->f_slock); 97 return (EBADF); 98 } 99 100 FILE_USE(fp); 101 102 /* dofileread() will unuse the descriptor for us */ 103 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 104 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 105 } 106 107 int 108 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte, 109 off_t *offset, int flags, register_t *retval) 110 { 111 struct iovec aiov; 112 struct uio auio; 113 struct proc *p; 114 struct vmspace *vm; 115 size_t cnt; 116 int error; 117 #ifdef KTRACE 118 struct iovec ktriov = {0}; 119 #endif 120 p = l->l_proc; 121 122 error = proc_vmspace_getref(p, &vm); 123 if (error) { 124 goto out; 125 } 126 127 aiov.iov_base = (caddr_t)buf; 128 aiov.iov_len = nbyte; 129 auio.uio_iov = &aiov; 130 auio.uio_iovcnt = 1; 131 auio.uio_resid = nbyte; 132 auio.uio_rw = UIO_READ; 133 auio.uio_vmspace = vm; 134 135 /* 136 * Reads return ssize_t because -1 is returned on error. Therefore 137 * we must restrict the length to SSIZE_MAX to avoid garbage return 138 * values. 
139 */ 140 if (auio.uio_resid > SSIZE_MAX) { 141 error = EINVAL; 142 goto out; 143 } 144 145 #ifdef KTRACE 146 /* 147 * if tracing, save a copy of iovec 148 */ 149 if (KTRPOINT(p, KTR_GENIO)) 150 ktriov = aiov; 151 #endif 152 cnt = auio.uio_resid; 153 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 154 if (error) 155 if (auio.uio_resid != cnt && (error == ERESTART || 156 error == EINTR || error == EWOULDBLOCK)) 157 error = 0; 158 cnt -= auio.uio_resid; 159 #ifdef KTRACE 160 if (KTRPOINT(p, KTR_GENIO) && error == 0) 161 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error); 162 #endif 163 *retval = cnt; 164 out: 165 FILE_UNUSE(fp, l); 166 uvmspace_free(vm); 167 return (error); 168 } 169 170 /* 171 * Scatter read system call. 172 */ 173 int 174 sys_readv(struct lwp *l, void *v, register_t *retval) 175 { 176 struct sys_readv_args /* { 177 syscallarg(int) fd; 178 syscallarg(const struct iovec *) iovp; 179 syscallarg(int) iovcnt; 180 } */ *uap = v; 181 struct filedesc *fdp; 182 struct file *fp; 183 struct proc *p; 184 int fd; 185 186 fd = SCARG(uap, fd); 187 p = l->l_proc; 188 fdp = p->p_fd; 189 190 if ((fp = fd_getfile(fdp, fd)) == NULL) 191 return (EBADF); 192 193 if ((fp->f_flag & FREAD) == 0) { 194 simple_unlock(&fp->f_slock); 195 return (EBADF); 196 } 197 198 FILE_USE(fp); 199 200 /* dofilereadv() will unuse the descriptor for us */ 201 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 202 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 203 } 204 205 int 206 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 207 int iovcnt, off_t *offset, int flags, register_t *retval) 208 { 209 struct proc *p; 210 struct uio auio; 211 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 212 struct vmspace *vm; 213 int i, error; 214 size_t cnt; 215 u_int iovlen; 216 #ifdef KTRACE 217 struct iovec *ktriov; 218 #endif 219 220 p = l->l_proc; 221 error = proc_vmspace_getref(p, &vm); 222 if (error) { 223 goto out; 224 } 225 226 #ifdef 
KTRACE 227 ktriov = NULL; 228 #endif 229 /* note: can't use iovlen until iovcnt is validated */ 230 iovlen = iovcnt * sizeof(struct iovec); 231 if ((u_int)iovcnt > UIO_SMALLIOV) { 232 if ((u_int)iovcnt > IOV_MAX) { 233 error = EINVAL; 234 goto out; 235 } 236 iov = malloc(iovlen, M_IOV, M_WAITOK); 237 needfree = iov; 238 } else if ((u_int)iovcnt > 0) { 239 iov = aiov; 240 needfree = NULL; 241 } else { 242 error = EINVAL; 243 goto out; 244 } 245 246 auio.uio_iov = iov; 247 auio.uio_iovcnt = iovcnt; 248 auio.uio_rw = UIO_READ; 249 auio.uio_vmspace = vm; 250 error = copyin(iovp, iov, iovlen); 251 if (error) 252 goto done; 253 auio.uio_resid = 0; 254 for (i = 0; i < iovcnt; i++) { 255 auio.uio_resid += iov->iov_len; 256 /* 257 * Reads return ssize_t because -1 is returned on error. 258 * Therefore we must restrict the length to SSIZE_MAX to 259 * avoid garbage return values. 260 */ 261 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 262 error = EINVAL; 263 goto done; 264 } 265 iov++; 266 } 267 #ifdef KTRACE 268 /* 269 * if tracing, save a copy of iovec 270 */ 271 if (KTRPOINT(p, KTR_GENIO)) { 272 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 273 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 274 } 275 #endif 276 cnt = auio.uio_resid; 277 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 278 if (error) 279 if (auio.uio_resid != cnt && (error == ERESTART || 280 error == EINTR || error == EWOULDBLOCK)) 281 error = 0; 282 cnt -= auio.uio_resid; 283 #ifdef KTRACE 284 if (ktriov != NULL) { 285 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 286 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error); 287 free(ktriov, M_TEMP); 288 } 289 #endif 290 *retval = cnt; 291 done: 292 if (needfree) 293 free(needfree, M_IOV); 294 out: 295 FILE_UNUSE(fp, l); 296 uvmspace_free(vm); 297 return (error); 298 } 299 300 /* 301 * Write system call 302 */ 303 int 304 sys_write(struct lwp *l, void *v, register_t *retval) 305 { 306 struct sys_write_args /* { 307 
syscallarg(int) fd; 308 syscallarg(const void *) buf; 309 syscallarg(size_t) nbyte; 310 } */ *uap = v; 311 int fd; 312 struct file *fp; 313 struct proc *p; 314 struct filedesc *fdp; 315 316 fd = SCARG(uap, fd); 317 p = l->l_proc; 318 fdp = p->p_fd; 319 320 if ((fp = fd_getfile(fdp, fd)) == NULL) 321 return (EBADF); 322 323 if ((fp->f_flag & FWRITE) == 0) { 324 simple_unlock(&fp->f_slock); 325 return (EBADF); 326 } 327 328 FILE_USE(fp); 329 330 /* dofilewrite() will unuse the descriptor for us */ 331 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 332 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 333 } 334 335 int 336 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf, 337 size_t nbyte, off_t *offset, int flags, register_t *retval) 338 { 339 struct iovec aiov; 340 struct uio auio; 341 struct proc *p; 342 struct vmspace *vm; 343 size_t cnt; 344 int error; 345 #ifdef KTRACE 346 struct iovec ktriov = {0}; 347 #endif 348 349 p = l->l_proc; 350 error = proc_vmspace_getref(p, &vm); 351 if (error) { 352 goto out; 353 } 354 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 355 aiov.iov_len = nbyte; 356 auio.uio_iov = &aiov; 357 auio.uio_iovcnt = 1; 358 auio.uio_resid = nbyte; 359 auio.uio_rw = UIO_WRITE; 360 auio.uio_vmspace = vm; 361 362 /* 363 * Writes return ssize_t because -1 is returned on error. Therefore 364 * we must restrict the length to SSIZE_MAX to avoid garbage return 365 * values. 
366 */ 367 if (auio.uio_resid > SSIZE_MAX) { 368 error = EINVAL; 369 goto out; 370 } 371 372 #ifdef KTRACE 373 /* 374 * if tracing, save a copy of iovec 375 */ 376 if (KTRPOINT(p, KTR_GENIO)) 377 ktriov = aiov; 378 #endif 379 cnt = auio.uio_resid; 380 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 381 if (error) { 382 if (auio.uio_resid != cnt && (error == ERESTART || 383 error == EINTR || error == EWOULDBLOCK)) 384 error = 0; 385 if (error == EPIPE) 386 psignal(p, SIGPIPE); 387 } 388 cnt -= auio.uio_resid; 389 #ifdef KTRACE 390 if (KTRPOINT(p, KTR_GENIO) && error == 0) 391 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error); 392 #endif 393 *retval = cnt; 394 out: 395 FILE_UNUSE(fp, l); 396 uvmspace_free(vm); 397 return (error); 398 } 399 400 /* 401 * Gather write system call 402 */ 403 int 404 sys_writev(struct lwp *l, void *v, register_t *retval) 405 { 406 struct sys_writev_args /* { 407 syscallarg(int) fd; 408 syscallarg(const struct iovec *) iovp; 409 syscallarg(int) iovcnt; 410 } */ *uap = v; 411 int fd; 412 struct file *fp; 413 struct proc *p; 414 struct filedesc *fdp; 415 416 fd = SCARG(uap, fd); 417 p = l->l_proc; 418 fdp = p->p_fd; 419 420 if ((fp = fd_getfile(fdp, fd)) == NULL) 421 return (EBADF); 422 423 if ((fp->f_flag & FWRITE) == 0) { 424 simple_unlock(&fp->f_slock); 425 return (EBADF); 426 } 427 428 FILE_USE(fp); 429 430 /* dofilewritev() will unuse the descriptor for us */ 431 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 432 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 433 } 434 435 int 436 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 437 int iovcnt, off_t *offset, int flags, register_t *retval) 438 { 439 struct proc *p; 440 struct uio auio; 441 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 442 struct vmspace *vm; 443 int i, error; 444 size_t cnt; 445 u_int iovlen; 446 #ifdef KTRACE 447 struct iovec *ktriov; 448 #endif 449 450 p = l->l_proc; 451 error = 
proc_vmspace_getref(p, &vm); 452 if (error) { 453 goto out; 454 } 455 #ifdef KTRACE 456 ktriov = NULL; 457 #endif 458 /* note: can't use iovlen until iovcnt is validated */ 459 iovlen = iovcnt * sizeof(struct iovec); 460 if ((u_int)iovcnt > UIO_SMALLIOV) { 461 if ((u_int)iovcnt > IOV_MAX) { 462 error = EINVAL; 463 goto out; 464 } 465 iov = malloc(iovlen, M_IOV, M_WAITOK); 466 needfree = iov; 467 } else if ((u_int)iovcnt > 0) { 468 iov = aiov; 469 needfree = NULL; 470 } else { 471 error = EINVAL; 472 goto out; 473 } 474 475 auio.uio_iov = iov; 476 auio.uio_iovcnt = iovcnt; 477 auio.uio_rw = UIO_WRITE; 478 auio.uio_vmspace = vm; 479 error = copyin(iovp, iov, iovlen); 480 if (error) 481 goto done; 482 auio.uio_resid = 0; 483 for (i = 0; i < iovcnt; i++) { 484 auio.uio_resid += iov->iov_len; 485 /* 486 * Writes return ssize_t because -1 is returned on error. 487 * Therefore we must restrict the length to SSIZE_MAX to 488 * avoid garbage return values. 489 */ 490 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 491 error = EINVAL; 492 goto done; 493 } 494 iov++; 495 } 496 #ifdef KTRACE 497 /* 498 * if tracing, save a copy of iovec 499 */ 500 if (KTRPOINT(p, KTR_GENIO)) { 501 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 502 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 503 } 504 #endif 505 cnt = auio.uio_resid; 506 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 507 if (error) { 508 if (auio.uio_resid != cnt && (error == ERESTART || 509 error == EINTR || error == EWOULDBLOCK)) 510 error = 0; 511 if (error == EPIPE) 512 psignal(p, SIGPIPE); 513 } 514 cnt -= auio.uio_resid; 515 #ifdef KTRACE 516 if (ktriov != NULL) { 517 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 518 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error); 519 free(ktriov, M_TEMP); 520 } 521 #endif 522 *retval = cnt; 523 done: 524 if (needfree) 525 free(needfree, M_IOV); 526 out: 527 FILE_UNUSE(fp, l); 528 uvmspace_free(vm); 529 return (error); 530 } 531 532 /* 533 * 
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(caddr_t)	data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	/* Descriptor must be open for either reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep the exit-path printf well defined */
		goto out;
	}

	/*
	 * The close-on-exec requests only touch descriptor table state,
	 * so handle them without calling into the file's ioctl routine.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	/* Argument buffer: on-stack for small sizes, heap otherwise. */
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-size IOC_IN: pass the pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Mirror the request in f_flag before passing it down. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	/*
	 * A driver returning -1 is a bug; log it, then map both -1 and
	 * EPASSTHROUGH (command not recognized) to ENOTTY.
	 */
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

/* Sleep channel and collision counter shared by select/poll below. */
int selwait, nselcoll;

/*
 * Select system call.
684 */ 685 int 686 sys_pselect(struct lwp *l, void *v, register_t *retval) 687 { 688 struct sys_pselect_args /* { 689 syscallarg(int) nd; 690 syscallarg(fd_set *) in; 691 syscallarg(fd_set *) ou; 692 syscallarg(fd_set *) ex; 693 syscallarg(const struct timespec *) ts; 694 syscallarg(sigset_t *) mask; 695 } */ * const uap = v; 696 struct timespec ats; 697 struct timeval atv, *tv = NULL; 698 sigset_t amask, *mask = NULL; 699 int error; 700 701 if (SCARG(uap, ts)) { 702 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 703 if (error) 704 return error; 705 atv.tv_sec = ats.tv_sec; 706 atv.tv_usec = ats.tv_nsec / 1000; 707 tv = &atv; 708 } 709 if (SCARG(uap, mask) != NULL) { 710 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 711 if (error) 712 return error; 713 mask = &amask; 714 } 715 716 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 717 SCARG(uap, ou), SCARG(uap, ex), tv, mask); 718 } 719 720 int 721 sys_select(struct lwp *l, void *v, register_t *retval) 722 { 723 struct sys_select_args /* { 724 syscallarg(int) nd; 725 syscallarg(fd_set *) in; 726 syscallarg(fd_set *) ou; 727 syscallarg(fd_set *) ex; 728 syscallarg(struct timeval *) tv; 729 } */ * const uap = v; 730 struct timeval atv, *tv = NULL; 731 int error; 732 733 if (SCARG(uap, tv)) { 734 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 735 sizeof(atv)); 736 if (error) 737 return error; 738 tv = &atv; 739 } 740 741 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 742 SCARG(uap, ou), SCARG(uap, ex), tv, NULL); 743 } 744 745 int 746 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in, 747 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask) 748 { 749 struct proc * const p = l->l_proc; 750 caddr_t bits; 751 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 752 sizeof(fd_mask) * 6]; 753 int s, ncoll, error, timo; 754 size_t ni; 755 sigset_t oldmask; 756 757 error = 0; 758 if (nd < 0) 759 return (EINVAL); 760 if (nd > p->p_fd->fd_nfiles) { 761 /* forgiving; 
slightly wrong */ 762 nd = p->p_fd->fd_nfiles; 763 } 764 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 765 if (ni * 6 > sizeof(smallbits)) 766 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 767 else 768 bits = smallbits; 769 770 #define getbits(name, x) \ 771 if (u_ ## name) { \ 772 error = copyin(u_ ## name, bits + ni * x, ni); \ 773 if (error) \ 774 goto done; \ 775 } else \ 776 memset(bits + ni * x, 0, ni); 777 getbits(in, 0); 778 getbits(ou, 1); 779 getbits(ex, 2); 780 #undef getbits 781 782 timo = 0; 783 if (tv) { 784 if (itimerfix(tv)) { 785 error = EINVAL; 786 goto done; 787 } 788 s = splclock(); 789 timeradd(tv, &time, tv); 790 splx(s); 791 } 792 if (mask) 793 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 794 795 retry: 796 ncoll = nselcoll; 797 l->l_flag |= L_SELECT; 798 error = selscan(l, (fd_mask *)(bits + ni * 0), 799 (fd_mask *)(bits + ni * 3), nd, retval); 800 if (error || *retval) 801 goto done; 802 if (tv) { 803 /* 804 * We have to recalculate the timeout on every retry. 805 */ 806 timo = hzto(tv); 807 if (timo <= 0) 808 goto done; 809 } 810 s = splsched(); 811 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 812 splx(s); 813 goto retry; 814 } 815 l->l_flag &= ~L_SELECT; 816 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 817 splx(s); 818 if (error == 0) 819 goto retry; 820 done: 821 if (mask) 822 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 823 l->l_flag &= ~L_SELECT; 824 /* select is not restarted after signals... 
*/ 825 if (error == ERESTART) 826 error = EINTR; 827 if (error == EWOULDBLOCK) 828 error = 0; 829 if (error == 0) { 830 831 #define putbits(name, x) \ 832 if (u_ ## name) { \ 833 error = copyout(bits + ni * x, u_ ## name, ni); \ 834 if (error) \ 835 goto out; \ 836 } 837 putbits(in, 3); 838 putbits(ou, 4); 839 putbits(ex, 5); 840 #undef putbits 841 } 842 out: 843 if (ni * 6 > sizeof(smallbits)) 844 free(bits, M_TEMP); 845 return (error); 846 } 847 848 int 849 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd, 850 register_t *retval) 851 { 852 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 853 POLLWRNORM | POLLHUP | POLLERR, 854 POLLRDBAND }; 855 struct proc *p = l->l_proc; 856 struct filedesc *fdp; 857 int msk, i, j, fd, n; 858 fd_mask ibits, obits; 859 struct file *fp; 860 861 fdp = p->p_fd; 862 n = 0; 863 for (msk = 0; msk < 3; msk++) { 864 for (i = 0; i < nfd; i += NFDBITS) { 865 ibits = *ibitp++; 866 obits = 0; 867 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 868 ibits &= ~(1 << j); 869 if ((fp = fd_getfile(fdp, fd)) == NULL) 870 return (EBADF); 871 FILE_USE(fp); 872 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) { 873 obits |= (1 << j); 874 n++; 875 } 876 FILE_UNUSE(fp, l); 877 } 878 *obitp++ = obits; 879 } 880 } 881 *retval = n; 882 return (0); 883 } 884 885 /* 886 * Poll system call. 887 */ 888 int 889 sys_poll(struct lwp *l, void *v, register_t *retval) 890 { 891 struct sys_poll_args /* { 892 syscallarg(struct pollfd *) fds; 893 syscallarg(u_int) nfds; 894 syscallarg(int) timeout; 895 } */ * const uap = v; 896 struct timeval atv, *tv = NULL; 897 898 if (SCARG(uap, timeout) != INFTIM) { 899 atv.tv_sec = SCARG(uap, timeout) / 1000; 900 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 901 tv = &atv; 902 } 903 904 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 905 tv, NULL); 906 } 907 908 /* 909 * Poll system call. 
 */
int
sys_pollts(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pollts_args /* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */ * const uap = v;
	struct timespec ats;
	struct timeval atv, *tv = NULL;
	sigset_t amask, *mask = NULL;
	int error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		/* Convert the timespec into the timeval pollcommon() takes. */
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
	    tv, mask);
}

/*
 * Common code for poll(2) and pollts(2).  Copies in the pollfd array,
 * scans/sleeps until an event fires or the timeout expires, and copies
 * the array (with revents filled in) back out.  An optional signal
 * mask is installed for the duration of the wait.
 */
int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	struct proc * const p = l->l_proc;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	sigset_t oldmask;
	int s, ncoll, error, timo;
	size_t ni;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	/* Use the stack buffer unless the array is too large. */
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv) {
		if (itimerfix(tv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(tv, &time, tv);
		splx(s);
	}
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(tv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * If a wakeup raced with the scan (flag cleared or the collision
	 * counter moved), rescan instead of sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Poll each descriptor in the pollfd array once, filling in revents.
 * Counts descriptors with any event (including POLLNVAL for bad fds);
 * negative fds are skipped per poll(2) semantics.
 */
int
pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				/* POLLERR/POLLHUP are always reportable. */
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, l);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, l);
			}
		}
	}
	*retval = n;
	return (0);
}

/*
 * Generic poll routine for devices that are always ready.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct lwp *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
1071 */ 1072 void 1073 selrecord(struct lwp *selector, struct selinfo *sip) 1074 { 1075 struct lwp *l; 1076 struct proc *p; 1077 pid_t mypid; 1078 1079 mypid = selector->l_proc->p_pid; 1080 if (sip->sel_pid == mypid) 1081 return; 1082 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 1083 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1084 if (l->l_wchan == (caddr_t)&selwait) { 1085 sip->sel_collision = 1; 1086 return; 1087 } 1088 } 1089 } 1090 1091 sip->sel_pid = mypid; 1092 } 1093 1094 /* 1095 * Do a wakeup when a selectable event occurs. 1096 */ 1097 void 1098 selwakeup(sip) 1099 struct selinfo *sip; 1100 { 1101 struct lwp *l; 1102 struct proc *p; 1103 int s; 1104 1105 if (sip->sel_pid == 0) 1106 return; 1107 if (sip->sel_collision) { 1108 sip->sel_pid = 0; 1109 nselcoll++; 1110 sip->sel_collision = 0; 1111 wakeup((caddr_t)&selwait); 1112 return; 1113 } 1114 p = pfind(sip->sel_pid); 1115 sip->sel_pid = 0; 1116 if (p != NULL) { 1117 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1118 SCHED_LOCK(s); 1119 if (l->l_wchan == (caddr_t)&selwait) { 1120 if (l->l_stat == LSSLEEP) 1121 setrunnable(l); 1122 else 1123 unsleep(l); 1124 } else if (l->l_flag & L_SELECT) 1125 l->l_flag &= ~L_SELECT; 1126 SCHED_UNLOCK(s); 1127 } 1128 } 1129 } 1130