1 /* $NetBSD: sys_generic.c,v 1.84 2005/12/11 12:24:30 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.84 2005/12/11 12:24:30 christos Exp $"); 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/file.h> 49 #include <sys/proc.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <sys/mount.h> 62 #include <sys/sa.h> 63 #include <sys/syscallargs.h> 64 65 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *); 66 int pollscan(struct lwp *, struct pollfd *, int, register_t *); 67 68 69 /* 70 * Read system call. 71 */ 72 /* ARGSUSED */ 73 int 74 sys_read(struct lwp *l, void *v, register_t *retval) 75 { 76 struct sys_read_args /* { 77 syscallarg(int) fd; 78 syscallarg(void *) buf; 79 syscallarg(size_t) nbyte; 80 } */ *uap = v; 81 int fd; 82 struct file *fp; 83 struct proc *p; 84 struct filedesc *fdp; 85 86 fd = SCARG(uap, fd); 87 p = l->l_proc; 88 fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 93 if ((fp->f_flag & FREAD) == 0) { 94 simple_unlock(&fp->f_slock); 95 return (EBADF); 96 } 97 98 FILE_USE(fp); 99 100 /* dofileread() will unuse the descriptor for us */ 101 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 102 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 103 } 104 105 int 106 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte, 107 off_t *offset, int flags, register_t *retval) 108 { 109 struct iovec aiov; 110 struct uio auio; 111 struct proc *p; 112 size_t cnt; 113 int error; 114 #ifdef KTRACE 115 struct iovec ktriov = {0}; 116 #endif 117 p = l->l_proc; 118 error = 0; 119 120 aiov.iov_base = (caddr_t)buf; 121 aiov.iov_len = nbyte; 122 auio.uio_iov = &aiov; 123 auio.uio_iovcnt = 1; 124 auio.uio_resid = nbyte; 125 auio.uio_rw = UIO_READ; 126 auio.uio_segflg = UIO_USERSPACE; 127 auio.uio_lwp = l; 128 129 /* 130 * Reads return ssize_t because -1 is returned on error. Therefore 131 * we must restrict the length to SSIZE_MAX to avoid garbage return 132 * values. 133 */ 134 if (auio.uio_resid > SSIZE_MAX) { 135 error = EINVAL; 136 goto out; 137 } 138 139 #ifdef KTRACE 140 /* 141 * if tracing, save a copy of iovec 142 */ 143 if (KTRPOINT(p, KTR_GENIO)) 144 ktriov = aiov; 145 #endif 146 cnt = auio.uio_resid; 147 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 148 if (error) 149 if (auio.uio_resid != cnt && (error == ERESTART || 150 error == EINTR || error == EWOULDBLOCK)) 151 error = 0; 152 cnt -= auio.uio_resid; 153 #ifdef KTRACE 154 if (KTRPOINT(p, KTR_GENIO) && error == 0) 155 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error); 156 #endif 157 *retval = cnt; 158 out: 159 FILE_UNUSE(fp, l); 160 return (error); 161 } 162 163 /* 164 * Scatter read system call. 165 */ 166 int 167 sys_readv(struct lwp *l, void *v, register_t *retval) 168 { 169 struct sys_readv_args /* { 170 syscallarg(int) fd; 171 syscallarg(const struct iovec *) iovp; 172 syscallarg(int) iovcnt; 173 } */ *uap = v; 174 struct filedesc *fdp; 175 struct file *fp; 176 struct proc *p; 177 int fd; 178 179 fd = SCARG(uap, fd); 180 p = l->l_proc; 181 fdp = p->p_fd; 182 183 if ((fp = fd_getfile(fdp, fd)) == NULL) 184 return (EBADF); 185 186 if ((fp->f_flag & FREAD) == 0) { 187 simple_unlock(&fp->f_slock); 188 return (EBADF); 189 } 190 191 FILE_USE(fp); 192 193 /* dofilereadv() will unuse the descriptor for us */ 194 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 195 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 196 } 197 198 int 199 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 200 int iovcnt, off_t *offset, int flags, register_t *retval) 201 { 202 struct proc *p; 203 struct uio auio; 204 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 205 int i, error; 206 size_t cnt; 207 u_int iovlen; 208 #ifdef KTRACE 209 struct iovec *ktriov; 210 #endif 211 212 p = l->l_proc; 213 error = 0; 214 #ifdef KTRACE 215 ktriov = NULL; 216 #endif 217 /* note: can't use iovlen until iovcnt is validated */ 218 iovlen = iovcnt * sizeof(struct iovec); 219 if ((u_int)iovcnt > UIO_SMALLIOV) { 220 if ((u_int)iovcnt > IOV_MAX) { 221 error = EINVAL; 222 goto out; 223 } 224 iov = malloc(iovlen, M_IOV, M_WAITOK); 225 needfree = iov; 226 } else if ((u_int)iovcnt > 0) { 227 iov = aiov; 228 needfree = NULL; 229 } else { 230 error = EINVAL; 231 goto out; 232 } 233 234 auio.uio_iov = iov; 235 auio.uio_iovcnt = iovcnt; 236 auio.uio_rw = UIO_READ; 237 auio.uio_segflg = UIO_USERSPACE; 238 auio.uio_lwp = l; 239 error = copyin(iovp, iov, iovlen); 240 if (error) 241 goto done; 242 auio.uio_resid = 0; 243 for (i = 0; i < iovcnt; i++) { 244 auio.uio_resid += iov->iov_len; 245 /* 246 * Reads return ssize_t because -1 is returned on error. 247 * Therefore we must restrict the length to SSIZE_MAX to 248 * avoid garbage return values. 249 */ 250 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 251 error = EINVAL; 252 goto done; 253 } 254 iov++; 255 } 256 #ifdef KTRACE 257 /* 258 * if tracing, save a copy of iovec 259 */ 260 if (KTRPOINT(p, KTR_GENIO)) { 261 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 262 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 263 } 264 #endif 265 cnt = auio.uio_resid; 266 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 267 if (error) 268 if (auio.uio_resid != cnt && (error == ERESTART || 269 error == EINTR || error == EWOULDBLOCK)) 270 error = 0; 271 cnt -= auio.uio_resid; 272 #ifdef KTRACE 273 if (ktriov != NULL) { 274 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 275 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error); 276 free(ktriov, M_TEMP); 277 } 278 #endif 279 *retval = cnt; 280 done: 281 if (needfree) 282 free(needfree, M_IOV); 283 out: 284 FILE_UNUSE(fp, l); 285 return (error); 286 } 287 288 /* 289 * Write system call 290 */ 291 int 292 sys_write(struct lwp *l, void *v, register_t *retval) 293 { 294 struct sys_write_args /* { 295 syscallarg(int) fd; 296 syscallarg(const void *) buf; 297 syscallarg(size_t) nbyte; 298 } */ *uap = v; 299 int fd; 300 struct file *fp; 301 struct proc *p; 302 struct filedesc *fdp; 303 304 fd = SCARG(uap, fd); 305 p = l->l_proc; 306 fdp = p->p_fd; 307 308 if ((fp = fd_getfile(fdp, fd)) == NULL) 309 return (EBADF); 310 311 if ((fp->f_flag & FWRITE) == 0) { 312 simple_unlock(&fp->f_slock); 313 return (EBADF); 314 } 315 316 FILE_USE(fp); 317 318 /* dofilewrite() will unuse the descriptor for us */ 319 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 320 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 321 } 322 323 int 324 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf, 325 size_t nbyte, off_t *offset, int flags, register_t *retval) 326 { 327 struct iovec aiov; 328 struct uio auio; 329 struct proc *p; 330 size_t cnt; 331 int error; 332 #ifdef KTRACE 333 struct iovec ktriov = {0}; 334 #endif 335 336 p = l->l_proc; 337 error = 0; 338 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */ 339 aiov.iov_len = nbyte; 340 auio.uio_iov = &aiov; 341 auio.uio_iovcnt = 1; 342 auio.uio_resid = nbyte; 343 auio.uio_rw = UIO_WRITE; 344 auio.uio_segflg = UIO_USERSPACE; 345 auio.uio_lwp = l; 346 347 /* 348 * Writes return ssize_t because -1 is returned on error. Therefore 349 * we must restrict the length to SSIZE_MAX to avoid garbage return 350 * values. 351 */ 352 if (auio.uio_resid > SSIZE_MAX) { 353 error = EINVAL; 354 goto out; 355 } 356 357 #ifdef KTRACE 358 /* 359 * if tracing, save a copy of iovec 360 */ 361 if (KTRPOINT(p, KTR_GENIO)) 362 ktriov = aiov; 363 #endif 364 cnt = auio.uio_resid; 365 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 366 if (error) { 367 if (auio.uio_resid != cnt && (error == ERESTART || 368 error == EINTR || error == EWOULDBLOCK)) 369 error = 0; 370 if (error == EPIPE) 371 psignal(p, SIGPIPE); 372 } 373 cnt -= auio.uio_resid; 374 #ifdef KTRACE 375 if (KTRPOINT(p, KTR_GENIO) && error == 0) 376 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error); 377 #endif 378 *retval = cnt; 379 out: 380 FILE_UNUSE(fp, l); 381 return (error); 382 } 383 384 /* 385 * Gather write system call 386 */ 387 int 388 sys_writev(struct lwp *l, void *v, register_t *retval) 389 { 390 struct sys_writev_args /* { 391 syscallarg(int) fd; 392 syscallarg(const struct iovec *) iovp; 393 syscallarg(int) iovcnt; 394 } */ *uap = v; 395 int fd; 396 struct file *fp; 397 struct proc *p; 398 struct filedesc *fdp; 399 400 fd = SCARG(uap, fd); 401 p = l->l_proc; 402 fdp = p->p_fd; 403 404 if ((fp = fd_getfile(fdp, fd)) == NULL) 405 return (EBADF); 406 407 if ((fp->f_flag & FWRITE) == 0) { 408 simple_unlock(&fp->f_slock); 409 return (EBADF); 410 } 411 412 FILE_USE(fp); 413 414 /* dofilewritev() will unuse the descriptor for us */ 415 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 416 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 417 } 418 419 int 420 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp, 421 int iovcnt, off_t *offset, int flags, register_t *retval) 422 { 423 struct proc *p; 424 struct uio auio; 425 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 426 int i, error; 427 size_t cnt; 428 u_int iovlen; 429 #ifdef KTRACE 430 struct iovec *ktriov; 431 #endif 432 433 p = l->l_proc; 434 error = 0; 435 #ifdef KTRACE 436 ktriov = NULL; 437 #endif 438 /* note: can't use iovlen until iovcnt is validated */ 439 iovlen = iovcnt * sizeof(struct iovec); 440 if ((u_int)iovcnt > UIO_SMALLIOV) { 441 if ((u_int)iovcnt > IOV_MAX) { 442 error = EINVAL; 443 goto out; 444 } 445 iov = malloc(iovlen, M_IOV, M_WAITOK); 446 needfree = iov; 447 } else if ((u_int)iovcnt > 0) { 448 iov = aiov; 449 needfree = NULL; 450 } else { 451 error = EINVAL; 452 goto out; 453 } 454 455 auio.uio_iov = iov; 456 auio.uio_iovcnt = iovcnt; 457 auio.uio_rw = UIO_WRITE; 458 auio.uio_segflg = UIO_USERSPACE; 459 auio.uio_lwp = l; 460 error = copyin(iovp, iov, iovlen); 461 if (error) 462 goto done; 463 auio.uio_resid = 0; 464 for (i = 0; i < iovcnt; i++) { 465 auio.uio_resid += iov->iov_len; 466 /* 467 * Writes return ssize_t because -1 is returned on error. 468 * Therefore we must restrict the length to SSIZE_MAX to 469 * avoid garbage return values. 470 */ 471 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 472 error = EINVAL; 473 goto done; 474 } 475 iov++; 476 } 477 #ifdef KTRACE 478 /* 479 * if tracing, save a copy of iovec 480 */ 481 if (KTRPOINT(p, KTR_GENIO)) { 482 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 483 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 484 } 485 #endif 486 cnt = auio.uio_resid; 487 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 488 if (error) { 489 if (auio.uio_resid != cnt && (error == ERESTART || 490 error == EINTR || error == EWOULDBLOCK)) 491 error = 0; 492 if (error == EPIPE) 493 psignal(p, SIGPIPE); 494 } 495 cnt -= auio.uio_resid; 496 #ifdef KTRACE 497 if (ktriov != NULL) { 498 if (KTRPOINT(p, KTR_GENIO) && (error == 0)) 499 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error); 500 free(ktriov, M_TEMP); 501 } 502 #endif 503 *retval = cnt; 504 done: 505 if (needfree) 506 free(needfree, M_IOV); 507 out: 508 FILE_UNUSE(fp, l); 509 return (error); 510 } 511 512 /* 513 * Ioctl system call 514 */ 515 /* ARGSUSED */ 516 int 517 sys_ioctl(struct lwp *l, void *v, register_t *retval) 518 { 519 struct sys_ioctl_args /* { 520 syscallarg(int) fd; 521 syscallarg(u_long) com; 522 syscallarg(caddr_t) data; 523 } */ *uap = v; 524 struct file *fp; 525 struct proc *p; 526 struct filedesc *fdp; 527 u_long com; 528 int error; 529 u_int size; 530 caddr_t data, memp; 531 #define STK_PARAMS 128 532 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 533 534 error = 0; 535 p = l->l_proc; 536 fdp = p->p_fd; 537 538 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 539 return (EBADF); 540 541 FILE_USE(fp); 542 543 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 544 error = EBADF; 545 com = 0; 546 goto out; 547 } 548 549 switch (com = SCARG(uap, com)) { 550 case FIONCLEX: 551 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 552 goto out; 553 554 case FIOCLEX: 555 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 556 goto out; 557 } 558 559 /* 560 * Interpret high order word to find amount of data to be 561 * copied to/from the user's address space. 562 */ 563 size = IOCPARM_LEN(com); 564 if (size > IOCPARM_MAX) { 565 error = ENOTTY; 566 goto out; 567 } 568 memp = NULL; 569 if (size > sizeof(stkbuf)) { 570 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 571 data = memp; 572 } else 573 data = (caddr_t)stkbuf; 574 if (com&IOC_IN) { 575 if (size) { 576 error = copyin(SCARG(uap, data), data, size); 577 if (error) { 578 if (memp) 579 free(memp, M_IOCTLOPS); 580 goto out; 581 } 582 #ifdef KTRACE 583 if (KTRPOINT(p, KTR_GENIO)) { 584 struct iovec iov; 585 iov.iov_base = SCARG(uap, data); 586 iov.iov_len = size; 587 ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov, 588 size, 0); 589 } 590 #endif 591 } else 592 *(caddr_t *)data = SCARG(uap, data); 593 } else if ((com&IOC_OUT) && size) 594 /* 595 * Zero the buffer so the user always 596 * gets back something deterministic. 597 */ 598 memset(data, 0, size); 599 else if (com&IOC_VOID) 600 *(caddr_t *)data = SCARG(uap, data); 601 602 switch (com) { 603 604 case FIONBIO: 605 if (*(int *)data != 0) 606 fp->f_flag |= FNONBLOCK; 607 else 608 fp->f_flag &= ~FNONBLOCK; 609 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l); 610 break; 611 612 case FIOASYNC: 613 if (*(int *)data != 0) 614 fp->f_flag |= FASYNC; 615 else 616 fp->f_flag &= ~FASYNC; 617 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l); 618 break; 619 620 default: 621 error = (*fp->f_ops->fo_ioctl)(fp, com, data, l); 622 /* 623 * Copy any data to user, size was 624 * already set and checked above. 625 */ 626 if (error == 0 && (com&IOC_OUT) && size) { 627 error = copyout(data, SCARG(uap, data), size); 628 #ifdef KTRACE 629 if (KTRPOINT(p, KTR_GENIO)) { 630 struct iovec iov; 631 iov.iov_base = SCARG(uap, data); 632 iov.iov_len = size; 633 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, 634 size, error); 635 } 636 #endif 637 } 638 break; 639 } 640 if (memp) 641 free(memp, M_IOCTLOPS); 642 out: 643 FILE_UNUSE(fp, l); 644 switch (error) { 645 case -1: 646 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 647 "pid=%d comm=%s\n", 648 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 649 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 650 p->p_pid, p->p_comm); 651 /* FALLTHROUGH */ 652 case EPASSTHROUGH: 653 error = ENOTTY; 654 /* FALLTHROUGH */ 655 default: 656 return (error); 657 } 658 } 659 660 int selwait, nselcoll; 661 662 /* 663 * Select system call. 664 */ 665 int 666 sys_pselect(struct lwp *l, void *v, register_t *retval) 667 { 668 struct sys_pselect_args /* { 669 syscallarg(int) nd; 670 syscallarg(fd_set *) in; 671 syscallarg(fd_set *) ou; 672 syscallarg(fd_set *) ex; 673 syscallarg(const struct timespec *) ts; 674 syscallarg(sigset_t *) mask; 675 } */ * const uap = v; 676 struct timespec ats; 677 struct timeval atv, *tv = NULL; 678 sigset_t amask, *mask = NULL; 679 int error; 680 681 if (SCARG(uap, ts)) { 682 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 683 if (error) 684 return error; 685 atv.tv_sec = ats.tv_sec; 686 atv.tv_usec = ats.tv_nsec / 1000; 687 tv = &atv; 688 } 689 if (SCARG(uap, mask) != NULL) { 690 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 691 if (error) 692 return error; 693 mask = &amask; 694 } 695 696 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 697 SCARG(uap, ou), SCARG(uap, ex), tv, mask); 698 } 699 700 int 701 sys_select(struct lwp *l, void *v, register_t *retval) 702 { 703 struct sys_select_args /* { 704 syscallarg(int) nd; 705 syscallarg(fd_set *) in; 706 syscallarg(fd_set *) ou; 707 syscallarg(fd_set *) ex; 708 syscallarg(struct timeval *) tv; 709 } */ * const uap = v; 710 struct timeval atv, *tv = NULL; 711 int error; 712 713 if (SCARG(uap, tv)) { 714 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 715 sizeof(atv)); 716 if (error) 717 return error; 718 tv = &atv; 719 } 720 721 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in), 722 SCARG(uap, ou), SCARG(uap, ex), tv, NULL); 723 } 724 725 int 726 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in, 727 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask) 728 { 729 struct proc * const p = l->l_proc; 730 caddr_t bits; 731 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 732 sizeof(fd_mask) * 6]; 733 int s, ncoll, error, timo; 734 size_t ni; 735 sigset_t oldmask; 736 737 error = 0; 738 if (nd < 0) 739 return (EINVAL); 740 if (nd > p->p_fd->fd_nfiles) { 741 /* forgiving; slightly wrong */ 742 nd = p->p_fd->fd_nfiles; 743 } 744 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 745 if (ni * 6 > sizeof(smallbits)) 746 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 747 else 748 bits = smallbits; 749 750 #define getbits(name, x) \ 751 if (u_ ## name) { \ 752 error = copyin(u_ ## name, bits + ni * x, ni); \ 753 if (error) \ 754 goto done; \ 755 } else \ 756 memset(bits + ni * x, 0, ni); 757 getbits(in, 0); 758 getbits(ou, 1); 759 getbits(ex, 2); 760 #undef getbits 761 762 timo = 0; 763 if (tv) { 764 if (itimerfix(tv)) { 765 error = EINVAL; 766 goto done; 767 } 768 s = splclock(); 769 timeradd(tv, &time, tv); 770 splx(s); 771 } 772 if (mask) 773 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 774 775 retry: 776 ncoll = nselcoll; 777 l->l_flag |= L_SELECT; 778 error = selscan(l, (fd_mask *)(bits + ni * 0), 779 (fd_mask *)(bits + ni * 3), nd, retval); 780 if (error || *retval) 781 goto done; 782 if (tv) { 783 /* 784 * We have to recalculate the timeout on every retry. 785 */ 786 timo = hzto(tv); 787 if (timo <= 0) 788 goto done; 789 } 790 s = splsched(); 791 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 792 splx(s); 793 goto retry; 794 } 795 l->l_flag &= ~L_SELECT; 796 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 797 splx(s); 798 if (error == 0) 799 goto retry; 800 done: 801 if (mask) 802 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 803 l->l_flag &= ~L_SELECT; 804 /* select is not restarted after signals... */ 805 if (error == ERESTART) 806 error = EINTR; 807 if (error == EWOULDBLOCK) 808 error = 0; 809 if (error == 0) { 810 811 #define putbits(name, x) \ 812 if (u_ ## name) { \ 813 error = copyout(bits + ni * x, u_ ## name, ni); \ 814 if (error) \ 815 goto out; \ 816 } 817 putbits(in, 3); 818 putbits(ou, 4); 819 putbits(ex, 5); 820 #undef putbits 821 } 822 out: 823 if (ni * 6 > sizeof(smallbits)) 824 free(bits, M_TEMP); 825 return (error); 826 } 827 828 int 829 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd, 830 register_t *retval) 831 { 832 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 833 POLLWRNORM | POLLHUP | POLLERR, 834 POLLRDBAND }; 835 struct proc *p = l->l_proc; 836 struct filedesc *fdp; 837 int msk, i, j, fd, n; 838 fd_mask ibits, obits; 839 struct file *fp; 840 841 fdp = p->p_fd; 842 n = 0; 843 for (msk = 0; msk < 3; msk++) { 844 for (i = 0; i < nfd; i += NFDBITS) { 845 ibits = *ibitp++; 846 obits = 0; 847 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 848 ibits &= ~(1 << j); 849 if ((fp = fd_getfile(fdp, fd)) == NULL) 850 return (EBADF); 851 FILE_USE(fp); 852 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) { 853 obits |= (1 << j); 854 n++; 855 } 856 FILE_UNUSE(fp, l); 857 } 858 *obitp++ = obits; 859 } 860 } 861 *retval = n; 862 return (0); 863 } 864 865 /* 866 * Poll system call. 867 */ 868 int 869 sys_poll(struct lwp *l, void *v, register_t *retval) 870 { 871 struct sys_poll_args /* { 872 syscallarg(struct pollfd *) fds; 873 syscallarg(u_int) nfds; 874 syscallarg(int) timeout; 875 } */ * const uap = v; 876 struct timeval atv, *tv = NULL; 877 878 if (SCARG(uap, timeout) != INFTIM) { 879 atv.tv_sec = SCARG(uap, timeout) / 1000; 880 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 881 tv = &atv; 882 } 883 884 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 885 tv, NULL); 886 } 887 888 /* 889 * Poll system call. 890 */ 891 int 892 sys_pollts(struct lwp *l, void *v, register_t *retval) 893 { 894 struct sys_pollts_args /* { 895 syscallarg(struct pollfd *) fds; 896 syscallarg(u_int) nfds; 897 syscallarg(const struct timespec *) ts; 898 syscallarg(const sigset_t *) mask; 899 } */ * const uap = v; 900 struct timespec ats; 901 struct timeval atv, *tv = NULL; 902 sigset_t amask, *mask = NULL; 903 int error; 904 905 if (SCARG(uap, ts)) { 906 error = copyin(SCARG(uap, ts), &ats, sizeof(ats)); 907 if (error) 908 return error; 909 atv.tv_sec = ats.tv_sec; 910 atv.tv_usec = ats.tv_nsec / 1000; 911 tv = &atv; 912 } 913 if (SCARG(uap, mask)) { 914 error = copyin(SCARG(uap, mask), &amask, sizeof(amask)); 915 if (error) 916 return error; 917 mask = &amask; 918 } 919 920 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds), 921 tv, mask); 922 } 923 924 int 925 pollcommon(struct lwp *l, register_t *retval, 926 struct pollfd *u_fds, u_int nfds, 927 struct timeval *tv, sigset_t *mask) 928 { 929 struct proc * const p = l->l_proc; 930 caddr_t bits; 931 char smallbits[32 * sizeof(struct pollfd)]; 932 sigset_t oldmask; 933 int s, ncoll, error, timo; 934 size_t ni; 935 936 if (nfds > p->p_fd->fd_nfiles) { 937 /* forgiving; slightly wrong */ 938 nfds = p->p_fd->fd_nfiles; 939 } 940 ni = nfds * sizeof(struct pollfd); 941 if (ni > sizeof(smallbits)) 942 bits = malloc(ni, M_TEMP, M_WAITOK); 943 else 944 bits = smallbits; 945 946 error = copyin(u_fds, bits, ni); 947 if (error) 948 goto done; 949 950 timo = 0; 951 if (tv) { 952 if (itimerfix(tv)) { 953 error = EINVAL; 954 goto done; 955 } 956 s = splclock(); 957 timeradd(tv, &time, tv); 958 splx(s); 959 } 960 if (mask != NULL) 961 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask); 962 963 retry: 964 ncoll = nselcoll; 965 l->l_flag |= L_SELECT; 966 error = pollscan(l, (struct pollfd *)bits, nfds, retval); 967 if (error || *retval) 968 goto done; 969 if (tv) { 970 /* 971 * We have to recalculate the timeout on every retry. 972 */ 973 timo = hzto(tv); 974 if (timo <= 0) 975 goto done; 976 } 977 s = splsched(); 978 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) { 979 splx(s); 980 goto retry; 981 } 982 l->l_flag &= ~L_SELECT; 983 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 984 splx(s); 985 if (error == 0) 986 goto retry; 987 done: 988 if (mask != NULL) 989 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL); 990 l->l_flag &= ~L_SELECT; 991 /* poll is not restarted after signals... */ 992 if (error == ERESTART) 993 error = EINTR; 994 if (error == EWOULDBLOCK) 995 error = 0; 996 if (error == 0) { 997 error = copyout(bits, u_fds, ni); 998 if (error) 999 goto out; 1000 } 1001 out: 1002 if (ni > sizeof(smallbits)) 1003 free(bits, M_TEMP); 1004 return (error); 1005 } 1006 1007 int 1008 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval) 1009 { 1010 struct proc *p = l->l_proc; 1011 struct filedesc *fdp; 1012 int i, n; 1013 struct file *fp; 1014 1015 fdp = p->p_fd; 1016 n = 0; 1017 for (i = 0; i < nfd; i++, fds++) { 1018 if (fds->fd >= fdp->fd_nfiles) { 1019 fds->revents = POLLNVAL; 1020 n++; 1021 } else if (fds->fd < 0) { 1022 fds->revents = 0; 1023 } else { 1024 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 1025 fds->revents = POLLNVAL; 1026 n++; 1027 } else { 1028 FILE_USE(fp); 1029 fds->revents = (*fp->f_ops->fo_poll)(fp, 1030 fds->events | POLLERR | POLLHUP, l); 1031 if (fds->revents != 0) 1032 n++; 1033 FILE_UNUSE(fp, l); 1034 } 1035 } 1036 } 1037 *retval = n; 1038 return (0); 1039 } 1040 1041 /*ARGSUSED*/ 1042 int 1043 seltrue(dev_t dev, int events, struct lwp *l) 1044 { 1045 1046 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1047 } 1048 1049 /* 1050 * Record a select request. 1051 */ 1052 void 1053 selrecord(struct lwp *selector, struct selinfo *sip) 1054 { 1055 struct lwp *l; 1056 struct proc *p; 1057 pid_t mypid; 1058 1059 mypid = selector->l_proc->p_pid; 1060 if (sip->sel_pid == mypid) 1061 return; 1062 if (sip->sel_pid && (p = pfind(sip->sel_pid))) { 1063 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1064 if (l->l_wchan == (caddr_t)&selwait) { 1065 sip->sel_collision = 1; 1066 return; 1067 } 1068 } 1069 } 1070 1071 sip->sel_pid = mypid; 1072 } 1073 1074 /* 1075 * Do a wakeup when a selectable event occurs. 1076 */ 1077 void 1078 selwakeup(sip) 1079 struct selinfo *sip; 1080 { 1081 struct lwp *l; 1082 struct proc *p; 1083 int s; 1084 1085 if (sip->sel_pid == 0) 1086 return; 1087 if (sip->sel_collision) { 1088 sip->sel_pid = 0; 1089 nselcoll++; 1090 sip->sel_collision = 0; 1091 wakeup((caddr_t)&selwait); 1092 return; 1093 } 1094 p = pfind(sip->sel_pid); 1095 sip->sel_pid = 0; 1096 if (p != NULL) { 1097 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 1098 SCHED_LOCK(s); 1099 if (l->l_wchan == (caddr_t)&selwait) { 1100 if (l->l_stat == LSSLEEP) 1101 setrunnable(l); 1102 else 1103 unsleep(l); 1104 } else if (l->l_flag & L_SELECT) 1105 l->l_flag &= ~L_SELECT; 1106 SCHED_UNLOCK(s); 1107 } 1108 } 1109 } 1110