/*	$NetBSD: sys_generic.c,v 1.83 2005/05/29 22:24:15 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.83 2005/05/29 22:24:15 christos Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

int selscan(struct proc *, fd_mask *, fd_mask *, int, register_t *);
int pollscan(struct proc *, struct pollfd *, int, register_t *);

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common backend for read(2)-style calls: build a single-element uio,
 * hand it to the file's fo_read method and drop the use reference
 * taken by the caller.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif

	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
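/*
 * dofileread() above is parameterized on the offset pointer and the
 * FOF_UPDATE_OFFSET flag so it can serve more than one front end:
 * sys_read() passes &fp->f_offset with FOF_UPDATE_OFFSET so the
 * descriptor's offset advances, while a positional read in the style
 * of pread(2) would be expected to supply a private offset and a zero
 * flags word, roughly:
 *
 *	error = dofileread(p, fd, fp, buf, nbyte, &offset, 0, retval);
 *
 * (Illustrative sketch of the calling convention only, not a quote of
 * the pread implementation.)
 */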
/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common backend for readv(2)-style calls: copy in and validate the
 * iovec array, then hand the resulting uio to the file's fo_read
 * method.  Drops the use reference taken by the caller.
 */
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

/*
 * Common backend for write(2)-style calls; the write counterpart of
 * dofileread() above.  Posts SIGPIPE on EPIPE and drops the use
 * reference taken by the caller.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif

	error = 0;
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
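/*
 * Common backend for writev(2)-style calls; the write counterpart of
 * dofilereadv(): copy in and validate the iovec array, hand the uio to
 * the file's fo_write method, post SIGPIPE on EPIPE and drop the use
 * reference taken by the caller.
 */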
int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(p, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, p);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, p);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_pselect(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(const struct timespec *) ts;
		syscallarg(sigset_t *) mask;
	} */ * const uap = v;
	struct timespec ats;
	struct timeval atv, *tv = NULL;
	sigset_t amask, *mask = NULL;
	int error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, mask);
}

int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ * const uap = v;
	struct timeval atv, *tv = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
		    sizeof(atv));
		if (error)
			return error;
		tv = &atv;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
}

/*
 * Common code for select(2) and pselect(2): copy in the descriptor
 * sets, scan them with selscan() and sleep on selwait until something
 * becomes ready or the timeout expires.
 */
int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	struct proc * const p = l->l_proc;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	int s, ncoll, error, timo;
	size_t ni;
	sigset_t oldmask;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
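	/*
	 * The descriptor sets live in one buffer holding six ni-byte
	 * regions: the three sets copied in from the user, followed by
	 * the three result sets that selscan() fills in and that are
	 * copied back out on success:
	 *
	 *	bits + ni * 0	in (user)	bits + ni * 3	in (result)
	 *	bits + ni * 1	ou (user)	bits + ni * 4	ou (result)
	 *	bits + ni * 2	ex (user)	bits + ni * 5	ex (result)
	 */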
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv) {
		if (itimerfix(tv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(tv, &time, tv);
		splx(s);
	}
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(tv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define	putbits(name, x)						\
	if (u_ ## name) {						\
		error = copyout(bits + ni * x, u_ ## name, ni);		\
		if (error)						\
			goto out;					\
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Check every descriptor present in the three input sets and record
 * the ready ones in the corresponding output sets; the number of ready
 * descriptors is returned through retval.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ * const uap = v;
	struct timeval atv, *tv = NULL;

	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		tv = &atv;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
	    tv, NULL);
}
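/*
 * In sys_poll() above, a timeout of INFTIM (-1) leaves tv NULL, so
 * pollcommon() never arms a timeout and the tsleep() there blocks until
 * a wakeup or a signal.  Any other value is converted from milliseconds;
 * for example, timeout = 1500 yields atv = { 1 second, 500000 microseconds }.
 */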
/*
 * Poll system call with a timespec timeout and signal mask (pollts).
 */
int
sys_pollts(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pollts_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(const struct timespec *) ts;
		syscallarg(const sigset_t *) mask;
	} */ * const uap = v;
	struct timespec ats;
	struct timeval atv, *tv = NULL;
	sigset_t amask, *mask = NULL;
	int error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
	    tv, mask);
}

/*
 * Common code for poll(2) and pollts(2): copy in the pollfd array,
 * scan it with pollscan() and sleep on selwait until something becomes
 * ready or the timeout expires.
 */
int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	struct proc * const p = l->l_proc;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	sigset_t oldmask;
	int s, ncoll, error, timo;
	size_t ni;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv) {
		if (itimerfix(tv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(tv, &time, tv);
		splx(s);
	}
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(tv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Check every entry in the pollfd array and fill in its revents; the
 * number of entries with non-zero revents is returned through retval.
 */
int
pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
{
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, p);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->sel_pid == mypid)
		return;
	if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l->l_wchan == (caddr_t)&selwait) {
				sip->sel_collision = 1;
				return;
			}
		}
	}

	sip->sel_pid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	int s;

	if (sip->sel_pid == 0)
		return;
	if (sip->sel_collision) {
		sip->sel_pid = 0;
		nselcoll++;
		sip->sel_collision = 0;
		wakeup((caddr_t)&selwait);
		return;
	}
	p = pfind(sip->sel_pid);
	sip->sel_pid = 0;
	if (p != NULL) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			SCHED_LOCK(s);
			if (l->l_wchan == (caddr_t)&selwait) {
				if (l->l_stat == LSSLEEP)
					setrunnable(l);
				else
					unsleep(l);
			} else if (l->l_flag & L_SELECT)
				l->l_flag &= ~L_SELECT;
			SCHED_UNLOCK(s);
		}
	}
}