1 /* $OpenBSD: sys_generic.c,v 1.54 2006/04/15 20:02:19 miod Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 66 int seltrue(dev_t, int, struct proc *); 67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 68 69 /* 70 * Read system call. 71 */ 72 /* ARGSUSED */ 73 int 74 sys_read(struct proc *p, void *v, register_t *retval) 75 { 76 struct sys_read_args /* { 77 syscallarg(int) fd; 78 syscallarg(void *) buf; 79 syscallarg(size_t) nbyte; 80 } */ *uap = v; 81 int fd = SCARG(uap, fd); 82 struct file *fp; 83 struct filedesc *fdp = p->p_fd; 84 85 if ((fp = fd_getfile(fdp, fd)) == NULL) 86 return (EBADF); 87 if ((fp->f_flag & FREAD) == 0) 88 return (EBADF); 89 90 FREF(fp); 91 92 /* dofileread() will FRELE the descriptor for us */ 93 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 94 &fp->f_offset, retval)); 95 } 96 97 int 98 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 99 off_t *offset, register_t *retval) 100 { 101 struct uio auio; 102 struct iovec aiov; 103 long cnt, error = 0; 104 #ifdef KTRACE 105 struct iovec ktriov; 106 #endif 107 108 aiov.iov_base = buf; 109 aiov.iov_len = nbyte; 110 auio.uio_iov = &aiov; 111 auio.uio_iovcnt = 1; 112 auio.uio_resid = nbyte; 113 auio.uio_rw = UIO_READ; 114 auio.uio_segflg = UIO_USERSPACE; 115 auio.uio_procp = p; 116 117 /* 118 * Reads return ssize_t because -1 is returned on error. Therefore 119 * we must restrict the length to SSIZE_MAX to avoid garbage return 120 * values. 121 */ 122 if (auio.uio_resid > SSIZE_MAX) { 123 error = EINVAL; 124 goto out; 125 } 126 127 #ifdef KTRACE 128 /* 129 * if tracing, save a copy of iovec 130 */ 131 if (KTRPOINT(p, KTR_GENIO)) 132 ktriov = aiov; 133 #endif 134 cnt = auio.uio_resid; 135 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 136 if (error) 137 if (auio.uio_resid != cnt && (error == ERESTART || 138 error == EINTR || error == EWOULDBLOCK)) 139 error = 0; 140 cnt -= auio.uio_resid; 141 142 fp->f_rxfer++; 143 fp->f_rbytes += cnt; 144 #ifdef KTRACE 145 if (KTRPOINT(p, KTR_GENIO) && error == 0) 146 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 147 #endif 148 *retval = cnt; 149 out: 150 FRELE(fp); 151 return (error); 152 } 153 154 /* 155 * Scatter read system call. 156 */ 157 int 158 sys_readv(struct proc *p, void *v, register_t *retval) 159 { 160 struct sys_readv_args /* { 161 syscallarg(int) fd; 162 syscallarg(const struct iovec *) iovp; 163 syscallarg(int) iovcnt; 164 } */ *uap = v; 165 int fd = SCARG(uap, fd); 166 struct file *fp; 167 struct filedesc *fdp = p->p_fd; 168 169 if ((fp = fd_getfile(fdp, fd)) == NULL) 170 return (EBADF); 171 if ((fp->f_flag & FREAD) == 0) 172 return (EBADF); 173 174 FREF(fp); 175 176 /* dofilereadv() will FRELE the descriptor for us */ 177 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 178 &fp->f_offset, retval)); 179 } 180 181 int 182 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 183 int iovcnt, off_t *offset, register_t *retval) 184 { 185 struct uio auio; 186 struct iovec *iov; 187 struct iovec *needfree; 188 struct iovec aiov[UIO_SMALLIOV]; 189 long i, cnt, error = 0; 190 u_int iovlen; 191 #ifdef KTRACE 192 struct iovec *ktriov = NULL; 193 #endif 194 195 /* note: can't use iovlen until iovcnt is validated */ 196 iovlen = iovcnt * sizeof(struct iovec); 197 if ((u_int)iovcnt > UIO_SMALLIOV) { 198 if ((u_int)iovcnt > IOV_MAX) { 199 error = EINVAL; 200 goto out; 201 } 202 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 203 } else if ((u_int)iovcnt > 0) { 204 iov = aiov; 205 needfree = NULL; 206 } else { 207 error = EINVAL; 208 goto out; 209 } 210 211 auio.uio_iov = iov; 212 auio.uio_iovcnt = iovcnt; 213 auio.uio_rw = UIO_READ; 214 auio.uio_segflg = UIO_USERSPACE; 215 auio.uio_procp = p; 216 error = copyin(iovp, iov, iovlen); 217 if (error) 218 goto done; 219 auio.uio_resid = 0; 220 for (i = 0; i < iovcnt; i++) { 221 auio.uio_resid += iov->iov_len; 222 /* 223 * Reads return ssize_t because -1 is returned on error. 224 * Therefore we must restrict the length to SSIZE_MAX to 225 * avoid garbage return values. 226 */ 227 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 228 error = EINVAL; 229 goto done; 230 } 231 iov++; 232 } 233 #ifdef KTRACE 234 /* 235 * if tracing, save a copy of iovec 236 */ 237 if (KTRPOINT(p, KTR_GENIO)) { 238 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 239 bcopy(auio.uio_iov, ktriov, iovlen); 240 } 241 #endif 242 cnt = auio.uio_resid; 243 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 244 if (error) 245 if (auio.uio_resid != cnt && (error == ERESTART || 246 error == EINTR || error == EWOULDBLOCK)) 247 error = 0; 248 cnt -= auio.uio_resid; 249 250 fp->f_rxfer++; 251 fp->f_rbytes += cnt; 252 #ifdef KTRACE 253 if (ktriov != NULL) { 254 if (error == 0) 255 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 256 error); 257 free(ktriov, M_TEMP); 258 } 259 #endif 260 *retval = cnt; 261 done: 262 if (needfree) 263 free(needfree, M_IOV); 264 out: 265 FRELE(fp); 266 return (error); 267 } 268 269 /* 270 * Write system call 271 */ 272 int 273 sys_write(struct proc *p, void *v, register_t *retval) 274 { 275 struct sys_write_args /* { 276 syscallarg(int) fd; 277 syscallarg(const void *) buf; 278 syscallarg(size_t) nbyte; 279 } */ *uap = v; 280 int fd = SCARG(uap, fd); 281 struct file *fp; 282 struct filedesc *fdp = p->p_fd; 283 284 if ((fp = fd_getfile(fdp, fd)) == NULL) 285 return (EBADF); 286 if ((fp->f_flag & FWRITE) == 0) 287 return (EBADF); 288 289 FREF(fp); 290 291 /* dofilewrite() will FRELE the descriptor for us */ 292 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 293 &fp->f_offset, retval)); 294 } 295 296 int 297 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 298 size_t nbyte, off_t *offset, register_t *retval) 299 { 300 struct uio auio; 301 struct iovec aiov; 302 long cnt, error = 0; 303 #ifdef KTRACE 304 struct iovec ktriov; 305 #endif 306 307 aiov.iov_base = (void *)buf; /* XXX kills const */ 308 aiov.iov_len = nbyte; 309 auio.uio_iov = &aiov; 310 auio.uio_iovcnt = 1; 311 auio.uio_resid = nbyte; 312 auio.uio_rw = UIO_WRITE; 313 auio.uio_segflg = UIO_USERSPACE; 314 auio.uio_procp = p; 315 316 /* 317 * Writes return ssize_t because -1 is returned on error. Therefore 318 * we must restrict the length to SSIZE_MAX to avoid garbage return 319 * values. 320 */ 321 if (auio.uio_resid > SSIZE_MAX) { 322 error = EINVAL; 323 goto out; 324 } 325 326 #ifdef KTRACE 327 /* 328 * if tracing, save a copy of iovec 329 */ 330 if (KTRPOINT(p, KTR_GENIO)) 331 ktriov = aiov; 332 #endif 333 cnt = auio.uio_resid; 334 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 335 if (error) { 336 if (auio.uio_resid != cnt && (error == ERESTART || 337 error == EINTR || error == EWOULDBLOCK)) 338 error = 0; 339 if (error == EPIPE) 340 psignal(p, SIGPIPE); 341 } 342 cnt -= auio.uio_resid; 343 344 fp->f_wxfer++; 345 fp->f_wbytes += cnt; 346 #ifdef KTRACE 347 if (KTRPOINT(p, KTR_GENIO) && error == 0) 348 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 349 #endif 350 *retval = cnt; 351 out: 352 FRELE(fp); 353 return (error); 354 } 355 356 /* 357 * Gather write system call 358 */ 359 int 360 sys_writev(struct proc *p, void *v, register_t *retval) 361 { 362 struct sys_writev_args /* { 363 syscallarg(int) fd; 364 syscallarg(const struct iovec *) iovp; 365 syscallarg(int) iovcnt; 366 } */ *uap = v; 367 int fd = SCARG(uap, fd); 368 struct file *fp; 369 struct filedesc *fdp = p->p_fd; 370 371 if ((fp = fd_getfile(fdp, fd)) == NULL) 372 return (EBADF); 373 if ((fp->f_flag & FWRITE) == 0) 374 return (EBADF); 375 376 FREF(fp); 377 378 /* dofilewritev() will FRELE the descriptor for us */ 379 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 380 &fp->f_offset, retval)); 381 } 382 383 int 384 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 385 int iovcnt, off_t *offset, register_t *retval) 386 { 387 struct uio auio; 388 struct iovec *iov; 389 struct iovec *needfree; 390 struct iovec aiov[UIO_SMALLIOV]; 391 long i, cnt, error = 0; 392 u_int iovlen; 393 #ifdef KTRACE 394 struct iovec *ktriov = NULL; 395 #endif 396 397 /* note: can't use iovlen until iovcnt is validated */ 398 iovlen = iovcnt * sizeof(struct iovec); 399 if ((u_int)iovcnt > UIO_SMALLIOV) { 400 if ((u_int)iovcnt > IOV_MAX) { 401 error = EINVAL; 402 goto out; 403 } 404 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 405 } else if ((u_int)iovcnt > 0) { 406 iov = aiov; 407 needfree = NULL; 408 } else { 409 error = EINVAL; 410 goto out; 411 } 412 413 auio.uio_iov = iov; 414 auio.uio_iovcnt = iovcnt; 415 auio.uio_rw = UIO_WRITE; 416 auio.uio_segflg = UIO_USERSPACE; 417 auio.uio_procp = p; 418 error = copyin(iovp, iov, iovlen); 419 if (error) 420 goto done; 421 auio.uio_resid = 0; 422 for (i = 0; i < iovcnt; i++) { 423 auio.uio_resid += iov->iov_len; 424 /* 425 * Writes return ssize_t because -1 is returned on error. 426 * Therefore we must restrict the length to SSIZE_MAX to 427 * avoid garbage return values. 428 */ 429 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 430 error = EINVAL; 431 goto done; 432 } 433 iov++; 434 } 435 #ifdef KTRACE 436 /* 437 * if tracing, save a copy of iovec 438 */ 439 if (KTRPOINT(p, KTR_GENIO)) { 440 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 441 bcopy(auio.uio_iov, ktriov, iovlen); 442 } 443 #endif 444 cnt = auio.uio_resid; 445 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 446 if (error) { 447 if (auio.uio_resid != cnt && (error == ERESTART || 448 error == EINTR || error == EWOULDBLOCK)) 449 error = 0; 450 if (error == EPIPE) 451 psignal(p, SIGPIPE); 452 } 453 cnt -= auio.uio_resid; 454 455 fp->f_wxfer++; 456 fp->f_wbytes += cnt; 457 #ifdef KTRACE 458 if (ktriov != NULL) { 459 if (error == 0) 460 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 461 free(ktriov, M_TEMP); 462 } 463 #endif 464 *retval = cnt; 465 done: 466 if (needfree) 467 free(needfree, M_IOV); 468 out: 469 FRELE(fp); 470 return (error); 471 } 472 473 /* 474 * Ioctl system call 475 */ 476 /* ARGSUSED */ 477 int 478 sys_ioctl(struct proc *p, void *v, register_t *retval) 479 { 480 struct sys_ioctl_args /* { 481 syscallarg(int) fd; 482 syscallarg(u_long) com; 483 syscallarg(void *) data; 484 } */ *uap = v; 485 struct file *fp; 486 struct filedesc *fdp; 487 u_long com; 488 int error; 489 u_int size; 490 caddr_t data, memp; 491 int tmp; 492 #define STK_PARAMS 128 493 char stkbuf[STK_PARAMS]; 494 495 fdp = p->p_fd; 496 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 497 return (EBADF); 498 499 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 500 return (EBADF); 501 502 switch (com = SCARG(uap, com)) { 503 case FIONCLEX: 504 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 505 return (0); 506 case FIOCLEX: 507 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 508 return (0); 509 } 510 511 /* 512 * Interpret high order word to find amount of data to be 513 * copied to/from the user's address space. 514 */ 515 size = IOCPARM_LEN(com); 516 if (size > IOCPARM_MAX) 517 return (ENOTTY); 518 FREF(fp); 519 memp = NULL; 520 if (size > sizeof (stkbuf)) { 521 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 522 data = memp; 523 } else 524 data = stkbuf; 525 if (com&IOC_IN) { 526 if (size) { 527 error = copyin(SCARG(uap, data), data, (u_int)size); 528 if (error) { 529 goto out; 530 } 531 } else 532 *(caddr_t *)data = SCARG(uap, data); 533 } else if ((com&IOC_OUT) && size) 534 /* 535 * Zero the buffer so the user always 536 * gets back something deterministic. 537 */ 538 bzero(data, size); 539 else if (com&IOC_VOID) 540 *(caddr_t *)data = SCARG(uap, data); 541 542 switch (com) { 543 544 case FIONBIO: 545 if ((tmp = *(int *)data) != 0) 546 fp->f_flag |= FNONBLOCK; 547 else 548 fp->f_flag &= ~FNONBLOCK; 549 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 550 break; 551 552 case FIOASYNC: 553 if ((tmp = *(int *)data) != 0) 554 fp->f_flag |= FASYNC; 555 else 556 fp->f_flag &= ~FASYNC; 557 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 558 break; 559 560 case FIOSETOWN: 561 tmp = *(int *)data; 562 if (fp->f_type == DTYPE_SOCKET) { 563 struct socket *so = (struct socket *)fp->f_data; 564 565 so->so_pgid = tmp; 566 so->so_siguid = p->p_cred->p_ruid; 567 so->so_sigeuid = p->p_ucred->cr_uid; 568 error = 0; 569 break; 570 } 571 if (tmp <= 0) { 572 tmp = -tmp; 573 } else { 574 struct proc *p1 = pfind(tmp); 575 if (p1 == 0) { 576 error = ESRCH; 577 break; 578 } 579 tmp = p1->p_pgrp->pg_id; 580 } 581 error = (*fp->f_ops->fo_ioctl) 582 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 583 break; 584 585 case FIOGETOWN: 586 if (fp->f_type == DTYPE_SOCKET) { 587 error = 0; 588 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 589 break; 590 } 591 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 592 *(int *)data = -*(int *)data; 593 break; 594 595 default: 596 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 597 break; 598 } 599 /* 600 * Copy any data to user, size was 601 * already set and checked above. 602 */ 603 if (error == 0 && (com&IOC_OUT) && size) 604 error = copyout(data, SCARG(uap, data), (u_int)size); 605 out: 606 FRELE(fp); 607 if (memp) 608 free(memp, M_IOCTLOPS); 609 return (error); 610 } 611 612 int selwait, nselcoll; 613 614 /* 615 * Select system call. 616 */ 617 int 618 sys_select(struct proc *p, void *v, register_t *retval) 619 { 620 struct sys_select_args /* { 621 syscallarg(int) nd; 622 syscallarg(fd_set *) in; 623 syscallarg(fd_set *) ou; 624 syscallarg(fd_set *) ex; 625 syscallarg(struct timeval *) tv; 626 } */ *uap = v; 627 fd_mask bits[6]; 628 fd_set *pibits[3], *pobits[3]; 629 struct timeval atv, rtv, ttv; 630 int s, ncoll, error = 0, timo; 631 u_int nd, ni; 632 633 nd = SCARG(uap, nd); 634 if (nd > p->p_fd->fd_nfiles) { 635 /* forgiving; slightly wrong */ 636 nd = p->p_fd->fd_nfiles; 637 } 638 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 639 if (nd > sizeof(bits[0])) { 640 caddr_t mbits; 641 642 mbits = malloc(ni * 6, M_TEMP, M_WAITOK); 643 bzero(mbits, ni * 6); 644 pibits[0] = (fd_set *)&mbits[ni * 0]; 645 pibits[1] = (fd_set *)&mbits[ni * 1]; 646 pibits[2] = (fd_set *)&mbits[ni * 2]; 647 pobits[0] = (fd_set *)&mbits[ni * 3]; 648 pobits[1] = (fd_set *)&mbits[ni * 4]; 649 pobits[2] = (fd_set *)&mbits[ni * 5]; 650 } else { 651 bzero(bits, sizeof(bits)); 652 pibits[0] = (fd_set *)&bits[0]; 653 pibits[1] = (fd_set *)&bits[1]; 654 pibits[2] = (fd_set *)&bits[2]; 655 pobits[0] = (fd_set *)&bits[3]; 656 pobits[1] = (fd_set *)&bits[4]; 657 pobits[2] = (fd_set *)&bits[5]; 658 } 659 660 #define getbits(name, x) \ 661 if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \ 662 pibits[x], ni))) \ 663 goto done; 664 getbits(in, 0); 665 getbits(ou, 1); 666 getbits(ex, 2); 667 #undef getbits 668 669 if (SCARG(uap, tv)) { 670 error = copyin(SCARG(uap, tv), &atv, sizeof (atv)); 671 if (error) 672 goto done; 673 if (itimerfix(&atv)) { 674 error = EINVAL; 675 goto done; 676 } 677 getmicrouptime(&rtv); 678 timeradd(&atv, &rtv, &atv); 679 } else { 680 atv.tv_sec = 0; 681 atv.tv_usec = 0; 682 } 683 timo = 0; 684 685 retry: 686 ncoll = nselcoll; 687 p->p_flag |= P_SELECT; 688 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 689 if (error || *retval) 690 goto done; 691 if (SCARG(uap, tv)) { 692 getmicrouptime(&rtv); 693 if (timercmp(&rtv, &atv, >=)) 694 goto done; 695 ttv = atv; 696 timersub(&ttv, &rtv, &ttv); 697 timo = ttv.tv_sec > 24 * 60 * 60 ? 698 24 * 60 * 60 * hz : tvtohz(&ttv); 699 } 700 s = splhigh(); 701 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 702 splx(s); 703 goto retry; 704 } 705 p->p_flag &= ~P_SELECT; 706 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 707 splx(s); 708 if (error == 0) 709 goto retry; 710 done: 711 p->p_flag &= ~P_SELECT; 712 /* select is not restarted after signals... */ 713 if (error == ERESTART) 714 error = EINTR; 715 if (error == EWOULDBLOCK) 716 error = 0; 717 #define putbits(name, x) \ 718 if (SCARG(uap, name) && (error2 = copyout(pobits[x], \ 719 SCARG(uap, name), ni))) \ 720 error = error2; 721 if (error == 0) { 722 int error2; 723 724 putbits(in, 0); 725 putbits(ou, 1); 726 putbits(ex, 2); 727 #undef putbits 728 } 729 730 if (pibits[0] != (fd_set *)&bits[0]) 731 free(pibits[0], M_TEMP); 732 return (error); 733 } 734 735 int 736 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 737 register_t *retval) 738 { 739 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 740 struct filedesc *fdp = p->p_fd; 741 int msk, i, j, fd; 742 fd_mask bits; 743 struct file *fp; 744 int n = 0; 745 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI }; 746 747 for (msk = 0; msk < 3; msk++) { 748 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 749 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 750 751 for (i = 0; i < nfd; i += NFDBITS) { 752 bits = pibits->fds_bits[i/NFDBITS]; 753 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 754 bits &= ~(1 << j); 755 if ((fp = fd_getfile(fdp, fd)) == NULL) 756 return (EBADF); 757 FREF(fp); 758 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 759 FD_SET(fd, pobits); 760 n++; 761 } 762 FRELE(fp); 763 } 764 } 765 } 766 *retval = n; 767 return (0); 768 } 769 770 /*ARGSUSED*/ 771 int 772 seltrue(dev_t dev, int events, struct proc *p) 773 { 774 775 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 776 } 777 778 /* 779 * Record a select request. 780 */ 781 void 782 selrecord(struct proc *selector, struct selinfo *sip) 783 { 784 struct proc *p; 785 pid_t mypid; 786 787 mypid = selector->p_pid; 788 if (sip->si_selpid == mypid) 789 return; 790 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 791 p->p_wchan == (caddr_t)&selwait) 792 sip->si_flags |= SI_COLL; 793 else 794 sip->si_selpid = mypid; 795 } 796 797 /* 798 * Do a wakeup when a selectable event occurs. 799 */ 800 void 801 selwakeup(struct selinfo *sip) 802 { 803 struct proc *p; 804 int s; 805 806 if (sip->si_selpid == 0) 807 return; 808 if (sip->si_flags & SI_COLL) { 809 nselcoll++; 810 sip->si_flags &= ~SI_COLL; 811 wakeup(&selwait); 812 } 813 p = pfind(sip->si_selpid); 814 sip->si_selpid = 0; 815 if (p != NULL) { 816 SCHED_LOCK(s); 817 if (p->p_wchan == (caddr_t)&selwait) { 818 if (p->p_stat == SSLEEP) 819 setrunnable(p); 820 else 821 unsleep(p); 822 } else if (p->p_flag & P_SELECT) 823 p->p_flag &= ~P_SELECT; 824 SCHED_UNLOCK(s); 825 } 826 } 827 828 void 829 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 830 { 831 struct filedesc *fdp = p->p_fd; 832 struct file *fp; 833 u_int i; 834 int n = 0; 835 836 for (i = 0; i < nfd; i++, pl++) { 837 /* Check the file descriptor. */ 838 if (pl->fd < 0) { 839 pl->revents = 0; 840 continue; 841 } 842 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 843 pl->revents = POLLNVAL; 844 n++; 845 continue; 846 } 847 FREF(fp); 848 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 849 FRELE(fp); 850 if (pl->revents != 0) 851 n++; 852 } 853 *retval = n; 854 } 855 856 /* 857 * We are using the same mechanism as select only we encode/decode args 858 * differently. 859 */ 860 int 861 sys_poll(struct proc *p, void *v, register_t *retval) 862 { 863 struct sys_poll_args /* { 864 syscallarg(struct pollfd *) fds; 865 syscallarg(u_int) nfds; 866 syscallarg(int) timeout; 867 } */ *uap = v; 868 size_t sz; 869 struct pollfd pfds[4], *pl = pfds; 870 int msec = SCARG(uap, timeout); 871 struct timeval atv, rtv, ttv; 872 int timo, ncoll, i, s, error; 873 extern int nselcoll, selwait; 874 u_int nfds = SCARG(uap, nfds); 875 876 /* Standards say no more than MAX_OPEN; this is possibly better. */ 877 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 878 return (EINVAL); 879 880 sz = sizeof(struct pollfd) * nfds; 881 882 /* optimize for the default case, of a small nfds value */ 883 if (sz > sizeof(pfds)) 884 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 885 886 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) 887 goto bad; 888 889 for (i = 0; i < nfds; i++) 890 pl[i].revents = 0; 891 892 if (msec != INFTIM) { 893 atv.tv_sec = msec / 1000; 894 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; 895 896 if (itimerfix(&atv)) { 897 error = EINVAL; 898 goto done; 899 } 900 getmicrouptime(&rtv); 901 timeradd(&atv, &rtv, &atv); 902 } else { 903 atv.tv_sec = 0; 904 atv.tv_usec = 0; 905 } 906 timo = 0; 907 908 retry: 909 ncoll = nselcoll; 910 p->p_flag |= P_SELECT; 911 pollscan(p, pl, nfds, retval); 912 if (*retval) 913 goto done; 914 if (msec != INFTIM) { 915 getmicrouptime(&rtv); 916 if (timercmp(&rtv, &atv, >=)) 917 goto done; 918 ttv = atv; 919 timersub(&ttv, &rtv, &ttv); 920 timo = ttv.tv_sec > 24 * 60 * 60 ? 921 24 * 60 * 60 * hz : tvtohz(&ttv); 922 } 923 s = splhigh(); 924 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 925 splx(s); 926 goto retry; 927 } 928 p->p_flag &= ~P_SELECT; 929 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 930 splx(s); 931 if (error == 0) 932 goto retry; 933 934 done: 935 p->p_flag &= ~P_SELECT; 936 /* 937 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 938 * ignored (since the whole point is to see what would block). 939 */ 940 switch (error) { 941 case ERESTART: 942 error = copyout(pl, SCARG(uap, fds), sz); 943 if (error == 0) 944 error = EINTR; 945 break; 946 case EWOULDBLOCK: 947 case 0: 948 error = copyout(pl, SCARG(uap, fds), sz); 949 break; 950 } 951 bad: 952 if (pl != pfds) 953 free(pl, M_TEMP); 954 return (error); 955 } 956