1 /* $OpenBSD: sys_generic.c,v 1.60 2009/03/24 13:49:38 kurt Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 66 int seltrue(dev_t, int, struct proc *); 67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 68 int pollout(struct pollfd *, struct pollfd *, u_int); 69 70 /* 71 * Read system call. 72 */ 73 /* ARGSUSED */ 74 int 75 sys_read(struct proc *p, void *v, register_t *retval) 76 { 77 struct sys_read_args /* { 78 syscallarg(int) fd; 79 syscallarg(void *) buf; 80 syscallarg(size_t) nbyte; 81 } */ *uap = v; 82 int fd = SCARG(uap, fd); 83 struct file *fp; 84 struct filedesc *fdp = p->p_fd; 85 86 if ((fp = fd_getfile(fdp, fd)) == NULL) 87 return (EBADF); 88 if ((fp->f_flag & FREAD) == 0) 89 return (EBADF); 90 91 FREF(fp); 92 93 /* dofileread() will FRELE the descriptor for us */ 94 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 95 &fp->f_offset, retval)); 96 } 97 98 int 99 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 100 off_t *offset, register_t *retval) 101 { 102 struct uio auio; 103 struct iovec aiov; 104 long cnt, error = 0; 105 #ifdef KTRACE 106 struct iovec ktriov; 107 #endif 108 109 aiov.iov_base = buf; 110 aiov.iov_len = nbyte; 111 auio.uio_iov = &aiov; 112 auio.uio_iovcnt = 1; 113 auio.uio_resid = nbyte; 114 auio.uio_rw = UIO_READ; 115 auio.uio_segflg = UIO_USERSPACE; 116 auio.uio_procp = p; 117 118 /* 119 * Reads return ssize_t because -1 is returned on error. Therefore 120 * we must restrict the length to SSIZE_MAX to avoid garbage return 121 * values. 122 */ 123 if (auio.uio_resid > SSIZE_MAX) { 124 error = EINVAL; 125 goto out; 126 } 127 128 #ifdef KTRACE 129 /* 130 * if tracing, save a copy of iovec 131 */ 132 if (KTRPOINT(p, KTR_GENIO)) 133 ktriov = aiov; 134 #endif 135 cnt = auio.uio_resid; 136 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 137 if (error) 138 if (auio.uio_resid != cnt && (error == ERESTART || 139 error == EINTR || error == EWOULDBLOCK)) 140 error = 0; 141 cnt -= auio.uio_resid; 142 143 fp->f_rxfer++; 144 fp->f_rbytes += cnt; 145 #ifdef KTRACE 146 if (KTRPOINT(p, KTR_GENIO) && error == 0) 147 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 148 #endif 149 *retval = cnt; 150 out: 151 FRELE(fp); 152 return (error); 153 } 154 155 /* 156 * Scatter read system call. 157 */ 158 int 159 sys_readv(struct proc *p, void *v, register_t *retval) 160 { 161 struct sys_readv_args /* { 162 syscallarg(int) fd; 163 syscallarg(const struct iovec *) iovp; 164 syscallarg(int) iovcnt; 165 } */ *uap = v; 166 int fd = SCARG(uap, fd); 167 struct file *fp; 168 struct filedesc *fdp = p->p_fd; 169 170 if ((fp = fd_getfile(fdp, fd)) == NULL) 171 return (EBADF); 172 if ((fp->f_flag & FREAD) == 0) 173 return (EBADF); 174 175 FREF(fp); 176 177 /* dofilereadv() will FRELE the descriptor for us */ 178 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 179 &fp->f_offset, retval)); 180 } 181 182 int 183 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 184 int iovcnt, off_t *offset, register_t *retval) 185 { 186 struct uio auio; 187 struct iovec *iov; 188 struct iovec *needfree; 189 struct iovec aiov[UIO_SMALLIOV]; 190 long i, cnt, error = 0; 191 u_int iovlen; 192 #ifdef KTRACE 193 struct iovec *ktriov = NULL; 194 #endif 195 196 /* note: can't use iovlen until iovcnt is validated */ 197 iovlen = iovcnt * sizeof(struct iovec); 198 if ((u_int)iovcnt > UIO_SMALLIOV) { 199 if ((u_int)iovcnt > IOV_MAX) { 200 error = EINVAL; 201 goto out; 202 } 203 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 204 } else if ((u_int)iovcnt > 0) { 205 iov = aiov; 206 needfree = NULL; 207 } else { 208 error = EINVAL; 209 goto out; 210 } 211 212 auio.uio_iov = iov; 213 auio.uio_iovcnt = iovcnt; 214 auio.uio_rw = UIO_READ; 215 auio.uio_segflg = UIO_USERSPACE; 216 auio.uio_procp = p; 217 error = copyin(iovp, iov, iovlen); 218 if (error) 219 goto done; 220 auio.uio_resid = 0; 221 for (i = 0; i < iovcnt; i++) { 222 auio.uio_resid += iov->iov_len; 223 /* 224 * Reads return ssize_t because -1 is returned on error. 225 * Therefore we must restrict the length to SSIZE_MAX to 226 * avoid garbage return values. 227 */ 228 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 229 error = EINVAL; 230 goto done; 231 } 232 iov++; 233 } 234 #ifdef KTRACE 235 /* 236 * if tracing, save a copy of iovec 237 */ 238 if (KTRPOINT(p, KTR_GENIO)) { 239 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 240 bcopy(auio.uio_iov, ktriov, iovlen); 241 } 242 #endif 243 cnt = auio.uio_resid; 244 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 245 if (error) 246 if (auio.uio_resid != cnt && (error == ERESTART || 247 error == EINTR || error == EWOULDBLOCK)) 248 error = 0; 249 cnt -= auio.uio_resid; 250 251 fp->f_rxfer++; 252 fp->f_rbytes += cnt; 253 #ifdef KTRACE 254 if (ktriov != NULL) { 255 if (error == 0) 256 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 257 error); 258 free(ktriov, M_TEMP); 259 } 260 #endif 261 *retval = cnt; 262 done: 263 if (needfree) 264 free(needfree, M_IOV); 265 out: 266 FRELE(fp); 267 return (error); 268 } 269 270 /* 271 * Write system call 272 */ 273 int 274 sys_write(struct proc *p, void *v, register_t *retval) 275 { 276 struct sys_write_args /* { 277 syscallarg(int) fd; 278 syscallarg(const void *) buf; 279 syscallarg(size_t) nbyte; 280 } */ *uap = v; 281 int fd = SCARG(uap, fd); 282 struct file *fp; 283 struct filedesc *fdp = p->p_fd; 284 285 if ((fp = fd_getfile(fdp, fd)) == NULL) 286 return (EBADF); 287 if ((fp->f_flag & FWRITE) == 0) 288 return (EBADF); 289 290 FREF(fp); 291 292 /* dofilewrite() will FRELE the descriptor for us */ 293 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 294 &fp->f_offset, retval)); 295 } 296 297 int 298 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 299 size_t nbyte, off_t *offset, register_t *retval) 300 { 301 struct uio auio; 302 struct iovec aiov; 303 long cnt, error = 0; 304 #ifdef KTRACE 305 struct iovec ktriov; 306 #endif 307 308 aiov.iov_base = (void *)buf; /* XXX kills const */ 309 aiov.iov_len = nbyte; 310 auio.uio_iov = &aiov; 311 auio.uio_iovcnt = 1; 312 auio.uio_resid = nbyte; 313 auio.uio_rw = UIO_WRITE; 314 auio.uio_segflg = UIO_USERSPACE; 315 auio.uio_procp = p; 316 317 /* 318 * Writes return ssize_t because -1 is returned on error. Therefore 319 * we must restrict the length to SSIZE_MAX to avoid garbage return 320 * values. 321 */ 322 if (auio.uio_resid > SSIZE_MAX) { 323 error = EINVAL; 324 goto out; 325 } 326 327 #ifdef KTRACE 328 /* 329 * if tracing, save a copy of iovec 330 */ 331 if (KTRPOINT(p, KTR_GENIO)) 332 ktriov = aiov; 333 #endif 334 cnt = auio.uio_resid; 335 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 336 if (error) { 337 if (auio.uio_resid != cnt && (error == ERESTART || 338 error == EINTR || error == EWOULDBLOCK)) 339 error = 0; 340 if (error == EPIPE) 341 ptsignal(p, SIGPIPE, STHREAD); 342 } 343 cnt -= auio.uio_resid; 344 345 fp->f_wxfer++; 346 fp->f_wbytes += cnt; 347 #ifdef KTRACE 348 if (KTRPOINT(p, KTR_GENIO) && error == 0) 349 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 350 #endif 351 *retval = cnt; 352 out: 353 FRELE(fp); 354 return (error); 355 } 356 357 /* 358 * Gather write system call 359 */ 360 int 361 sys_writev(struct proc *p, void *v, register_t *retval) 362 { 363 struct sys_writev_args /* { 364 syscallarg(int) fd; 365 syscallarg(const struct iovec *) iovp; 366 syscallarg(int) iovcnt; 367 } */ *uap = v; 368 int fd = SCARG(uap, fd); 369 struct file *fp; 370 struct filedesc *fdp = p->p_fd; 371 372 if ((fp = fd_getfile(fdp, fd)) == NULL) 373 return (EBADF); 374 if ((fp->f_flag & FWRITE) == 0) 375 return (EBADF); 376 377 FREF(fp); 378 379 /* dofilewritev() will FRELE the descriptor for us */ 380 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 381 &fp->f_offset, retval)); 382 } 383 384 int 385 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 386 int iovcnt, off_t *offset, register_t *retval) 387 { 388 struct uio auio; 389 struct iovec *iov; 390 struct iovec *needfree; 391 struct iovec aiov[UIO_SMALLIOV]; 392 long i, cnt, error = 0; 393 u_int iovlen; 394 #ifdef KTRACE 395 struct iovec *ktriov = NULL; 396 #endif 397 398 /* note: can't use iovlen until iovcnt is validated */ 399 iovlen = iovcnt * sizeof(struct iovec); 400 if ((u_int)iovcnt > UIO_SMALLIOV) { 401 if ((u_int)iovcnt > IOV_MAX) { 402 error = EINVAL; 403 goto out; 404 } 405 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 406 } else if ((u_int)iovcnt > 0) { 407 iov = aiov; 408 needfree = NULL; 409 } else { 410 error = EINVAL; 411 goto out; 412 } 413 414 auio.uio_iov = iov; 415 auio.uio_iovcnt = iovcnt; 416 auio.uio_rw = UIO_WRITE; 417 auio.uio_segflg = UIO_USERSPACE; 418 auio.uio_procp = p; 419 error = copyin(iovp, iov, iovlen); 420 if (error) 421 goto done; 422 auio.uio_resid = 0; 423 for (i = 0; i < iovcnt; i++) { 424 auio.uio_resid += iov->iov_len; 425 /* 426 * Writes return ssize_t because -1 is returned on error. 427 * Therefore we must restrict the length to SSIZE_MAX to 428 * avoid garbage return values. 429 */ 430 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 431 error = EINVAL; 432 goto done; 433 } 434 iov++; 435 } 436 #ifdef KTRACE 437 /* 438 * if tracing, save a copy of iovec 439 */ 440 if (KTRPOINT(p, KTR_GENIO)) { 441 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 442 bcopy(auio.uio_iov, ktriov, iovlen); 443 } 444 #endif 445 cnt = auio.uio_resid; 446 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 447 if (error) { 448 if (auio.uio_resid != cnt && (error == ERESTART || 449 error == EINTR || error == EWOULDBLOCK)) 450 error = 0; 451 if (error == EPIPE) 452 ptsignal(p, SIGPIPE, STHREAD); 453 } 454 cnt -= auio.uio_resid; 455 456 fp->f_wxfer++; 457 fp->f_wbytes += cnt; 458 #ifdef KTRACE 459 if (ktriov != NULL) { 460 if (error == 0) 461 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 462 free(ktriov, M_TEMP); 463 } 464 #endif 465 *retval = cnt; 466 done: 467 if (needfree) 468 free(needfree, M_IOV); 469 out: 470 FRELE(fp); 471 return (error); 472 } 473 474 /* 475 * Ioctl system call 476 */ 477 /* ARGSUSED */ 478 int 479 sys_ioctl(struct proc *p, void *v, register_t *retval) 480 { 481 struct sys_ioctl_args /* { 482 syscallarg(int) fd; 483 syscallarg(u_long) com; 484 syscallarg(void *) data; 485 } */ *uap = v; 486 struct file *fp; 487 struct filedesc *fdp; 488 u_long com; 489 int error; 490 u_int size; 491 caddr_t data, memp; 492 int tmp; 493 #define STK_PARAMS 128 494 char stkbuf[STK_PARAMS]; 495 496 fdp = p->p_fd; 497 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 498 return (EBADF); 499 500 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 501 return (EBADF); 502 503 switch (com = SCARG(uap, com)) { 504 case FIONCLEX: 505 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 506 return (0); 507 case FIOCLEX: 508 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 509 return (0); 510 } 511 512 /* 513 * Interpret high order word to find amount of data to be 514 * copied to/from the user's address space. 515 */ 516 size = IOCPARM_LEN(com); 517 if (size > IOCPARM_MAX) 518 return (ENOTTY); 519 FREF(fp); 520 memp = NULL; 521 if (size > sizeof (stkbuf)) { 522 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 523 data = memp; 524 } else 525 data = stkbuf; 526 if (com&IOC_IN) { 527 if (size) { 528 error = copyin(SCARG(uap, data), data, (u_int)size); 529 if (error) { 530 goto out; 531 } 532 } else 533 *(caddr_t *)data = SCARG(uap, data); 534 } else if ((com&IOC_OUT) && size) 535 /* 536 * Zero the buffer so the user always 537 * gets back something deterministic. 538 */ 539 bzero(data, size); 540 else if (com&IOC_VOID) 541 *(caddr_t *)data = SCARG(uap, data); 542 543 switch (com) { 544 545 case FIONBIO: 546 if ((tmp = *(int *)data) != 0) 547 fp->f_flag |= FNONBLOCK; 548 else 549 fp->f_flag &= ~FNONBLOCK; 550 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 551 break; 552 553 case FIOASYNC: 554 if ((tmp = *(int *)data) != 0) 555 fp->f_flag |= FASYNC; 556 else 557 fp->f_flag &= ~FASYNC; 558 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 559 break; 560 561 case FIOSETOWN: 562 tmp = *(int *)data; 563 if (fp->f_type == DTYPE_SOCKET) { 564 struct socket *so = (struct socket *)fp->f_data; 565 566 so->so_pgid = tmp; 567 so->so_siguid = p->p_cred->p_ruid; 568 so->so_sigeuid = p->p_ucred->cr_uid; 569 error = 0; 570 break; 571 } 572 if (tmp <= 0) { 573 tmp = -tmp; 574 } else { 575 struct proc *p1 = pfind(tmp); 576 if (p1 == 0) { 577 error = ESRCH; 578 break; 579 } 580 tmp = p1->p_pgrp->pg_id; 581 } 582 error = (*fp->f_ops->fo_ioctl) 583 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 584 break; 585 586 case FIOGETOWN: 587 if (fp->f_type == DTYPE_SOCKET) { 588 error = 0; 589 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 590 break; 591 } 592 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 593 *(int *)data = -*(int *)data; 594 break; 595 596 default: 597 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 598 break; 599 } 600 /* 601 * Copy any data to user, size was 602 * already set and checked above. 603 */ 604 if (error == 0 && (com&IOC_OUT) && size) 605 error = copyout(data, SCARG(uap, data), (u_int)size); 606 out: 607 FRELE(fp); 608 if (memp) 609 free(memp, M_IOCTLOPS); 610 return (error); 611 } 612 613 int selwait, nselcoll; 614 615 /* 616 * Select system call. 617 */ 618 int 619 sys_select(struct proc *p, void *v, register_t *retval) 620 { 621 struct sys_select_args /* { 622 syscallarg(int) nd; 623 syscallarg(fd_set *) in; 624 syscallarg(fd_set *) ou; 625 syscallarg(fd_set *) ex; 626 syscallarg(struct timeval *) tv; 627 } */ *uap = v; 628 fd_mask bits[6]; 629 fd_set *pibits[3], *pobits[3]; 630 struct timeval atv, rtv, ttv; 631 int s, ncoll, error = 0, timo; 632 u_int nd, ni; 633 634 nd = SCARG(uap, nd); 635 if (nd > p->p_fd->fd_nfiles) { 636 /* forgiving; slightly wrong */ 637 nd = p->p_fd->fd_nfiles; 638 } 639 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 640 if (nd > sizeof(bits[0])) { 641 caddr_t mbits; 642 643 mbits = malloc(ni * 6, M_TEMP, M_WAITOK|M_ZERO); 644 pibits[0] = (fd_set *)&mbits[ni * 0]; 645 pibits[1] = (fd_set *)&mbits[ni * 1]; 646 pibits[2] = (fd_set *)&mbits[ni * 2]; 647 pobits[0] = (fd_set *)&mbits[ni * 3]; 648 pobits[1] = (fd_set *)&mbits[ni * 4]; 649 pobits[2] = (fd_set *)&mbits[ni * 5]; 650 } else { 651 bzero(bits, sizeof(bits)); 652 pibits[0] = (fd_set *)&bits[0]; 653 pibits[1] = (fd_set *)&bits[1]; 654 pibits[2] = (fd_set *)&bits[2]; 655 pobits[0] = (fd_set *)&bits[3]; 656 pobits[1] = (fd_set *)&bits[4]; 657 pobits[2] = (fd_set *)&bits[5]; 658 } 659 660 #define getbits(name, x) \ 661 if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \ 662 pibits[x], ni))) \ 663 goto done; 664 getbits(in, 0); 665 getbits(ou, 1); 666 getbits(ex, 2); 667 #undef getbits 668 669 if (SCARG(uap, tv)) { 670 error = copyin(SCARG(uap, tv), &atv, sizeof (atv)); 671 if (error) 672 goto done; 673 if (itimerfix(&atv)) { 674 error = EINVAL; 675 goto done; 676 } 677 getmicrouptime(&rtv); 678 timeradd(&atv, &rtv, &atv); 679 } else { 680 atv.tv_sec = 0; 681 atv.tv_usec = 0; 682 } 683 timo = 0; 684 685 retry: 686 ncoll = nselcoll; 687 atomic_setbits_int(&p->p_flag, P_SELECT); 688 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 689 if (error || *retval) 690 goto done; 691 if (SCARG(uap, tv)) { 692 getmicrouptime(&rtv); 693 if (timercmp(&rtv, &atv, >=)) 694 goto done; 695 ttv = atv; 696 timersub(&ttv, &rtv, &ttv); 697 timo = ttv.tv_sec > 24 * 60 * 60 ? 698 24 * 60 * 60 * hz : tvtohz(&ttv); 699 } 700 s = splhigh(); 701 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 702 splx(s); 703 goto retry; 704 } 705 atomic_clearbits_int(&p->p_flag, P_SELECT); 706 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 707 splx(s); 708 if (error == 0) 709 goto retry; 710 done: 711 atomic_clearbits_int(&p->p_flag, P_SELECT); 712 /* select is not restarted after signals... */ 713 if (error == ERESTART) 714 error = EINTR; 715 if (error == EWOULDBLOCK) 716 error = 0; 717 #define putbits(name, x) \ 718 if (SCARG(uap, name) && (error2 = copyout(pobits[x], \ 719 SCARG(uap, name), ni))) \ 720 error = error2; 721 if (error == 0) { 722 int error2; 723 724 putbits(in, 0); 725 putbits(ou, 1); 726 putbits(ex, 2); 727 #undef putbits 728 } 729 730 if (pibits[0] != (fd_set *)&bits[0]) 731 free(pibits[0], M_TEMP); 732 return (error); 733 } 734 735 int 736 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 737 register_t *retval) 738 { 739 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 740 struct filedesc *fdp = p->p_fd; 741 int msk, i, j, fd; 742 fd_mask bits; 743 struct file *fp; 744 int n = 0; 745 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI }; 746 747 for (msk = 0; msk < 3; msk++) { 748 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 749 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 750 751 for (i = 0; i < nfd; i += NFDBITS) { 752 bits = pibits->fds_bits[i/NFDBITS]; 753 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 754 bits &= ~(1 << j); 755 if ((fp = fd_getfile(fdp, fd)) == NULL) 756 return (EBADF); 757 FREF(fp); 758 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 759 FD_SET(fd, pobits); 760 n++; 761 } 762 FRELE(fp); 763 } 764 } 765 } 766 *retval = n; 767 return (0); 768 } 769 770 /*ARGSUSED*/ 771 int 772 seltrue(dev_t dev, int events, struct proc *p) 773 { 774 775 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 776 } 777 778 /* 779 * Record a select request. 780 */ 781 void 782 selrecord(struct proc *selector, struct selinfo *sip) 783 { 784 struct proc *p; 785 pid_t mypid; 786 787 mypid = selector->p_pid; 788 if (sip->si_selpid == mypid) 789 return; 790 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 791 p->p_wchan == (caddr_t)&selwait) 792 sip->si_flags |= SI_COLL; 793 else 794 sip->si_selpid = mypid; 795 } 796 797 /* 798 * Do a wakeup when a selectable event occurs. 799 */ 800 void 801 selwakeup(struct selinfo *sip) 802 { 803 struct proc *p; 804 int s; 805 806 if (sip->si_selpid == 0) 807 return; 808 if (sip->si_flags & SI_COLL) { 809 nselcoll++; 810 sip->si_flags &= ~SI_COLL; 811 wakeup(&selwait); 812 } 813 p = pfind(sip->si_selpid); 814 sip->si_selpid = 0; 815 if (p != NULL) { 816 SCHED_LOCK(s); 817 if (p->p_wchan == (caddr_t)&selwait) { 818 if (p->p_stat == SSLEEP) 819 setrunnable(p); 820 else 821 unsleep(p); 822 } else if (p->p_flag & P_SELECT) 823 atomic_clearbits_int(&p->p_flag, P_SELECT); 824 SCHED_UNLOCK(s); 825 } 826 } 827 828 void 829 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 830 { 831 struct filedesc *fdp = p->p_fd; 832 struct file *fp; 833 u_int i; 834 int n = 0; 835 836 for (i = 0; i < nfd; i++, pl++) { 837 /* Check the file descriptor. */ 838 if (pl->fd < 0) { 839 pl->revents = 0; 840 continue; 841 } 842 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 843 pl->revents = POLLNVAL; 844 n++; 845 continue; 846 } 847 FREF(fp); 848 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 849 FRELE(fp); 850 if (pl->revents != 0) 851 n++; 852 } 853 *retval = n; 854 } 855 856 /* 857 * Only copyout the revents field. 858 */ 859 int 860 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 861 { 862 int error = 0; 863 u_int i = 0; 864 865 while (!error && i++ < nfds) { 866 error = copyout(&pl->revents, &upl->revents, 867 sizeof(upl->revents)); 868 pl++; 869 upl++; 870 } 871 872 return (error); 873 } 874 875 /* 876 * We are using the same mechanism as select only we encode/decode args 877 * differently. 878 */ 879 int 880 sys_poll(struct proc *p, void *v, register_t *retval) 881 { 882 struct sys_poll_args /* { 883 syscallarg(struct pollfd *) fds; 884 syscallarg(u_int) nfds; 885 syscallarg(int) timeout; 886 } */ *uap = v; 887 size_t sz; 888 struct pollfd pfds[4], *pl = pfds; 889 int msec = SCARG(uap, timeout); 890 struct timeval atv, rtv, ttv; 891 int timo, ncoll, i, s, error; 892 extern int nselcoll, selwait; 893 u_int nfds = SCARG(uap, nfds); 894 895 /* Standards say no more than MAX_OPEN; this is possibly better. */ 896 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 897 return (EINVAL); 898 899 sz = sizeof(struct pollfd) * nfds; 900 901 /* optimize for the default case, of a small nfds value */ 902 if (sz > sizeof(pfds)) 903 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 904 905 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) 906 goto bad; 907 908 for (i = 0; i < nfds; i++) 909 pl[i].revents = 0; 910 911 if (msec != INFTIM) { 912 atv.tv_sec = msec / 1000; 913 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; 914 915 if (itimerfix(&atv)) { 916 error = EINVAL; 917 goto done; 918 } 919 getmicrouptime(&rtv); 920 timeradd(&atv, &rtv, &atv); 921 } else { 922 atv.tv_sec = 0; 923 atv.tv_usec = 0; 924 } 925 timo = 0; 926 927 retry: 928 ncoll = nselcoll; 929 atomic_setbits_int(&p->p_flag, P_SELECT); 930 pollscan(p, pl, nfds, retval); 931 if (*retval) 932 goto done; 933 if (msec != INFTIM) { 934 getmicrouptime(&rtv); 935 if (timercmp(&rtv, &atv, >=)) 936 goto done; 937 ttv = atv; 938 timersub(&ttv, &rtv, &ttv); 939 timo = ttv.tv_sec > 24 * 60 * 60 ? 940 24 * 60 * 60 * hz : tvtohz(&ttv); 941 } 942 s = splhigh(); 943 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 944 splx(s); 945 goto retry; 946 } 947 atomic_clearbits_int(&p->p_flag, P_SELECT); 948 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 949 splx(s); 950 if (error == 0) 951 goto retry; 952 953 done: 954 atomic_clearbits_int(&p->p_flag, P_SELECT); 955 /* 956 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 957 * ignored (since the whole point is to see what would block). 958 */ 959 switch (error) { 960 case ERESTART: 961 error = pollout(pl, SCARG(uap, fds), nfds); 962 if (error == 0) 963 error = EINTR; 964 break; 965 case EWOULDBLOCK: 966 case 0: 967 error = pollout(pl, SCARG(uap, fds), nfds); 968 break; 969 } 970 bad: 971 if (pl != pfds) 972 free(pl, M_TEMP); 973 return (error); 974 } 975