/*	$OpenBSD: sys_generic.c,v 1.56 2007/03/24 16:01:22 art Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/sched.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
int seltrue(dev_t, int, struct proc *);
void pollscan(struct proc *, struct pollfd *, u_int, register_t *);

void sel_clean_proclist(struct proc *);

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofileread() will FRELE the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;

	fp->f_rxfer++;
	fp->f_rbytes += cnt;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FRELE(fp);
	return (error);
}

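/*
 * Note on the dofileread()/dofilereadv()/dofilewrite()/dofilewritev()
 * helpers: if the transfer is interrupted (ERESTART, EINTR or
 * EWOULDBLOCK) after some data has already moved, the error is
 * discarded and the short count is returned instead, so userland sees
 * a partial transfer rather than a failure.  The write paths
 * additionally raise SIGPIPE when the descriptor returns EPIPE.
 */
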
/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, register_t *retval)
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;

	fp->f_rxfer++;
	fp->f_rbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewrite() will FRELE the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (void *)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;

	fp->f_wxfer++;
	fp->f_wbytes += cnt;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FRELE(fp);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, register_t *retval)
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;

	fp->f_wxfer++;
	fp->f_wbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	FREF(fp);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

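	/*
	 * A few generic requests are handled here so the per-file f_flag
	 * state stays in sync: FIONBIO and FIOASYNC toggle FNONBLOCK and
	 * FASYNC before being passed down, and FIOSETOWN/FIOGETOWN are
	 * handled directly for sockets and remapped to TIOCSPGRP/TIOCGPGRP
	 * otherwise.  Everything else goes straight to the object's
	 * fo_ioctl routine.
	 */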
	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
		    (fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		break;
	}
	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (com&IOC_OUT) && size)
		error = copyout(data, SCARG(uap, data), (u_int)size);
out:
	FRELE(fp);
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_mask bits[6];
	fd_set *pibits[3], *pobits[3];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	u_int nd, ni;

	nd = SCARG(uap, nd);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (nd > sizeof(bits[0])) {
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero(bits, sizeof(bits));
		pibits[0] = (fd_set *)&bits[0];
		pibits[1] = (fd_set *)&bits[1];
		pibits[2] = (fd_set *)&bits[2];
		pobits[0] = (fd_set *)&bits[3];
		pobits[1] = (fd_set *)&bits[4];
		pobits[2] = (fd_set *)&bits[5];
	}

#define getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
	    pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

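	/*
	 * Scan/sleep loop: mark the process with P_SELECT, poll every
	 * requested descriptor once, and if nothing is ready sleep on the
	 * global selwait channel until the timeout expires or a
	 * selwakeup() arrives.  nselcoll is a global collision counter;
	 * if it changed while we were scanning (or P_SELECT was cleared),
	 * a wakeup may have raced with us, so rescan instead of sleeping.
	 * sys_poll() below uses the same protocol.
	 */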
retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	sel_clean_proclist(p);
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
	    SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (pibits[0] != (fd_set *)&bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

int
selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
    register_t *retval)
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FREF(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	if (sip->si_selproc == NULL) {
		sip->si_selproc = selector;
		TAILQ_INSERT_TAIL(&selector->p_selects, sip, si_list);
	} else if (sip->si_selproc != selector) {
		sip->si_flags |= SI_COLL;
	}
}

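/*
 * selrecord() above and selwakeup() below are the rendezvous between a
 * polling process and the objects it waits on.  Only one process can be
 * recorded per selinfo; a second selector sets SI_COLL instead, and
 * selwakeup() then bumps nselcoll and wakes everyone sleeping on selwait
 * so the losers rescan.  As an illustrative sketch (not code from this
 * file, using a hypothetical softc field sc_rsel), a driver's poll
 * routine would typically do:
 *
 *	if (nothing is ready)
 *		selrecord(p, &sc->sc_rsel);
 *
 * and call selwakeup(&sc->sc_rsel) once the condition becomes true.
 */
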
/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}

	/*
	 * We check the process once before locking.
	 * Then we wake the process and clean up its
	 * selects list.
	 */
	if (sip->si_selproc == NULL)
		return;

	SCHED_LOCK(s);
	if ((p = sip->si_selproc) != NULL) {
		if (p->p_wchan != NULL) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else {
			atomic_clearbits_int(&p->p_flag, P_SELECT);
		}
	}
	SCHED_UNLOCK(s);
}

void
sel_clean_proclist(struct proc *p)
{
	struct selinfo *sip;

	while ((sip = TAILQ_FIRST(&p->p_selects)) != NULL) {
		sip->si_selproc = NULL;
		TAILQ_REMOVE(&p->p_selects, sip, si_list);
	}
}

void
pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;
	int n = 0;

	for (i = 0; i < nfd; i++, pl++) {
		/* Check the file descriptor. */
		if (pl->fd < 0) {
			pl->revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
			pl->revents = POLLNVAL;
			n++;
			continue;
		}
		FREF(fp);
		pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
		FRELE(fp);
		if (pl->revents != 0)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv, rtv, ttv;
	int timo, ncoll, i, s, error;
	extern int nselcoll, selwait;
	u_int nfds = SCARG(uap, nfds);

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * nfds;

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < nfds; i++)
		pl[i].revents = 0;

	if (msec != INFTIM) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	pollscan(p, pl, nfds, retval);
	if (*retval)
		goto done;
	if (msec != INFTIM) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	sel_clean_proclist(p);
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 * ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case ERESTART:
		error = copyout(pl, SCARG(uap, fds), sz);
		if (error == 0)
			error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = copyout(pl, SCARG(uap, fds), sz);
		break;
	}
bad:
	if (pl != pfds)
		free(pl, M_TEMP);
	return (error);
}