1 /* $OpenBSD: sys_generic.c,v 1.41 2002/08/12 14:32:44 aaron Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/resourcevar.h> 52 #include <sys/socketvar.h> 53 #include <sys/signalvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/stat.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #ifdef KTRACE 60 #include <sys/ktrace.h> 61 #endif 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 #include <uvm/uvm_extern.h> 67 68 int selscan(struct proc *, fd_set *, fd_set *, int, register_t *); 69 int seltrue(dev_t, int, struct proc *); 70 void pollscan(struct proc *, struct pollfd *, int, register_t *); 71 72 /* 73 * Read system call. 74 */ 75 /* ARGSUSED */ 76 int 77 sys_read(p, v, retval) 78 struct proc *p; 79 void *v; 80 register_t *retval; 81 { 82 struct sys_read_args /* { 83 syscallarg(int) fd; 84 syscallarg(void *) buf; 85 syscallarg(size_t) nbyte; 86 } */ *uap = v; 87 int fd = SCARG(uap, fd); 88 struct file *fp; 89 struct filedesc *fdp = p->p_fd; 90 91 if ((fp = fd_getfile(fdp, fd)) == NULL) 92 return (EBADF); 93 if ((fp->f_flag & FREAD) == 0) 94 return (EBADF); 95 96 FREF(fp); 97 98 /* dofileread() will FRELE the descriptor for us */ 99 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 100 &fp->f_offset, retval)); 101 } 102 103 int 104 dofileread(p, fd, fp, buf, nbyte, offset, retval) 105 struct proc *p; 106 int fd; 107 struct file *fp; 108 void *buf; 109 size_t nbyte; 110 off_t *offset; 111 register_t *retval; 112 { 113 struct uio auio; 114 struct iovec aiov; 115 long cnt, error = 0; 116 #ifdef KTRACE 117 struct iovec ktriov; 118 #endif 119 120 aiov.iov_base = (caddr_t)buf; 121 aiov.iov_len = nbyte; 122 auio.uio_iov = &aiov; 123 auio.uio_iovcnt = 1; 124 auio.uio_resid = nbyte; 125 auio.uio_rw = UIO_READ; 126 auio.uio_segflg = UIO_USERSPACE; 127 auio.uio_procp = p; 128 129 /* 130 * Reads return ssize_t because -1 is returned on error. Therefore 131 * we must restrict the length to SSIZE_MAX to avoid garbage return 132 * values. 133 */ 134 if (auio.uio_resid > SSIZE_MAX) { 135 error = EINVAL; 136 goto out; 137 } 138 139 #ifdef KTRACE 140 /* 141 * if tracing, save a copy of iovec 142 */ 143 if (KTRPOINT(p, KTR_GENIO)) 144 ktriov = aiov; 145 #endif 146 cnt = auio.uio_resid; 147 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 148 if (error) 149 if (auio.uio_resid != cnt && (error == ERESTART || 150 error == EINTR || error == EWOULDBLOCK)) 151 error = 0; 152 cnt -= auio.uio_resid; 153 #ifdef KTRACE 154 if (KTRPOINT(p, KTR_GENIO) && error == 0) 155 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 156 #endif 157 *retval = cnt; 158 out: 159 FRELE(fp); 160 return (error); 161 } 162 163 /* 164 * Scatter read system call. 165 */ 166 int 167 sys_readv(p, v, retval) 168 struct proc *p; 169 void *v; 170 register_t *retval; 171 { 172 struct sys_readv_args /* { 173 syscallarg(int) fd; 174 syscallarg(const struct iovec *) iovp; 175 syscallarg(int) iovcnt; 176 } */ *uap = v; 177 int fd = SCARG(uap, fd); 178 struct file *fp; 179 struct filedesc *fdp = p->p_fd; 180 181 if ((fp = fd_getfile(fdp, fd)) == NULL) 182 return (EBADF); 183 if ((fp->f_flag & FREAD) == 0) 184 return (EBADF); 185 186 FREF(fp); 187 188 /* dofilereadv() will FRELE the descriptor for us */ 189 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 190 &fp->f_offset, retval)); 191 } 192 193 int 194 dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval) 195 struct proc *p; 196 int fd; 197 struct file *fp; 198 const struct iovec *iovp; 199 int iovcnt; 200 off_t *offset; 201 register_t *retval; 202 { 203 struct uio auio; 204 struct iovec *iov; 205 struct iovec *needfree; 206 struct iovec aiov[UIO_SMALLIOV]; 207 long i, cnt, error = 0; 208 u_int iovlen; 209 #ifdef KTRACE 210 struct iovec *ktriov = NULL; 211 #endif 212 213 /* note: can't use iovlen until iovcnt is validated */ 214 iovlen = iovcnt * sizeof(struct iovec); 215 if ((u_int)iovcnt > UIO_SMALLIOV) { 216 if ((u_int)iovcnt > IOV_MAX) { 217 error = EINVAL; 218 goto out; 219 } 220 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 221 } else if ((u_int)iovcnt > 0) { 222 iov = aiov; 223 needfree = NULL; 224 } else { 225 error = EINVAL; 226 goto out; 227 } 228 229 auio.uio_iov = iov; 230 auio.uio_iovcnt = iovcnt; 231 auio.uio_rw = UIO_READ; 232 auio.uio_segflg = UIO_USERSPACE; 233 auio.uio_procp = p; 234 error = copyin(iovp, iov, iovlen); 235 if (error) 236 goto done; 237 auio.uio_resid = 0; 238 for (i = 0; i < iovcnt; i++) { 239 auio.uio_resid += iov->iov_len; 240 /* 241 * Reads return ssize_t because -1 is returned on error. 242 * Therefore we must restrict the length to SSIZE_MAX to 243 * avoid garbage return values. 244 */ 245 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 246 error = EINVAL; 247 goto done; 248 } 249 iov++; 250 } 251 #ifdef KTRACE 252 /* 253 * if tracing, save a copy of iovec 254 */ 255 if (KTRPOINT(p, KTR_GENIO)) { 256 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 257 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 258 } 259 #endif 260 cnt = auio.uio_resid; 261 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 262 if (error) 263 if (auio.uio_resid != cnt && (error == ERESTART || 264 error == EINTR || error == EWOULDBLOCK)) 265 error = 0; 266 cnt -= auio.uio_resid; 267 #ifdef KTRACE 268 if (ktriov != NULL) { 269 if (error == 0) 270 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 271 error); 272 free(ktriov, M_TEMP); 273 } 274 #endif 275 *retval = cnt; 276 done: 277 if (needfree) 278 free(needfree, M_IOV); 279 out: 280 FRELE(fp); 281 return (error); 282 } 283 284 /* 285 * Write system call 286 */ 287 int 288 sys_write(p, v, retval) 289 struct proc *p; 290 void *v; 291 register_t *retval; 292 { 293 struct sys_write_args /* { 294 syscallarg(int) fd; 295 syscallarg(const void *) buf; 296 syscallarg(size_t) nbyte; 297 } */ *uap = v; 298 int fd = SCARG(uap, fd); 299 struct file *fp; 300 struct filedesc *fdp = p->p_fd; 301 302 if ((fp = fd_getfile(fdp, fd)) == NULL) 303 return (EBADF); 304 if ((fp->f_flag & FWRITE) == 0) 305 return (EBADF); 306 307 FREF(fp); 308 309 /* dofilewrite() will FRELE the descriptor for us */ 310 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 311 &fp->f_offset, retval)); 312 } 313 314 int 315 dofilewrite(p, fd, fp, buf, nbyte, offset, retval) 316 struct proc *p; 317 int fd; 318 struct file *fp; 319 const void *buf; 320 size_t nbyte; 321 off_t *offset; 322 register_t *retval; 323 { 324 struct uio auio; 325 struct iovec aiov; 326 long cnt, error = 0; 327 #ifdef KTRACE 328 struct iovec ktriov; 329 #endif 330 331 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 332 aiov.iov_len = nbyte; 333 auio.uio_iov = &aiov; 334 auio.uio_iovcnt = 1; 335 auio.uio_resid = nbyte; 336 auio.uio_rw = UIO_WRITE; 337 auio.uio_segflg = UIO_USERSPACE; 338 auio.uio_procp = p; 339 340 /* 341 * Writes return ssize_t because -1 is returned on error. Therefore 342 * we must restrict the length to SSIZE_MAX to avoid garbage return 343 * values. 344 */ 345 if (auio.uio_resid > SSIZE_MAX) { 346 error = EINVAL; 347 goto out; 348 } 349 350 #ifdef KTRACE 351 /* 352 * if tracing, save a copy of iovec 353 */ 354 if (KTRPOINT(p, KTR_GENIO)) 355 ktriov = aiov; 356 #endif 357 cnt = auio.uio_resid; 358 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 359 if (error) { 360 if (auio.uio_resid != cnt && (error == ERESTART || 361 error == EINTR || error == EWOULDBLOCK)) 362 error = 0; 363 if (error == EPIPE) 364 psignal(p, SIGPIPE); 365 } 366 cnt -= auio.uio_resid; 367 #ifdef KTRACE 368 if (KTRPOINT(p, KTR_GENIO) && error == 0) 369 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 370 #endif 371 *retval = cnt; 372 out: 373 FRELE(fp); 374 return (error); 375 } 376 377 /* 378 * Gather write system call 379 */ 380 int 381 sys_writev(p, v, retval) 382 struct proc *p; 383 void *v; 384 register_t *retval; 385 { 386 struct sys_writev_args /* { 387 syscallarg(int) fd; 388 syscallarg(const struct iovec *) iovp; 389 syscallarg(int) iovcnt; 390 } */ *uap = v; 391 int fd = SCARG(uap, fd); 392 struct file *fp; 393 struct filedesc *fdp = p->p_fd; 394 395 if ((fp = fd_getfile(fdp, fd)) == NULL) 396 return (EBADF); 397 if ((fp->f_flag & FWRITE) == 0) 398 return (EBADF); 399 400 FREF(fp); 401 402 /* dofilewritev() will FRELE the descriptor for us */ 403 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 404 &fp->f_offset, retval)); 405 } 406 407 int 408 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval) 409 struct proc *p; 410 int fd; 411 struct file *fp; 412 const struct iovec *iovp; 413 int iovcnt; 414 off_t *offset; 415 register_t *retval; 416 { 417 struct uio auio; 418 struct iovec *iov; 419 struct iovec *needfree; 420 struct iovec aiov[UIO_SMALLIOV]; 421 long i, cnt, error = 0; 422 u_int iovlen; 423 #ifdef KTRACE 424 struct iovec *ktriov = NULL; 425 #endif 426 427 /* note: can't use iovlen until iovcnt is validated */ 428 iovlen = iovcnt * sizeof(struct iovec); 429 if ((u_int)iovcnt > UIO_SMALLIOV) { 430 if ((u_int)iovcnt > IOV_MAX) { 431 error = EINVAL; 432 goto out; 433 } 434 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 435 } else if ((u_int)iovcnt > 0) { 436 iov = aiov; 437 needfree = NULL; 438 } else { 439 error = EINVAL; 440 goto out; 441 } 442 443 auio.uio_iov = iov; 444 auio.uio_iovcnt = iovcnt; 445 auio.uio_rw = UIO_WRITE; 446 auio.uio_segflg = UIO_USERSPACE; 447 auio.uio_procp = p; 448 error = copyin(iovp, iov, iovlen); 449 if (error) 450 goto done; 451 auio.uio_resid = 0; 452 for (i = 0; i < iovcnt; i++) { 453 auio.uio_resid += iov->iov_len; 454 /* 455 * Writes return ssize_t because -1 is returned on error. 456 * Therefore we must restrict the length to SSIZE_MAX to 457 * avoid garbage return values. 458 */ 459 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 460 error = EINVAL; 461 goto done; 462 } 463 iov++; 464 } 465 #ifdef KTRACE 466 /* 467 * if tracing, save a copy of iovec 468 */ 469 if (KTRPOINT(p, KTR_GENIO)) { 470 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 471 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 472 } 473 #endif 474 cnt = auio.uio_resid; 475 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 476 if (error) { 477 if (auio.uio_resid != cnt && (error == ERESTART || 478 error == EINTR || error == EWOULDBLOCK)) 479 error = 0; 480 if (error == EPIPE) 481 psignal(p, SIGPIPE); 482 } 483 cnt -= auio.uio_resid; 484 #ifdef KTRACE 485 if (ktriov != NULL) { 486 if (error == 0) 487 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, 488 error); 489 free(ktriov, M_TEMP); 490 } 491 #endif 492 *retval = cnt; 493 done: 494 if (needfree) 495 free(needfree, M_IOV); 496 out: 497 FRELE(fp); 498 return (error); 499 } 500 501 /* 502 * Ioctl system call 503 */ 504 /* ARGSUSED */ 505 int 506 sys_ioctl(p, v, retval) 507 struct proc *p; 508 void *v; 509 register_t *retval; 510 { 511 struct sys_ioctl_args /* { 512 syscallarg(int) fd; 513 syscallarg(u_long) com; 514 syscallarg(caddr_t) data; 515 } */ *uap = v; 516 struct file *fp; 517 struct filedesc *fdp; 518 u_long com; 519 int error; 520 u_int size; 521 caddr_t data, memp; 522 int tmp; 523 #define STK_PARAMS 128 524 char stkbuf[STK_PARAMS]; 525 526 fdp = p->p_fd; 527 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 528 return (EBADF); 529 530 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 531 return (EBADF); 532 533 switch (com = SCARG(uap, com)) { 534 case FIONCLEX: 535 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 536 return (0); 537 case FIOCLEX: 538 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 539 return (0); 540 } 541 542 /* 543 * Interpret high order word to find amount of data to be 544 * copied to/from the user's address space. 545 */ 546 size = IOCPARM_LEN(com); 547 if (size > IOCPARM_MAX) 548 return (ENOTTY); 549 FREF(fp); 550 memp = NULL; 551 if (size > sizeof (stkbuf)) { 552 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 553 data = memp; 554 } else 555 data = stkbuf; 556 if (com&IOC_IN) { 557 if (size) { 558 error = copyin(SCARG(uap, data), data, (u_int)size); 559 if (error) { 560 goto out; 561 } 562 } else 563 *(caddr_t *)data = SCARG(uap, data); 564 } else if ((com&IOC_OUT) && size) 565 /* 566 * Zero the buffer so the user always 567 * gets back something deterministic. 568 */ 569 bzero(data, size); 570 else if (com&IOC_VOID) 571 *(caddr_t *)data = SCARG(uap, data); 572 573 switch (com) { 574 575 case FIONBIO: 576 if ((tmp = *(int *)data) != 0) 577 fp->f_flag |= FNONBLOCK; 578 else 579 fp->f_flag &= ~FNONBLOCK; 580 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 581 break; 582 583 case FIOASYNC: 584 if ((tmp = *(int *)data) != 0) 585 fp->f_flag |= FASYNC; 586 else 587 fp->f_flag &= ~FASYNC; 588 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 589 break; 590 591 case FIOSETOWN: 592 tmp = *(int *)data; 593 if (fp->f_type == DTYPE_SOCKET) { 594 struct socket *so = (struct socket *)fp->f_data; 595 596 so->so_pgid = tmp; 597 so->so_siguid = p->p_cred->p_ruid; 598 so->so_sigeuid = p->p_ucred->cr_uid; 599 error = 0; 600 break; 601 } 602 if (tmp <= 0) { 603 tmp = -tmp; 604 } else { 605 struct proc *p1 = pfind(tmp); 606 if (p1 == 0) { 607 error = ESRCH; 608 break; 609 } 610 tmp = p1->p_pgrp->pg_id; 611 } 612 error = (*fp->f_ops->fo_ioctl) 613 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 614 break; 615 616 case FIOGETOWN: 617 if (fp->f_type == DTYPE_SOCKET) { 618 error = 0; 619 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 620 break; 621 } 622 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 623 *(int *)data = -*(int *)data; 624 break; 625 626 default: 627 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 628 /* 629 * Copy any data to user, size was 630 * already set and checked above. 631 */ 632 if (error == 0 && (com&IOC_OUT) && size) 633 error = copyout(data, SCARG(uap, data), (u_int)size); 634 break; 635 } 636 out: 637 FRELE(fp); 638 if (memp) 639 free(memp, M_IOCTLOPS); 640 return (error); 641 } 642 643 int selwait, nselcoll; 644 645 /* 646 * Select system call. 647 */ 648 int 649 sys_select(struct proc *p, void *v, register_t *retval) 650 { 651 struct sys_select_args /* { 652 syscallarg(int) nd; 653 syscallarg(fd_set *) in; 654 syscallarg(fd_set *) ou; 655 syscallarg(fd_set *) ex; 656 syscallarg(struct timeval *) tv; 657 } */ *uap = v; 658 fd_set bits[6], *pibits[3], *pobits[3]; 659 struct timeval atv; 660 int s, ncoll, error = 0, timo; 661 u_int nd, ni; 662 663 nd = SCARG(uap, nd); 664 if (nd > p->p_fd->fd_nfiles) { 665 /* forgiving; slightly wrong */ 666 nd = p->p_fd->fd_nfiles; 667 } 668 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 669 if (nd > FD_SETSIZE) { 670 caddr_t mbits; 671 672 mbits = malloc(ni * 6, M_TEMP, M_WAITOK); 673 bzero(mbits, ni * 6); 674 pibits[0] = (fd_set *)&mbits[ni * 0]; 675 pibits[1] = (fd_set *)&mbits[ni * 1]; 676 pibits[2] = (fd_set *)&mbits[ni * 2]; 677 pobits[0] = (fd_set *)&mbits[ni * 3]; 678 pobits[1] = (fd_set *)&mbits[ni * 4]; 679 pobits[2] = (fd_set *)&mbits[ni * 5]; 680 } else { 681 bzero((caddr_t)bits, sizeof(bits)); 682 pibits[0] = &bits[0]; 683 pibits[1] = &bits[1]; 684 pibits[2] = &bits[2]; 685 pobits[0] = &bits[3]; 686 pobits[1] = &bits[4]; 687 pobits[2] = &bits[5]; 688 } 689 690 #define getbits(name, x) \ 691 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ 692 (caddr_t)pibits[x], ni))) \ 693 goto done; 694 getbits(in, 0); 695 getbits(ou, 1); 696 getbits(ex, 2); 697 #undef getbits 698 699 if (SCARG(uap, tv)) { 700 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, 701 sizeof (atv)); 702 if (error) 703 goto done; 704 if (itimerfix(&atv)) { 705 error = EINVAL; 706 goto done; 707 } 708 s = splclock(); 709 timeradd(&atv, &time, &atv); 710 splx(s); 711 } else 712 timo = 0; 713 retry: 714 ncoll = nselcoll; 715 p->p_flag |= P_SELECT; 716 error = selscan(p, pibits[0], pobits[0], nd, retval); 717 if (error || *retval) 718 goto done; 719 if (SCARG(uap, tv)) { 720 /* 721 * We have to recalculate the timeout on every retry. 722 */ 723 timo = hzto(&atv); 724 if (timo <= 0) 725 goto done; 726 } 727 s = splhigh(); 728 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 729 splx(s); 730 goto retry; 731 } 732 p->p_flag &= ~P_SELECT; 733 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 734 splx(s); 735 if (error == 0) 736 goto retry; 737 done: 738 p->p_flag &= ~P_SELECT; 739 /* select is not restarted after signals... */ 740 if (error == ERESTART) 741 error = EINTR; 742 if (error == EWOULDBLOCK) 743 error = 0; 744 #define putbits(name, x) \ 745 if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \ 746 (caddr_t)SCARG(uap, name), ni))) \ 747 error = error2; 748 if (error == 0) { 749 int error2; 750 751 putbits(in, 0); 752 putbits(ou, 1); 753 putbits(ex, 2); 754 #undef putbits 755 } 756 757 if (pibits[0] != &bits[0]) 758 free(pibits[0], M_TEMP); 759 return (error); 760 } 761 762 int 763 selscan(p, ibits, obits, nfd, retval) 764 struct proc *p; 765 fd_set *ibits, *obits; 766 int nfd; 767 register_t *retval; 768 { 769 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 770 register struct filedesc *fdp = p->p_fd; 771 register int msk, i, j, fd; 772 register fd_mask bits; 773 struct file *fp; 774 int ni, n = 0; 775 static int flag[3] = { FREAD, FWRITE, 0 }; 776 777 /* 778 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 779 * up to the next byte) otherwise the fd_set's are normal sized. 780 */ 781 ni = sizeof(fd_set); 782 if (nfd > FD_SETSIZE) 783 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 784 785 for (msk = 0; msk < 3; msk++) { 786 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 787 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 788 789 for (i = 0; i < nfd; i += NFDBITS) { 790 bits = pibits->fds_bits[i/NFDBITS]; 791 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 792 bits &= ~(1 << j); 793 if ((fp = fd_getfile(fdp, fd)) == NULL) 794 return (EBADF); 795 FREF(fp); 796 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 797 FD_SET(fd, pobits); 798 n++; 799 } 800 FRELE(fp); 801 } 802 } 803 } 804 *retval = n; 805 return (0); 806 } 807 808 /*ARGSUSED*/ 809 int 810 seltrue(dev, flag, p) 811 dev_t dev; 812 int flag; 813 struct proc *p; 814 { 815 816 return (1); 817 } 818 819 /* 820 * Record a select request. 821 */ 822 void 823 selrecord(selector, sip) 824 struct proc *selector; 825 struct selinfo *sip; 826 { 827 struct proc *p; 828 pid_t mypid; 829 830 mypid = selector->p_pid; 831 if (sip->si_selpid == mypid) 832 return; 833 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 834 p->p_wchan == (caddr_t)&selwait) 835 sip->si_flags |= SI_COLL; 836 else 837 sip->si_selpid = mypid; 838 } 839 840 /* 841 * Do a wakeup when a selectable event occurs. 842 */ 843 void 844 selwakeup(sip) 845 register struct selinfo *sip; 846 { 847 register struct proc *p; 848 int s; 849 850 if (sip->si_selpid == 0) 851 return; 852 if (sip->si_flags & SI_COLL) { 853 nselcoll++; 854 sip->si_flags &= ~SI_COLL; 855 wakeup((caddr_t)&selwait); 856 } 857 p = pfind(sip->si_selpid); 858 sip->si_selpid = 0; 859 if (p != NULL) { 860 s = splhigh(); 861 if (p->p_wchan == (caddr_t)&selwait) { 862 if (p->p_stat == SSLEEP) 863 setrunnable(p); 864 else 865 unsleep(p); 866 } else if (p->p_flag & P_SELECT) 867 p->p_flag &= ~P_SELECT; 868 splx(s); 869 } 870 } 871 872 void 873 pollscan(p, pl, nfd, retval) 874 struct proc *p; 875 struct pollfd *pl; 876 int nfd; 877 register_t *retval; 878 { 879 register struct filedesc *fdp = p->p_fd; 880 register int msk, i; 881 struct file *fp; 882 int x, n = 0; 883 static int flag[3] = { FREAD, FWRITE, 0 }; 884 static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR }; 885 886 /* 887 * XXX: We need to implement the rest of the flags. 888 */ 889 for (i = 0; i < nfd; i++) { 890 /* Check the file descriptor. */ 891 if (pl[i].fd < 0) { 892 pl[i].revents = 0; 893 continue; 894 } 895 if ((fp = fd_getfile(fdp, pl[i].fd)) == NULL) { 896 pl[i].revents = POLLNVAL; 897 n++; 898 continue; 899 } 900 FREF(fp); 901 for (x = msk = 0; msk < 3; msk++) { 902 if (pl[i].events & pflag[msk]) { 903 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 904 pl[i].revents |= pflag[msk] & 905 pl[i].events; 906 x++; 907 } 908 } 909 } 910 FRELE(fp); 911 if (x) 912 n++; 913 } 914 *retval = n; 915 } 916 917 /* 918 * We are using the same mechanism as select only we encode/decode args 919 * differently. 920 */ 921 int 922 sys_poll(struct proc *p, void *v, register_t *retval) 923 { 924 struct sys_poll_args *uap = v; 925 size_t sz; 926 struct pollfd pfds[4], *pl = pfds; 927 int msec = SCARG(uap, timeout); 928 struct timeval atv; 929 int timo, ncoll, i, s, error, error2; 930 extern int nselcoll, selwait; 931 u_int nfds = SCARG(uap, nfds); 932 933 /* Standards say no more than MAX_OPEN; this is possibly better. */ 934 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 935 return (EINVAL); 936 937 sz = sizeof(struct pollfd) * nfds; 938 939 /* optimize for the default case, of a small nfds value */ 940 if (sz > sizeof(pfds)) 941 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 942 943 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) 944 goto bad; 945 946 for (i = 0; i < nfds; i++) 947 pl[i].revents = 0; 948 949 if (msec != -1) { 950 atv.tv_sec = msec / 1000; 951 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; 952 953 if (itimerfix(&atv)) { 954 error = EINVAL; 955 goto done; 956 } 957 s = splclock(); 958 timeradd(&atv, &time, &atv); 959 splx(s); 960 } else 961 timo = 0; 962 963 retry: 964 ncoll = nselcoll; 965 p->p_flag |= P_SELECT; 966 pollscan(p, pl, nfds, retval); 967 if (*retval) 968 goto done; 969 if (msec != -1) { 970 /* 971 * We have to recalculate the timeout on every retry. 972 */ 973 timo = hzto(&atv); 974 if (timo <= 0) 975 goto done; 976 } 977 s = splhigh(); 978 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 979 splx(s); 980 goto retry; 981 } 982 p->p_flag &= ~P_SELECT; 983 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 984 splx(s); 985 if (error == 0) 986 goto retry; 987 988 done: 989 p->p_flag &= ~P_SELECT; 990 /* poll is not restarted after signals... */ 991 if (error == ERESTART) 992 error = EINTR; 993 if (error == EWOULDBLOCK) 994 error = 0; 995 if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0) 996 error = error2; 997 bad: 998 if (pl != pfds) 999 free((char *) pl, M_TEMP); 1000 return (error); 1001 } 1002 1003