/*	$OpenBSD: sys_generic.c,v 1.34 2002/02/08 13:53:28 art Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

int	selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *));
int	seltrue __P((dev_t, int, struct proc *));
void	pollscan __P((struct proc *, struct pollfd *, int, register_t *));
/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FRELE(fp);
	return (error);
}
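/*
 * Note on the transfer helpers: the byte count actually moved is reported
 * through *retval, and a transfer interrupted (ERESTART, EINTR or
 * EWOULDBLOCK) after some data has already been copied reports the partial
 * count as a success instead of the error.  dofileread() also drops the
 * file reference taken by sys_read() (FREF/FRELE) before returning.
 */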
/*
 * Scatter read system call.
 */
int
sys_readv(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}
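/*
 * Iovec handling in dofilereadv()/dofilewritev(): up to UIO_SMALLIOV
 * entries are copied into an array on the kernel stack; larger counts,
 * up to IOV_MAX, get a temporary M_IOV allocation that is freed at
 * "done".  A count of zero, or one above IOV_MAX, fails with EINVAL,
 * and both each iov_len and the running total are limited to SSIZE_MAX
 * for the same reason as in the plain read and write paths.
 */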
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	return (error);
}
/*
 * Write system call
 */
int
sys_write(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, retval));
}

int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	return (error);
}
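/*
 * A write that fails with EPIPE also posts SIGPIPE to the writing process
 * (see dofilewrite() above and dofilewritev() below), in addition to
 * returning the error; as in the read path, a transfer that was merely
 * interrupted after moving some data reports the partial byte count.
 */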
/*
 * Gather write system call
 */
int
sys_writev(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, retval));
}

int
dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX)
			return (EINVAL);
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	return (error);
}
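/*
 * For ioctl, the amount of argument data to copy in and/or out is encoded
 * in the high-order word of the command along with the IOC_IN/IOC_OUT/
 * IOC_VOID direction bits.  Arguments of up to STK_PARAMS (128) bytes are
 * staged in a buffer on the kernel stack; larger ones use a temporary
 * M_IOCTLOPS allocation.  FIOCLEX and FIONCLEX only toggle the
 * close-on-exec flag in the descriptor table and never reach the file's
 * own ioctl routine.
 */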
/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	register int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				return (error);
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int	selwait, nselcoll;
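/*
 * select() and poll() share one wakeup mechanism: every process waiting
 * for an event sleeps on the single "selwait" channel, and P_SELECT marks
 * a process that is currently scanning its descriptors.  Before sleeping,
 * the scan is retried if P_SELECT has been cleared or the global collision
 * counter nselcoll has changed, which closes the race between scanning a
 * descriptor and the event firing on it.
 */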
/*
 * Select system call.
 */
int
sys_select(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_set bits[6], *pibits[3], *pobits[3];
	struct timeval atv;
	int s, ncoll, error = 0, timo;
	u_int ni;

	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (SCARG(uap, nd) > FD_SETSIZE) {
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero((caddr_t)bits, sizeof(bits));
		pibits[0] = &bits[0];
		pibits[1] = &bits[1];
		pibits[2] = &bits[2];
		pobits[0] = &bits[3];
		pobits[1] = &bits[4];
		pobits[2] = &bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
	    (caddr_t)pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
		    sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \
	    (caddr_t)SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (pibits[0] != &bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

int
selscan(p, ibits, obits, nfd, retval)
	struct proc *p;
	fd_set *ibits, *obits;
	int nfd;
	register_t *retval;
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register fd_mask bits;
	struct file *fp;
	int ni, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };

	/*
	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
	 * up to the next byte) otherwise the fd_set's are normal sized.
	 */
	ni = sizeof(fd_set);
	if (nfd > FD_SETSIZE)
		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}

/*
 * Record a select request.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}
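/*
 * A struct selinfo records only a single selecting pid.  If a second
 * process selects on the same object while the first is still waiting,
 * selrecord() sets SI_COLL instead of overwriting the pid; selwakeup()
 * then bumps nselcoll (so in-progress scans retry) and wakes every
 * process sleeping on selwait rather than just the recorded one.
 */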
/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0) {
			pl[i].revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl[i].fd)) == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		for (x = msk = 0; msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		if (x)
			n++;
	}
	*retval = n;
}
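/*
 * pollscan() reuses the per-file fo_select() routine: requested events
 * are mapped through the pflag[]/flag[] tables, so POLLIN|POLLRDNORM and
 * POLLOUT become FREAD and FWRITE checks, and descriptors that cannot be
 * looked up report POLLNVAL.  As the XXX above notes, the remaining poll
 * flags are not implemented yet.
 */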
/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	if (msec != -1) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}