1 /* $OpenBSD: sys_generic.c,v 1.35 2002/02/08 19:47:50 art Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/resourcevar.h> 52 #include <sys/socketvar.h> 53 #include <sys/signalvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/stat.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #ifdef KTRACE 60 #include <sys/ktrace.h> 61 #endif 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *)); 67 int seltrue __P((dev_t, int, struct proc *)); 68 void pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 69 70 /* 71 * Read system call. 
72 */ 73 /* ARGSUSED */ 74 int 75 sys_read(p, v, retval) 76 struct proc *p; 77 void *v; 78 register_t *retval; 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 int fd = SCARG(uap, fd); 86 struct file *fp; 87 struct filedesc *fdp = p->p_fd; 88 89 if ((fp = fd_getfile(fdp, fd)) == NULL) 90 return (EBADF); 91 if ((fp->f_flag & FREAD) == 0) 92 return (EBADF); 93 94 FREF(fp); 95 96 /* dofileread() will FRELE the descriptor for us */ 97 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 98 &fp->f_offset, retval)); 99 } 100 101 int 102 dofileread(p, fd, fp, buf, nbyte, offset, retval) 103 struct proc *p; 104 int fd; 105 struct file *fp; 106 void *buf; 107 size_t nbyte; 108 off_t *offset; 109 register_t *retval; 110 { 111 struct uio auio; 112 struct iovec aiov; 113 long cnt, error = 0; 114 #ifdef KTRACE 115 struct iovec ktriov; 116 #endif 117 118 aiov.iov_base = (caddr_t)buf; 119 aiov.iov_len = nbyte; 120 auio.uio_iov = &aiov; 121 auio.uio_iovcnt = 1; 122 auio.uio_resid = nbyte; 123 auio.uio_rw = UIO_READ; 124 auio.uio_segflg = UIO_USERSPACE; 125 auio.uio_procp = p; 126 127 /* 128 * Reads return ssize_t because -1 is returned on error. Therefore 129 * we must restrict the length to SSIZE_MAX to avoid garbage return 130 * values. 
131 */ 132 if (auio.uio_resid > SSIZE_MAX) { 133 error = EINVAL; 134 goto out; 135 } 136 137 #ifdef KTRACE 138 /* 139 * if tracing, save a copy of iovec 140 */ 141 if (KTRPOINT(p, KTR_GENIO)) 142 ktriov = aiov; 143 #endif 144 cnt = auio.uio_resid; 145 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 146 if (error) 147 if (auio.uio_resid != cnt && (error == ERESTART || 148 error == EINTR || error == EWOULDBLOCK)) 149 error = 0; 150 cnt -= auio.uio_resid; 151 #ifdef KTRACE 152 if (KTRPOINT(p, KTR_GENIO) && error == 0) 153 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 154 #endif 155 *retval = cnt; 156 out: 157 FRELE(fp); 158 return (error); 159 } 160 161 /* 162 * Scatter read system call. 163 */ 164 int 165 sys_readv(p, v, retval) 166 struct proc *p; 167 void *v; 168 register_t *retval; 169 { 170 struct sys_readv_args /* { 171 syscallarg(int) fd; 172 syscallarg(const struct iovec *) iovp; 173 syscallarg(int) iovcnt; 174 } */ *uap = v; 175 int fd = SCARG(uap, fd); 176 struct file *fp; 177 struct filedesc *fdp = p->p_fd; 178 179 if ((fp = fd_getfile(fdp, fd)) == NULL) 180 return (EBADF); 181 if ((fp->f_flag & FREAD) == 0) 182 return (EBADF); 183 184 FREF(fp); 185 186 /* dofilereadv() will FRELE the descriptor for us */ 187 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 188 &fp->f_offset, retval)); 189 } 190 191 int 192 dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval) 193 struct proc *p; 194 int fd; 195 struct file *fp; 196 const struct iovec *iovp; 197 int iovcnt; 198 off_t *offset; 199 register_t *retval; 200 { 201 struct uio auio; 202 struct iovec *iov; 203 struct iovec *needfree; 204 struct iovec aiov[UIO_SMALLIOV]; 205 long i, cnt, error = 0; 206 u_int iovlen; 207 #ifdef KTRACE 208 struct iovec *ktriov = NULL; 209 #endif 210 211 /* note: can't use iovlen until iovcnt is validated */ 212 iovlen = iovcnt * sizeof(struct iovec); 213 if ((u_int)iovcnt > UIO_SMALLIOV) { 214 if ((u_int)iovcnt > IOV_MAX) { 215 error = EINVAL; 216 
goto out; 217 } 218 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 219 } else if ((u_int)iovcnt > 0) { 220 iov = aiov; 221 needfree = NULL; 222 } else { 223 error = EINVAL; 224 goto out; 225 } 226 227 auio.uio_iov = iov; 228 auio.uio_iovcnt = iovcnt; 229 auio.uio_rw = UIO_READ; 230 auio.uio_segflg = UIO_USERSPACE; 231 auio.uio_procp = p; 232 error = copyin(iovp, iov, iovlen); 233 if (error) 234 goto done; 235 auio.uio_resid = 0; 236 for (i = 0; i < iovcnt; i++) { 237 auio.uio_resid += iov->iov_len; 238 /* 239 * Reads return ssize_t because -1 is returned on error. 240 * Therefore we must restrict the length to SSIZE_MAX to 241 * avoid garbage return values. 242 */ 243 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 244 error = EINVAL; 245 goto done; 246 } 247 iov++; 248 } 249 #ifdef KTRACE 250 /* 251 * if tracing, save a copy of iovec 252 */ 253 if (KTRPOINT(p, KTR_GENIO)) { 254 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 255 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 256 } 257 #endif 258 cnt = auio.uio_resid; 259 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 260 if (error) 261 if (auio.uio_resid != cnt && (error == ERESTART || 262 error == EINTR || error == EWOULDBLOCK)) 263 error = 0; 264 cnt -= auio.uio_resid; 265 #ifdef KTRACE 266 if (ktriov != NULL) { 267 if (error == 0) 268 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 269 error); 270 free(ktriov, M_TEMP); 271 } 272 #endif 273 *retval = cnt; 274 done: 275 if (needfree) 276 free(needfree, M_IOV); 277 out: 278 FRELE(fp); 279 return (error); 280 } 281 282 /* 283 * Write system call 284 */ 285 int 286 sys_write(p, v, retval) 287 struct proc *p; 288 void *v; 289 register_t *retval; 290 { 291 struct sys_write_args /* { 292 syscallarg(int) fd; 293 syscallarg(const void *) buf; 294 syscallarg(size_t) nbyte; 295 } */ *uap = v; 296 int fd = SCARG(uap, fd); 297 struct file *fp; 298 struct filedesc *fdp = p->p_fd; 299 300 if ((fp = fd_getfile(fdp, fd)) == NULL) 301 return (EBADF); 302 
if ((fp->f_flag & FWRITE) == 0) 303 return (EBADF); 304 305 /* dofilewrite() will unuse the descriptor for us */ 306 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 307 &fp->f_offset, retval)); 308 } 309 310 int 311 dofilewrite(p, fd, fp, buf, nbyte, offset, retval) 312 struct proc *p; 313 int fd; 314 struct file *fp; 315 const void *buf; 316 size_t nbyte; 317 off_t *offset; 318 register_t *retval; 319 { 320 struct uio auio; 321 struct iovec aiov; 322 long cnt, error = 0; 323 #ifdef KTRACE 324 struct iovec ktriov; 325 #endif 326 327 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 328 aiov.iov_len = nbyte; 329 auio.uio_iov = &aiov; 330 auio.uio_iovcnt = 1; 331 auio.uio_resid = nbyte; 332 auio.uio_rw = UIO_WRITE; 333 auio.uio_segflg = UIO_USERSPACE; 334 auio.uio_procp = p; 335 336 /* 337 * Writes return ssize_t because -1 is returned on error. Therefore 338 * we must restrict the length to SSIZE_MAX to avoid garbage return 339 * values. 340 */ 341 if (auio.uio_resid > SSIZE_MAX) { 342 error = EINVAL; 343 goto out; 344 } 345 346 #ifdef KTRACE 347 /* 348 * if tracing, save a copy of iovec 349 */ 350 if (KTRPOINT(p, KTR_GENIO)) 351 ktriov = aiov; 352 #endif 353 cnt = auio.uio_resid; 354 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 355 if (error) { 356 if (auio.uio_resid != cnt && (error == ERESTART || 357 error == EINTR || error == EWOULDBLOCK)) 358 error = 0; 359 if (error == EPIPE) 360 psignal(p, SIGPIPE); 361 } 362 cnt -= auio.uio_resid; 363 #ifdef KTRACE 364 if (KTRPOINT(p, KTR_GENIO) && error == 0) 365 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 366 #endif 367 *retval = cnt; 368 out: 369 return (error); 370 } 371 372 /* 373 * Gather write system call 374 */ 375 int 376 sys_writev(p, v, retval) 377 struct proc *p; 378 void *v; 379 register_t *retval; 380 { 381 struct sys_writev_args /* { 382 syscallarg(int) fd; 383 syscallarg(const struct iovec *) iovp; 384 syscallarg(int) iovcnt; 385 } */ *uap = v; 386 int fd = 
SCARG(uap, fd); 387 struct file *fp; 388 struct filedesc *fdp = p->p_fd; 389 390 if ((fp = fd_getfile(fdp, fd)) == NULL) 391 return (EBADF); 392 if ((fp->f_flag & FWRITE) == 0) 393 return (EBADF); 394 395 /* dofilewritev() will unuse the descriptor for us */ 396 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 397 &fp->f_offset, retval)); 398 } 399 400 int 401 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval) 402 struct proc *p; 403 int fd; 404 struct file *fp; 405 const struct iovec *iovp; 406 int iovcnt; 407 off_t *offset; 408 register_t *retval; 409 { 410 struct uio auio; 411 struct iovec *iov; 412 struct iovec *needfree; 413 struct iovec aiov[UIO_SMALLIOV]; 414 long i, cnt, error = 0; 415 u_int iovlen; 416 #ifdef KTRACE 417 struct iovec *ktriov = NULL; 418 #endif 419 420 /* note: can't use iovlen until iovcnt is validated */ 421 iovlen = iovcnt * sizeof(struct iovec); 422 if ((u_int)iovcnt > UIO_SMALLIOV) { 423 if ((u_int)iovcnt > IOV_MAX) 424 return (EINVAL); 425 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 426 } else if ((u_int)iovcnt > 0) { 427 iov = aiov; 428 needfree = NULL; 429 } else { 430 error = EINVAL; 431 goto out; 432 } 433 434 auio.uio_iov = iov; 435 auio.uio_iovcnt = iovcnt; 436 auio.uio_rw = UIO_WRITE; 437 auio.uio_segflg = UIO_USERSPACE; 438 auio.uio_procp = p; 439 error = copyin(iovp, iov, iovlen); 440 if (error) 441 goto done; 442 auio.uio_resid = 0; 443 for (i = 0; i < iovcnt; i++) { 444 auio.uio_resid += iov->iov_len; 445 /* 446 * Writes return ssize_t because -1 is returned on error. 447 * Therefore we must restrict the length to SSIZE_MAX to 448 * avoid garbage return values. 
449 */ 450 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 451 error = EINVAL; 452 goto done; 453 } 454 iov++; 455 } 456 #ifdef KTRACE 457 /* 458 * if tracing, save a copy of iovec 459 */ 460 if (KTRPOINT(p, KTR_GENIO)) { 461 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 462 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 463 } 464 #endif 465 cnt = auio.uio_resid; 466 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 467 if (error) { 468 if (auio.uio_resid != cnt && (error == ERESTART || 469 error == EINTR || error == EWOULDBLOCK)) 470 error = 0; 471 if (error == EPIPE) 472 psignal(p, SIGPIPE); 473 } 474 cnt -= auio.uio_resid; 475 #ifdef KTRACE 476 if (ktriov != NULL) { 477 if (error == 0) 478 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, 479 error); 480 free(ktriov, M_TEMP); 481 } 482 #endif 483 *retval = cnt; 484 done: 485 if (needfree) 486 free(needfree, M_IOV); 487 out: 488 return (error); 489 } 490 491 /* 492 * Ioctl system call 493 */ 494 /* ARGSUSED */ 495 int 496 sys_ioctl(p, v, retval) 497 struct proc *p; 498 void *v; 499 register_t *retval; 500 { 501 register struct sys_ioctl_args /* { 502 syscallarg(int) fd; 503 syscallarg(u_long) com; 504 syscallarg(caddr_t) data; 505 } */ *uap = v; 506 register struct file *fp; 507 register struct filedesc *fdp; 508 register u_long com; 509 register int error; 510 register u_int size; 511 caddr_t data, memp; 512 int tmp; 513 #define STK_PARAMS 128 514 char stkbuf[STK_PARAMS]; 515 516 fdp = p->p_fd; 517 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 518 return (EBADF); 519 520 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 521 return (EBADF); 522 523 switch (com = SCARG(uap, com)) { 524 case FIONCLEX: 525 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 526 return (0); 527 case FIOCLEX: 528 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 529 return (0); 530 } 531 532 /* 533 * Interpret high order word to find amount of data to be 534 * copied to/from the user's address space. 
535 */ 536 size = IOCPARM_LEN(com); 537 if (size > IOCPARM_MAX) 538 return (ENOTTY); 539 memp = NULL; 540 if (size > sizeof (stkbuf)) { 541 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 542 data = memp; 543 } else 544 data = stkbuf; 545 if (com&IOC_IN) { 546 if (size) { 547 error = copyin(SCARG(uap, data), data, (u_int)size); 548 if (error) { 549 if (memp) 550 free(memp, M_IOCTLOPS); 551 return (error); 552 } 553 } else 554 *(caddr_t *)data = SCARG(uap, data); 555 } else if ((com&IOC_OUT) && size) 556 /* 557 * Zero the buffer so the user always 558 * gets back something deterministic. 559 */ 560 bzero(data, size); 561 else if (com&IOC_VOID) 562 *(caddr_t *)data = SCARG(uap, data); 563 564 switch (com) { 565 566 case FIONBIO: 567 if ((tmp = *(int *)data) != 0) 568 fp->f_flag |= FNONBLOCK; 569 else 570 fp->f_flag &= ~FNONBLOCK; 571 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 572 break; 573 574 case FIOASYNC: 575 if ((tmp = *(int *)data) != 0) 576 fp->f_flag |= FASYNC; 577 else 578 fp->f_flag &= ~FASYNC; 579 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 580 break; 581 582 case FIOSETOWN: 583 tmp = *(int *)data; 584 if (fp->f_type == DTYPE_SOCKET) { 585 struct socket *so = (struct socket *)fp->f_data; 586 587 so->so_pgid = tmp; 588 so->so_siguid = p->p_cred->p_ruid; 589 so->so_sigeuid = p->p_ucred->cr_uid; 590 error = 0; 591 break; 592 } 593 if (tmp <= 0) { 594 tmp = -tmp; 595 } else { 596 struct proc *p1 = pfind(tmp); 597 if (p1 == 0) { 598 error = ESRCH; 599 break; 600 } 601 tmp = p1->p_pgrp->pg_id; 602 } 603 error = (*fp->f_ops->fo_ioctl) 604 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 605 break; 606 607 case FIOGETOWN: 608 if (fp->f_type == DTYPE_SOCKET) { 609 error = 0; 610 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 611 break; 612 } 613 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 614 *(int *)data = -*(int *)data; 615 break; 616 617 default: 618 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 619 
/* 620 * Copy any data to user, size was 621 * already set and checked above. 622 */ 623 if (error == 0 && (com&IOC_OUT) && size) 624 error = copyout(data, SCARG(uap, data), (u_int)size); 625 break; 626 } 627 if (memp) 628 free(memp, M_IOCTLOPS); 629 return (error); 630 } 631 632 int selwait, nselcoll; 633 634 /* 635 * Select system call. 636 */ 637 int 638 sys_select(p, v, retval) 639 register struct proc *p; 640 void *v; 641 register_t *retval; 642 { 643 register struct sys_select_args /* { 644 syscallarg(int) nd; 645 syscallarg(fd_set *) in; 646 syscallarg(fd_set *) ou; 647 syscallarg(fd_set *) ex; 648 syscallarg(struct timeval *) tv; 649 } */ *uap = v; 650 fd_set bits[6], *pibits[3], *pobits[3]; 651 struct timeval atv; 652 int s, ncoll, error = 0, timo; 653 u_int ni; 654 655 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 656 /* forgiving; slightly wrong */ 657 SCARG(uap, nd) = p->p_fd->fd_nfiles; 658 } 659 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 660 if (SCARG(uap, nd) > FD_SETSIZE) { 661 caddr_t mbits; 662 663 mbits = malloc(ni * 6, M_TEMP, M_WAITOK); 664 bzero(mbits, ni * 6); 665 pibits[0] = (fd_set *)&mbits[ni * 0]; 666 pibits[1] = (fd_set *)&mbits[ni * 1]; 667 pibits[2] = (fd_set *)&mbits[ni * 2]; 668 pobits[0] = (fd_set *)&mbits[ni * 3]; 669 pobits[1] = (fd_set *)&mbits[ni * 4]; 670 pobits[2] = (fd_set *)&mbits[ni * 5]; 671 } else { 672 bzero((caddr_t)bits, sizeof(bits)); 673 pibits[0] = &bits[0]; 674 pibits[1] = &bits[1]; 675 pibits[2] = &bits[2]; 676 pobits[0] = &bits[3]; 677 pobits[1] = &bits[4]; 678 pobits[2] = &bits[5]; 679 } 680 681 #define getbits(name, x) \ 682 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ 683 (caddr_t)pibits[x], ni))) \ 684 goto done; 685 getbits(in, 0); 686 getbits(ou, 1); 687 getbits(ex, 2); 688 #undef getbits 689 690 if (SCARG(uap, tv)) { 691 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, 692 sizeof (atv)); 693 if (error) 694 goto done; 695 if (itimerfix(&atv)) { 696 error = EINVAL; 
697 goto done; 698 } 699 s = splclock(); 700 timeradd(&atv, &time, &atv); 701 splx(s); 702 } else 703 timo = 0; 704 retry: 705 ncoll = nselcoll; 706 p->p_flag |= P_SELECT; 707 error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval); 708 if (error || *retval) 709 goto done; 710 if (SCARG(uap, tv)) { 711 /* 712 * We have to recalculate the timeout on every retry. 713 */ 714 timo = hzto(&atv); 715 if (timo <= 0) 716 goto done; 717 } 718 s = splhigh(); 719 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 720 splx(s); 721 goto retry; 722 } 723 p->p_flag &= ~P_SELECT; 724 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 725 splx(s); 726 if (error == 0) 727 goto retry; 728 done: 729 p->p_flag &= ~P_SELECT; 730 /* select is not restarted after signals... */ 731 if (error == ERESTART) 732 error = EINTR; 733 if (error == EWOULDBLOCK) 734 error = 0; 735 #define putbits(name, x) \ 736 if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \ 737 (caddr_t)SCARG(uap, name), ni))) \ 738 error = error2; 739 if (error == 0) { 740 int error2; 741 742 putbits(in, 0); 743 putbits(ou, 1); 744 putbits(ex, 2); 745 #undef putbits 746 } 747 748 if (pibits[0] != &bits[0]) 749 free(pibits[0], M_TEMP); 750 return (error); 751 } 752 753 int 754 selscan(p, ibits, obits, nfd, retval) 755 struct proc *p; 756 fd_set *ibits, *obits; 757 int nfd; 758 register_t *retval; 759 { 760 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 761 register struct filedesc *fdp = p->p_fd; 762 register int msk, i, j, fd; 763 register fd_mask bits; 764 struct file *fp; 765 int ni, n = 0; 766 static int flag[3] = { FREAD, FWRITE, 0 }; 767 768 /* 769 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 770 * up to the next byte) otherwise the fd_set's are normal sized. 
771 */ 772 ni = sizeof(fd_set); 773 if (nfd > FD_SETSIZE) 774 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 775 776 for (msk = 0; msk < 3; msk++) { 777 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 778 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 779 780 for (i = 0; i < nfd; i += NFDBITS) { 781 bits = pibits->fds_bits[i/NFDBITS]; 782 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 783 bits &= ~(1 << j); 784 if ((fp = fd_getfile(fdp, fd)) == NULL) 785 return (EBADF); 786 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 787 FD_SET(fd, pobits); 788 n++; 789 } 790 } 791 } 792 } 793 *retval = n; 794 return (0); 795 } 796 797 /*ARGSUSED*/ 798 int 799 seltrue(dev, flag, p) 800 dev_t dev; 801 int flag; 802 struct proc *p; 803 { 804 805 return (1); 806 } 807 808 /* 809 * Record a select request. 810 */ 811 void 812 selrecord(selector, sip) 813 struct proc *selector; 814 struct selinfo *sip; 815 { 816 struct proc *p; 817 pid_t mypid; 818 819 mypid = selector->p_pid; 820 if (sip->si_selpid == mypid) 821 return; 822 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 823 p->p_wchan == (caddr_t)&selwait) 824 sip->si_flags |= SI_COLL; 825 else 826 sip->si_selpid = mypid; 827 } 828 829 /* 830 * Do a wakeup when a selectable event occurs. 
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	/* nobody is selecting on this selinfo */
	if (sip->si_selpid == 0)
		return;
	/*
	 * On a collision we cannot know which sleeper cares, so bump the
	 * collision counter and wake everyone sleeping on selwait.
	 */
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			/* sleeping in select/poll: make it runnable */
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			/* between scan and sleep: force a rescan instead */
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}

/*
 * Scan an array of pollfd, polling each non-negative descriptor via its
 * fo_select method; revents is set for ready descriptors and *retval
 * gets the count of entries with any revents set.
 */
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0) {
			/* negative fd entries are ignored per poll(2) */
			pl[i].revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl[i].fd)) == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		for (x = msk = 0; msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					/* only report events that were requested */
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		if (x)
			n++;
	}
	*retval = n;
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	/* pollscan() only ORs bits in, so clear revents up front */
	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	/* msec == -1 means block forever (timo stays 0 for tsleep) */
	if (msec != -1) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* convert the relative timeout to an absolute deadline */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;

retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	/* a collision or wakeup since pollscan means we must rescan */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}