/*	$OpenBSD: sys_generic.c,v 1.78 2012/07/09 17:51:08 claudio Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/sched.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
int pollout(struct pollfd *, struct pollfd *, u_int);

/*
 * Read system call.
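 * Builds a single iovec around the caller's buffer and hands it to
 * dofilereadv(), which does the actual transfer and releases the file
 * reference taken here.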
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	iov.iov_base = SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval));
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilereadv() will FRELE the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
	    &fp->f_offset, retval));
}

int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, int userspace, off_t *offset, register_t *retval)
{
	struct iovec aiov[UIO_SMALLIOV];
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree = NULL;
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);

	/*
	 * If the iovec array exists in userspace, it needs to be copied in;
	 * otherwise, it can be used directly.
	 */
	if (userspace) {
		if ((u_int)iovcnt > UIO_SMALLIOV) {
			if ((u_int)iovcnt > IOV_MAX) {
				error = EINVAL;
				goto out;
			}
			iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
		} else if ((u_int)iovcnt > 0) {
			iov = aiov;
			needfree = NULL;
		} else {
			error = EINVAL;
			goto out;
		}
		if ((error = copyin(iovp, iov, iovlen)))
			goto done;
	} else {
		iov = (struct iovec *)iovp;		/* de-constify */
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.  Note that the addition is
		 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX.
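		 * For example, two iovecs of SSIZE_MAX/2 + 1 bytes each would
		 * pass the per-entry length test, but the running total would
		 * exceed SSIZE_MAX on the second entry and be rejected here.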
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;

	fp->f_rxfer++;
	fp->f_rbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp, p);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	iov.iov_base = (void *)SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval));
}

/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd = SCARG(uap, fd);
	struct file *fp;
	struct filedesc *fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FREF(fp);

	/* dofilewritev() will FRELE the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1,
	    &fp->f_offset, retval));
}

int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, int userspace, off_t *offset, register_t *retval)
{
	struct iovec aiov[UIO_SMALLIOV];
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree = NULL;
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);

	/*
	 * If the iovec array exists in userspace, it needs to be copied in;
	 * otherwise, it can be used directly.
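	 * Small arrays (up to UIO_SMALLIOV entries) reuse the on-stack
	 * aiov buffer; larger ones, up to IOV_MAX, get a temporary
	 * allocation that is released at "done".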
	 */
	if (userspace) {
		if ((u_int)iovcnt > UIO_SMALLIOV) {
			if ((u_int)iovcnt > IOV_MAX) {
				error = EINVAL;
				goto out;
			}
			iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
		} else if ((u_int)iovcnt > 0) {
			iov = aiov;
			needfree = NULL;
		} else {
			error = EINVAL;
			goto out;
		}
		if ((error = copyin(iovp, iov, iovlen)))
			goto done;
	} else {
		iov = (struct iovec *)iovp;		/* de-constify */
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.  Note that the addition is
		 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			ptsignal(p, SIGPIPE, STHREAD);
	}
	cnt -= auio.uio_resid;

	fp->f_wxfer++;
	fp->f_wbytes += cnt;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FRELE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
	case FIOCLEX:
		fdplock(fdp);
		if (com == FIONCLEX)
			fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		else
			fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		fdpunlock(fdp);
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
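	 * IOCPARM_LEN() extracts the size that the ioctl command macros
	 * encode in the command word; anything larger than the on-stack
	 * stkbuf is staged through a temporary M_IOCTLOPS allocation.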
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	FREF(fp);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct process *pr = prfind(tmp);
			if (pr == NULL) {
				error = ESRCH;
				break;
			}
			tmp = pr->ps_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		break;
	}
	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (com&IOC_OUT) && size)
		error = copyout(data, SCARG(uap, data), (u_int)size);
out:
	FRELE(fp, p);
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}

int	selwait, nselcoll;

/*
 * Select system call.
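 * Each requested descriptor is polled via selscan(); if none are ready
 * the process sleeps on selwait with P_SELECT set, and a selwakeup()
 * or a collision (nselcoll changing) forces a rescan.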
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_mask bits[6];
	fd_set *pibits[3], *pobits[3];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	u_int nd, ni;

	nd = SCARG(uap, nd);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni > sizeof(bits[0])) {
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK|M_ZERO);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero(bits, sizeof(bits));
		pibits[0] = (fd_set *)&bits[0];
		pibits[1] = (fd_set *)&bits[1];
		pibits[2] = (fd_set *)&bits[2];
		pobits[0] = (fd_set *)&bits[3];
		pobits[1] = (fd_set *)&bits[4];
		pobits[2] = (fd_set *)&bits[5];
	}

#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
	    pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
#ifdef KTRACE
	if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
		if (SCARG(uap, in)) ktrfdset(p, pibits[0], ni);
		if (SCARG(uap, ou)) ktrfdset(p, pibits[1], ni);
		if (SCARG(uap, ex)) ktrfdset(p, pibits[2], ni);
	}
#endif

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimeval(p, &atv);
#endif
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
	    SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
#ifdef KTRACE
		if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
			if (SCARG(uap, in)) ktrfdset(p, pobits[0], ni);
			if (SCARG(uap, ou)) ktrfdset(p, pobits[1], ni);
			if (SCARG(uap, ex)) ktrfdset(p, pobits[2], ni);
		}
#endif
	}

	if (pibits[0] != (fd_set *)&bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}

int
selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
    register_t *retval)
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FREF(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
selfalse(dev_t dev, int events, struct proc *p)
{

	return (0);
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	KNOTE(&sip->si_note, 0);
	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			atomic_clearbits_int(&p->p_flag, P_SELECT);
		SCHED_UNLOCK(s);
	}
}

void
pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;
	int n = 0;

	for (i = 0; i < nfd; i++, pl++) {
		/* Check the file descriptor. */
		if (pl->fd < 0) {
			pl->revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
			pl->revents = POLLNVAL;
			n++;
			continue;
		}
		FREF(fp);
		pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
		FRELE(fp, p);
		if (pl->revents != 0)
			n++;
	}
	*retval = n;
}

/*
 * Only copyout the revents field.
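 * The fd and events members belong to the caller and are left untouched;
 * copying each revents individually costs one copyout() per entry but
 * never clobbers the caller's request.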
 */
int
pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds)
{
	int error = 0;
	u_int i = 0;

	while (!error && i++ < nfds) {
		error = copyout(&pl->revents, &upl->revents,
		    sizeof(upl->revents));
		pl++;
		upl++;
	}

	return (error);
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	int msec = SCARG(uap, timeout);
	struct timeval atv, rtv, ttv;
	int timo, ncoll, i, s, error;
	extern int nselcoll, selwait;
	u_int nfds = SCARG(uap, nfds);

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * nfds;

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < nfds; i++)
		pl[i].revents = 0;

	if (msec != INFTIM) {
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timeradd(&atv, &rtv, &atv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	pollscan(p, pl, nfds, retval);
	if (*retval)
		goto done;
	if (msec != INFTIM) {
		getmicrouptime(&rtv);
		if (timercmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timersub(&ttv, &rtv, &ttv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 * ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case ERESTART:
		error = pollout(pl, SCARG(uap, fds), nfds);
		if (error == 0)
			error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = pollout(pl, SCARG(uap, fds), nfds);
		break;
	}
bad:
	if (pl != pfds)
		free(pl, M_TEMP);
	return (error);
}