1 /* $OpenBSD: sys_generic.c,v 1.135 2021/01/08 09:29:04 visa Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/proc.h> 49 #include <sys/resourcevar.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/time.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 #include <sys/sched.h> 62 #include <sys/pledge.h> 63 64 #include <sys/mount.h> 65 #include <sys/syscallargs.h> 66 67 #include <uvm/uvm_extern.h> 68 69 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 70 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 71 int pollout(struct pollfd *, struct pollfd *, u_int); 72 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 73 struct timespec *, const sigset_t *, register_t *); 74 int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *, 75 const sigset_t *, register_t *); 76 void doselwakeup(struct selinfo *); 77 78 int 79 iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, 80 unsigned int iovcnt, size_t *residp) 81 { 82 #ifdef KTRACE 83 struct proc *p = curproc; 84 #endif 85 struct iovec *iov; 86 int error, i; 87 size_t resid = 0; 88 89 if (iovcnt > UIO_SMALLIOV) { 90 if (iovcnt > IOV_MAX) 91 return (EINVAL); 92 iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); 93 } else if (iovcnt > 0) { 94 iov = aiov; 95 } else { 96 return (EINVAL); 97 } 98 *iovp = iov; 99 100 if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) 101 return (error); 102 103 #ifdef KTRACE 104 if (KTRPOINT(p, KTR_STRUCT)) 105 ktriovec(p, iov, iovcnt); 106 #endif 107 108 for (i = 0; i < iovcnt; i++) { 109 resid += iov->iov_len; 110 /* 111 * Writes return ssize_t because -1 is returned on error. 112 * Therefore we must restrict the length to SSIZE_MAX to 113 * avoid garbage return values. Note that the addition is 114 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 115 */ 116 if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) 117 return (EINVAL); 118 iov++; 119 } 120 121 if (residp != NULL) 122 *residp = resid; 123 124 return (0); 125 } 126 127 void 128 iovec_free(struct iovec *iov, unsigned int iovcnt) 129 { 130 if (iovcnt > UIO_SMALLIOV) 131 free(iov, M_IOV, iovcnt * sizeof(*iov)); 132 } 133 134 /* 135 * Read system call. 136 */ 137 int 138 sys_read(struct proc *p, void *v, register_t *retval) 139 { 140 struct sys_read_args /* { 141 syscallarg(int) fd; 142 syscallarg(void *) buf; 143 syscallarg(size_t) nbyte; 144 } */ *uap = v; 145 struct iovec iov; 146 struct uio auio; 147 148 iov.iov_base = SCARG(uap, buf); 149 iov.iov_len = SCARG(uap, nbyte); 150 if (iov.iov_len > SSIZE_MAX) 151 return (EINVAL); 152 153 auio.uio_iov = &iov; 154 auio.uio_iovcnt = 1; 155 auio.uio_resid = iov.iov_len; 156 157 return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); 158 } 159 160 /* 161 * Scatter read system call. 162 */ 163 int 164 sys_readv(struct proc *p, void *v, register_t *retval) 165 { 166 struct sys_readv_args /* { 167 syscallarg(int) fd; 168 syscallarg(const struct iovec *) iovp; 169 syscallarg(int) iovcnt; 170 } */ *uap = v; 171 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 172 int error, iovcnt = SCARG(uap, iovcnt); 173 struct uio auio; 174 size_t resid; 175 176 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 177 if (error) 178 goto done; 179 180 auio.uio_iov = iov; 181 auio.uio_iovcnt = iovcnt; 182 auio.uio_resid = resid; 183 184 error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); 185 done: 186 iovec_free(iov, iovcnt); 187 return (error); 188 } 189 190 int 191 dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, 192 register_t *retval) 193 { 194 struct filedesc *fdp = p->p_fd; 195 struct file *fp; 196 long cnt, error = 0; 197 u_int iovlen; 198 #ifdef KTRACE 199 struct iovec *ktriov = NULL; 200 #endif 201 202 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 203 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 204 205 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 206 return (EBADF); 207 208 /* Checks for positioned read. */ 209 if (flags & FO_POSITION) { 210 struct vnode *vp = fp->f_data; 211 212 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 213 (vp->v_flag & VISTTY)) { 214 error = ESPIPE; 215 goto done; 216 } 217 218 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 219 error = EINVAL; 220 goto done; 221 } 222 } 223 224 uio->uio_rw = UIO_READ; 225 uio->uio_segflg = UIO_USERSPACE; 226 uio->uio_procp = p; 227 #ifdef KTRACE 228 /* 229 * if tracing, save a copy of iovec 230 */ 231 if (KTRPOINT(p, KTR_GENIO)) { 232 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 233 memcpy(ktriov, uio->uio_iov, iovlen); 234 } 235 #endif 236 cnt = uio->uio_resid; 237 error = (*fp->f_ops->fo_read)(fp, uio, flags); 238 if (error) { 239 if (uio->uio_resid != cnt && (error == ERESTART || 240 error == EINTR || error == EWOULDBLOCK)) 241 error = 0; 242 } 243 cnt -= uio->uio_resid; 244 245 mtx_enter(&fp->f_mtx); 246 fp->f_rxfer++; 247 fp->f_rbytes += cnt; 248 mtx_leave(&fp->f_mtx); 249 #ifdef KTRACE 250 if (ktriov != NULL) { 251 if (error == 0) 252 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 253 free(ktriov, M_TEMP, iovlen); 254 } 255 #endif 256 *retval = cnt; 257 done: 258 FRELE(fp, p); 259 return (error); 260 } 261 262 /* 263 * Write system call 264 */ 265 int 266 sys_write(struct proc *p, void *v, register_t *retval) 267 { 268 struct sys_write_args /* { 269 syscallarg(int) fd; 270 syscallarg(const void *) buf; 271 syscallarg(size_t) nbyte; 272 } */ *uap = v; 273 struct iovec iov; 274 struct uio auio; 275 276 iov.iov_base = (void *)SCARG(uap, buf); 277 iov.iov_len = SCARG(uap, nbyte); 278 if (iov.iov_len > SSIZE_MAX) 279 return (EINVAL); 280 281 auio.uio_iov = &iov; 282 auio.uio_iovcnt = 1; 283 auio.uio_resid = iov.iov_len; 284 285 return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); 286 } 287 288 /* 289 * Gather write system call 290 */ 291 int 292 sys_writev(struct proc *p, void *v, register_t *retval) 293 { 294 struct sys_writev_args /* { 295 syscallarg(int) fd; 296 syscallarg(const struct iovec *) iovp; 297 syscallarg(int) iovcnt; 298 } */ *uap = v; 299 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 300 int error, iovcnt = SCARG(uap, iovcnt); 301 struct uio auio; 302 size_t resid; 303 304 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 305 if (error) 306 goto done; 307 308 auio.uio_iov = iov; 309 auio.uio_iovcnt = iovcnt; 310 auio.uio_resid = resid; 311 312 error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); 313 done: 314 iovec_free(iov, iovcnt); 315 return (error); 316 } 317 318 int 319 dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, 320 register_t *retval) 321 { 322 struct filedesc *fdp = p->p_fd; 323 struct file *fp; 324 long cnt, error = 0; 325 u_int iovlen; 326 #ifdef KTRACE 327 struct iovec *ktriov = NULL; 328 #endif 329 330 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 331 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 332 333 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 334 return (EBADF); 335 336 /* Checks for positioned write. */ 337 if (flags & FO_POSITION) { 338 struct vnode *vp = fp->f_data; 339 340 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 341 (vp->v_flag & VISTTY)) { 342 error = ESPIPE; 343 goto done; 344 } 345 346 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 347 error = EINVAL; 348 goto done; 349 } 350 } 351 352 uio->uio_rw = UIO_WRITE; 353 uio->uio_segflg = UIO_USERSPACE; 354 uio->uio_procp = p; 355 #ifdef KTRACE 356 /* 357 * if tracing, save a copy of iovec 358 */ 359 if (KTRPOINT(p, KTR_GENIO)) { 360 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 361 memcpy(ktriov, uio->uio_iov, iovlen); 362 } 363 #endif 364 cnt = uio->uio_resid; 365 error = (*fp->f_ops->fo_write)(fp, uio, flags); 366 if (error) { 367 if (uio->uio_resid != cnt && (error == ERESTART || 368 error == EINTR || error == EWOULDBLOCK)) 369 error = 0; 370 if (error == EPIPE) { 371 KERNEL_LOCK(); 372 ptsignal(p, SIGPIPE, STHREAD); 373 KERNEL_UNLOCK(); 374 } 375 } 376 cnt -= uio->uio_resid; 377 378 mtx_enter(&fp->f_mtx); 379 fp->f_wxfer++; 380 fp->f_wbytes += cnt; 381 mtx_leave(&fp->f_mtx); 382 #ifdef KTRACE 383 if (ktriov != NULL) { 384 if (error == 0) 385 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 386 free(ktriov, M_TEMP, iovlen); 387 } 388 #endif 389 *retval = cnt; 390 done: 391 FRELE(fp, p); 392 return (error); 393 } 394 395 /* 396 * Ioctl system call 397 */ 398 int 399 sys_ioctl(struct proc *p, void *v, register_t *retval) 400 { 401 struct sys_ioctl_args /* { 402 syscallarg(int) fd; 403 syscallarg(u_long) com; 404 syscallarg(void *) data; 405 } */ *uap = v; 406 struct file *fp; 407 struct filedesc *fdp = p->p_fd; 408 u_long com = SCARG(uap, com); 409 int error = 0; 410 u_int size = 0; 411 caddr_t data, memp = NULL; 412 int tmp; 413 #define STK_PARAMS 128 414 long long stkbuf[STK_PARAMS / sizeof(long long)]; 415 416 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 417 return (EBADF); 418 419 if (fp->f_type == DTYPE_SOCKET) { 420 struct socket *so = fp->f_data; 421 422 if (so->so_state & SS_DNS) { 423 error = EINVAL; 424 goto out; 425 } 426 } 427 428 error = pledge_ioctl(p, com, fp); 429 if (error) 430 goto out; 431 432 switch (com) { 433 case FIONCLEX: 434 case FIOCLEX: 435 fdplock(fdp); 436 if (com == FIONCLEX) 437 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 438 else 439 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 440 fdpunlock(fdp); 441 goto out; 442 } 443 444 /* 445 * Interpret high order word to find amount of data to be 446 * copied to/from the user's address space. 447 */ 448 size = IOCPARM_LEN(com); 449 if (size > IOCPARM_MAX) { 450 error = ENOTTY; 451 goto out; 452 } 453 if (size > sizeof (stkbuf)) { 454 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 455 data = memp; 456 } else 457 data = (caddr_t)stkbuf; 458 if (com&IOC_IN) { 459 if (size) { 460 error = copyin(SCARG(uap, data), data, size); 461 if (error) { 462 goto out; 463 } 464 } else 465 *(caddr_t *)data = SCARG(uap, data); 466 } else if ((com&IOC_OUT) && size) 467 /* 468 * Zero the buffer so the user always 469 * gets back something deterministic. 470 */ 471 memset(data, 0, size); 472 else if (com&IOC_VOID) 473 *(caddr_t *)data = SCARG(uap, data); 474 475 switch (com) { 476 477 case FIONBIO: 478 if ((tmp = *(int *)data) != 0) 479 atomic_setbits_int(&fp->f_flag, FNONBLOCK); 480 else 481 atomic_clearbits_int(&fp->f_flag, FNONBLOCK); 482 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 483 break; 484 485 case FIOASYNC: 486 if ((tmp = *(int *)data) != 0) 487 atomic_setbits_int(&fp->f_flag, FASYNC); 488 else 489 atomic_clearbits_int(&fp->f_flag, FASYNC); 490 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 491 break; 492 493 default: 494 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 495 break; 496 } 497 /* 498 * Copy any data to user, size was 499 * already set and checked above. 500 */ 501 if (error == 0 && (com&IOC_OUT) && size) 502 error = copyout(data, SCARG(uap, data), size); 503 out: 504 FRELE(fp, p); 505 free(memp, M_IOCTLOPS, size); 506 return (error); 507 } 508 509 int selwait, nselcoll; 510 511 /* 512 * Select system call. 513 */ 514 int 515 sys_select(struct proc *p, void *v, register_t *retval) 516 { 517 struct sys_select_args /* { 518 syscallarg(int) nd; 519 syscallarg(fd_set *) in; 520 syscallarg(fd_set *) ou; 521 syscallarg(fd_set *) ex; 522 syscallarg(struct timeval *) tv; 523 } */ *uap = v; 524 525 struct timespec ts, *tsp = NULL; 526 int error; 527 528 if (SCARG(uap, tv) != NULL) { 529 struct timeval tv; 530 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 531 return (error); 532 #ifdef KTRACE 533 if (KTRPOINT(p, KTR_STRUCT)) 534 ktrreltimeval(p, &tv); 535 #endif 536 if (tv.tv_sec < 0 || !timerisvalid(&tv)) 537 return (EINVAL); 538 TIMEVAL_TO_TIMESPEC(&tv, &ts); 539 tsp = &ts; 540 } 541 542 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 543 SCARG(uap, ex), tsp, NULL, retval)); 544 } 545 546 int 547 sys_pselect(struct proc *p, void *v, register_t *retval) 548 { 549 struct sys_pselect_args /* { 550 syscallarg(int) nd; 551 syscallarg(fd_set *) in; 552 syscallarg(fd_set *) ou; 553 syscallarg(fd_set *) ex; 554 syscallarg(const struct timespec *) ts; 555 syscallarg(const sigset_t *) mask; 556 } */ *uap = v; 557 558 struct timespec ts, *tsp = NULL; 559 sigset_t ss, *ssp = NULL; 560 int error; 561 562 if (SCARG(uap, ts) != NULL) { 563 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 564 return (error); 565 #ifdef KTRACE 566 if (KTRPOINT(p, KTR_STRUCT)) 567 ktrreltimespec(p, &ts); 568 #endif 569 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 570 return (EINVAL); 571 tsp = &ts; 572 } 573 if (SCARG(uap, mask) != NULL) { 574 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 575 return (error); 576 ssp = &ss; 577 } 578 579 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 580 SCARG(uap, ex), tsp, ssp, retval)); 581 } 582 583 int 584 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 585 struct timespec *timeout, const sigset_t *sigmask, register_t *retval) 586 { 587 fd_mask bits[6]; 588 fd_set *pibits[3], *pobits[3]; 589 struct timespec elapsed, start, stop; 590 uint64_t nsecs; 591 int s, ncoll, error = 0; 592 u_int ni; 593 594 if (nd < 0) 595 return (EINVAL); 596 if (nd > p->p_fd->fd_nfiles) { 597 /* forgiving; slightly wrong */ 598 nd = p->p_fd->fd_nfiles; 599 } 600 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 601 if (ni > sizeof(bits[0])) { 602 caddr_t mbits; 603 604 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 605 pibits[0] = (fd_set *)&mbits[ni * 0]; 606 pibits[1] = (fd_set *)&mbits[ni * 1]; 607 pibits[2] = (fd_set *)&mbits[ni * 2]; 608 pobits[0] = (fd_set *)&mbits[ni * 3]; 609 pobits[1] = (fd_set *)&mbits[ni * 4]; 610 pobits[2] = (fd_set *)&mbits[ni * 5]; 611 } else { 612 memset(bits, 0, sizeof(bits)); 613 pibits[0] = (fd_set *)&bits[0]; 614 pibits[1] = (fd_set *)&bits[1]; 615 pibits[2] = (fd_set *)&bits[2]; 616 pobits[0] = (fd_set *)&bits[3]; 617 pobits[1] = (fd_set *)&bits[4]; 618 pobits[2] = (fd_set *)&bits[5]; 619 } 620 621 #define getbits(name, x) \ 622 if (name && (error = copyin(name, pibits[x], ni))) \ 623 goto done; 624 getbits(in, 0); 625 getbits(ou, 1); 626 getbits(ex, 2); 627 #undef getbits 628 #ifdef KTRACE 629 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 630 if (in) ktrfdset(p, pibits[0], ni); 631 if (ou) ktrfdset(p, pibits[1], ni); 632 if (ex) ktrfdset(p, pibits[2], ni); 633 } 634 #endif 635 636 if (sigmask) 637 dosigsuspend(p, *sigmask &~ sigcantmask); 638 639 retry: 640 ncoll = nselcoll; 641 atomic_setbits_int(&p->p_flag, P_SELECT); 642 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 643 if (error || *retval) 644 goto done; 645 if (timeout == NULL || timespecisset(timeout)) { 646 if (timeout != NULL) { 647 getnanouptime(&start); 648 nsecs = MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP); 649 } else 650 nsecs = INFSLP; 651 s = splhigh(); 652 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 653 splx(s); 654 goto retry; 655 } 656 atomic_clearbits_int(&p->p_flag, P_SELECT); 657 error = tsleep_nsec(&selwait, PSOCK | PCATCH, "select", nsecs); 658 splx(s); 659 if (timeout != NULL) { 660 getnanouptime(&stop); 661 timespecsub(&stop, &start, &elapsed); 662 timespecsub(timeout, &elapsed, timeout); 663 if (timeout->tv_sec < 0) 664 timespecclear(timeout); 665 } 666 if (error == 0 || error == EWOULDBLOCK) 667 goto retry; 668 } 669 done: 670 atomic_clearbits_int(&p->p_flag, P_SELECT); 671 /* select is not restarted after signals... */ 672 if (error == ERESTART) 673 error = EINTR; 674 if (error == EWOULDBLOCK) 675 error = 0; 676 #define putbits(name, x) \ 677 if (name && (error2 = copyout(pobits[x], name, ni))) \ 678 error = error2; 679 if (error == 0) { 680 int error2; 681 682 putbits(in, 0); 683 putbits(ou, 1); 684 putbits(ex, 2); 685 #undef putbits 686 #ifdef KTRACE 687 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 688 if (in) ktrfdset(p, pobits[0], ni); 689 if (ou) ktrfdset(p, pobits[1], ni); 690 if (ex) ktrfdset(p, pobits[2], ni); 691 } 692 #endif 693 } 694 695 if (pibits[0] != (fd_set *)&bits[0]) 696 free(pibits[0], M_TEMP, 6 * ni); 697 return (error); 698 } 699 700 int 701 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 702 register_t *retval) 703 { 704 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 705 struct filedesc *fdp = p->p_fd; 706 int msk, i, j, fd; 707 fd_mask bits; 708 struct file *fp; 709 int n = 0; 710 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 711 712 for (msk = 0; msk < 3; msk++) { 713 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 714 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 715 716 for (i = 0; i < nfd; i += NFDBITS) { 717 bits = pibits->fds_bits[i/NFDBITS]; 718 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 719 bits &= ~(1 << j); 720 if ((fp = fd_getfile(fdp, fd)) == NULL) 721 return (EBADF); 722 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 723 FD_SET(fd, pobits); 724 n++; 725 } 726 FRELE(fp, p); 727 } 728 } 729 } 730 *retval = n; 731 return (0); 732 } 733 734 int 735 seltrue(dev_t dev, int events, struct proc *p) 736 { 737 738 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 739 } 740 741 int 742 selfalse(dev_t dev, int events, struct proc *p) 743 { 744 745 return (0); 746 } 747 748 /* 749 * Record a select request. 750 */ 751 void 752 selrecord(struct proc *selector, struct selinfo *sip) 753 { 754 struct proc *p; 755 pid_t mytid; 756 757 KERNEL_ASSERT_LOCKED(); 758 759 mytid = selector->p_tid; 760 if (sip->si_seltid == mytid) 761 return; 762 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 763 p->p_wchan == (caddr_t)&selwait) 764 sip->si_flags |= SI_COLL; 765 else 766 sip->si_seltid = mytid; 767 } 768 769 /* 770 * Do a wakeup when a selectable event occurs. 771 */ 772 void 773 selwakeup(struct selinfo *sip) 774 { 775 KERNEL_LOCK(); 776 KNOTE(&sip->si_note, NOTE_SUBMIT); 777 doselwakeup(sip); 778 KERNEL_UNLOCK(); 779 } 780 781 void 782 doselwakeup(struct selinfo *sip) 783 { 784 struct proc *p; 785 786 KERNEL_ASSERT_LOCKED(); 787 788 if (sip->si_seltid == 0) 789 return; 790 if (sip->si_flags & SI_COLL) { 791 nselcoll++; 792 sip->si_flags &= ~SI_COLL; 793 wakeup(&selwait); 794 } 795 p = tfind(sip->si_seltid); 796 sip->si_seltid = 0; 797 if (p != NULL) { 798 if (wakeup_proc(p, &selwait)) { 799 /* nothing else to do */ 800 } else if (p->p_flag & P_SELECT) 801 atomic_clearbits_int(&p->p_flag, P_SELECT); 802 } 803 } 804 805 void 806 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 807 { 808 struct filedesc *fdp = p->p_fd; 809 struct file *fp; 810 u_int i; 811 int n = 0; 812 813 for (i = 0; i < nfd; i++, pl++) { 814 /* Check the file descriptor. */ 815 if (pl->fd < 0) { 816 pl->revents = 0; 817 continue; 818 } 819 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 820 pl->revents = POLLNVAL; 821 n++; 822 continue; 823 } 824 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 825 FRELE(fp, p); 826 if (pl->revents != 0) 827 n++; 828 } 829 *retval = n; 830 } 831 832 /* 833 * Only copyout the revents field. 834 */ 835 int 836 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 837 { 838 int error = 0; 839 u_int i = 0; 840 841 while (!error && i++ < nfds) { 842 error = copyout(&pl->revents, &upl->revents, 843 sizeof(upl->revents)); 844 pl++; 845 upl++; 846 } 847 848 return (error); 849 } 850 851 /* 852 * We are using the same mechanism as select only we encode/decode args 853 * differently. 854 */ 855 int 856 sys_poll(struct proc *p, void *v, register_t *retval) 857 { 858 struct sys_poll_args /* { 859 syscallarg(struct pollfd *) fds; 860 syscallarg(u_int) nfds; 861 syscallarg(int) timeout; 862 } */ *uap = v; 863 864 struct timespec ts, *tsp = NULL; 865 int msec = SCARG(uap, timeout); 866 867 if (msec != INFTIM) { 868 if (msec < 0) 869 return (EINVAL); 870 ts.tv_sec = msec / 1000; 871 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 872 tsp = &ts; 873 } 874 875 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 876 retval)); 877 } 878 879 int 880 sys_ppoll(struct proc *p, void *v, register_t *retval) 881 { 882 struct sys_ppoll_args /* { 883 syscallarg(struct pollfd *) fds; 884 syscallarg(u_int) nfds; 885 syscallarg(const struct timespec *) ts; 886 syscallarg(const sigset_t *) mask; 887 } */ *uap = v; 888 889 int error; 890 struct timespec ts, *tsp = NULL; 891 sigset_t ss, *ssp = NULL; 892 893 if (SCARG(uap, ts) != NULL) { 894 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 895 return (error); 896 #ifdef KTRACE 897 if (KTRPOINT(p, KTR_STRUCT)) 898 ktrreltimespec(p, &ts); 899 #endif 900 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 901 return (EINVAL); 902 tsp = &ts; 903 } 904 905 if (SCARG(uap, mask) != NULL) { 906 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 907 return (error); 908 ssp = &ss; 909 } 910 911 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 912 retval)); 913 } 914 915 int 916 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 917 struct timespec *timeout, const sigset_t *sigmask, register_t *retval) 918 { 919 size_t sz; 920 struct pollfd pfds[4], *pl = pfds; 921 struct timespec elapsed, start, stop; 922 uint64_t nsecs; 923 int ncoll, i, s, error; 924 925 /* Standards say no more than MAX_OPEN; this is possibly better. */ 926 if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles)) 927 return (EINVAL); 928 929 /* optimize for the default case, of a small nfds value */ 930 if (nfds > nitems(pfds)) { 931 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 932 M_WAITOK | M_CANFAIL); 933 if (pl == NULL) 934 return (EINVAL); 935 } 936 937 sz = nfds * sizeof(*pl); 938 939 if ((error = copyin(fds, pl, sz)) != 0) 940 goto bad; 941 942 for (i = 0; i < nfds; i++) { 943 pl[i].events &= ~POLL_NOHUP; 944 pl[i].revents = 0; 945 } 946 947 if (sigmask) 948 dosigsuspend(p, *sigmask &~ sigcantmask); 949 950 retry: 951 ncoll = nselcoll; 952 atomic_setbits_int(&p->p_flag, P_SELECT); 953 pollscan(p, pl, nfds, retval); 954 if (*retval) 955 goto done; 956 if (timeout == NULL || timespecisset(timeout)) { 957 if (timeout != NULL) { 958 getnanouptime(&start); 959 nsecs = MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP); 960 } else 961 nsecs = INFSLP; 962 s = splhigh(); 963 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 964 splx(s); 965 goto retry; 966 } 967 atomic_clearbits_int(&p->p_flag, P_SELECT); 968 error = tsleep_nsec(&selwait, PSOCK | PCATCH, "poll", nsecs); 969 splx(s); 970 if (timeout != NULL) { 971 getnanouptime(&stop); 972 timespecsub(&stop, &start, &elapsed); 973 timespecsub(timeout, &elapsed, timeout); 974 if (timeout->tv_sec < 0) 975 timespecclear(timeout); 976 } 977 if (error == 0 || error == EWOULDBLOCK) 978 goto retry; 979 } 980 981 done: 982 atomic_clearbits_int(&p->p_flag, P_SELECT); 983 /* 984 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 985 * ignored (since the whole point is to see what would block). 986 */ 987 switch (error) { 988 case ERESTART: 989 error = pollout(pl, fds, nfds); 990 if (error == 0) 991 error = EINTR; 992 break; 993 case EWOULDBLOCK: 994 case 0: 995 error = pollout(pl, fds, nfds); 996 break; 997 } 998 #ifdef KTRACE 999 if (KTRPOINT(p, KTR_STRUCT)) 1000 ktrpollfd(p, pl, nfds); 1001 #endif /* KTRACE */ 1002 bad: 1003 if (pl != pfds) 1004 free(pl, M_TEMP, sz); 1005 return (error); 1006 } 1007 1008 /* 1009 * utrace system call 1010 */ 1011 int 1012 sys_utrace(struct proc *curp, void *v, register_t *retval) 1013 { 1014 #ifdef KTRACE 1015 struct sys_utrace_args /* { 1016 syscallarg(const char *) label; 1017 syscallarg(const void *) addr; 1018 syscallarg(size_t) len; 1019 } */ *uap = v; 1020 1021 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1022 SCARG(uap, len))); 1023 #else 1024 return (0); 1025 #endif 1026 } 1027