/*	$OpenBSD: sys_generic.c,v 1.126 2019/10/03 18:47:19 cheloha Exp $	*/
/*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/

/*
 * Copyright (c) 1996 Theo de Raadt
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/sched.h>
#include <sys/pledge.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
int pollout(struct pollfd *, struct pollfd *, u_int);
int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *,
    struct timespec *, const sigset_t *, register_t *);
int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *,
    const sigset_t *, register_t *);

int
iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov,
    unsigned int iovcnt, size_t *residp)
{
#ifdef KTRACE
	struct proc *p = curproc;
#endif
	struct iovec *iov;
	int error, i;
	size_t resid = 0;

	if (iovcnt > UIO_SMALLIOV) {
		if (iovcnt > IOV_MAX)
			return (EINVAL);
		iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK);
	} else if (iovcnt > 0) {
		iov = aiov;
	} else {
		return (EINVAL);
	}
	*iovp = iov;

	if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov))))
		return (error);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktriovec(p, iov, iovcnt);
#endif

	for (i = 0; i < iovcnt; i++) {
		resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.  Note that the addition is
		 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX.
		 */
		if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX)
			return (EINVAL);
		iov++;
	}

	if (residp != NULL)
		*residp = resid;

	return (0);
}

void
iovec_free(struct iovec *iov, unsigned int iovcnt)
{
	if (iovcnt > UIO_SMALLIOV)
		free(iov, M_IOV, iovcnt * sizeof(*iov));
}

/*
 * Read system call.
 */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	struct uio auio;

	iov.iov_base = SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);
	if (iov.iov_len > SSIZE_MAX)
		return (EINVAL);

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = iov.iov_len;

	return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval));
}
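/*
 * Illustrative sketch (userland, not part of this file): the SSIZE_MAX
 * checks above and in iovec_copyin() mean a transfer larger than
 * SSIZE_MAX fails with EINVAL rather than returning a byte count that
 * cannot be represented in the ssize_t return value.  The descriptor
 * and buffer names are hypothetical.
 *
 *	char *buf = ...;
 *	ssize_t n = read(fd, buf, (size_t)SSIZE_MAX + 1);
 *	...	n == -1 and errno == EINVAL: request exceeds SSIZE_MAX
 */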
/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error, iovcnt = SCARG(uap, iovcnt);
	struct uio auio;
	size_t resid;

	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
	if (error)
		goto done;

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = resid;

	error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval);
done:
	iovec_free(iov, iovcnt);
	return (error);
}

int
dofilereadv(struct proc *p, int fd, struct uio *uio, int flags,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	long cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0);
	iovlen = uio->uio_iovcnt * sizeof(struct iovec);

	if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL)
		return (EBADF);

	/* Checks for positioned read. */
	if (flags & FO_POSITION) {
		struct vnode *vp = fp->f_data;

		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
		    (vp->v_flag & VISTTY)) {
			error = ESPIPE;
			goto done;
		}

		if (uio->uio_offset < 0 && vp->v_type != VCHR) {
			error = EINVAL;
			goto done;
		}
	}

	uio->uio_rw = UIO_READ;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy(ktriov, uio->uio_iov, iovlen);
	}
#endif
	cnt = uio->uio_resid;
	error = (*fp->f_ops->fo_read)(fp, uio, flags);
	if (error) {
		if (uio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= uio->uio_resid;

	mtx_enter(&fp->f_mtx);
	fp->f_rxfer++;
	fp->f_rbytes += cnt;
	mtx_leave(&fp->f_mtx);
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
	*retval = cnt;
done:
	FRELE(fp, p);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	struct iovec iov;
	struct uio auio;

	iov.iov_base = (void *)SCARG(uap, buf);
	iov.iov_len = SCARG(uap, nbyte);
	if (iov.iov_len > SSIZE_MAX)
		return (EINVAL);

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = iov.iov_len;

	return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval));
}
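/*
 * Illustrative sketch (userland, not part of this file): a typical
 * scatter read through the iovec_copyin()/dofilereadv() path above.
 * The descriptor and buffer names are hypothetical.
 *
 *	char hdr[16], body[4096];
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, iov, 2);
 */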
/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error, iovcnt = SCARG(uap, iovcnt);
	struct uio auio;
	size_t resid;

	error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid);
	if (error)
		goto done;

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = resid;

	error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval);
done:
	iovec_free(iov, iovcnt);
	return (error);
}

int
dofilewritev(struct proc *p, int fd, struct uio *uio, int flags,
    register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	long cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0);
	iovlen = uio->uio_iovcnt * sizeof(struct iovec);

	if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL)
		return (EBADF);

	/* Checks for positioned write. */
	if (flags & FO_POSITION) {
		struct vnode *vp = fp->f_data;

		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO ||
		    (vp->v_flag & VISTTY)) {
			error = ESPIPE;
			goto done;
		}

		if (uio->uio_offset < 0 && vp->v_type != VCHR) {
			error = EINVAL;
			goto done;
		}
	}

	uio->uio_rw = UIO_WRITE;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_procp = p;
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy(ktriov, uio->uio_iov, iovlen);
	}
#endif
	cnt = uio->uio_resid;
	error = (*fp->f_ops->fo_write)(fp, uio, flags);
	if (error) {
		if (uio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			KERNEL_LOCK();
			ptsignal(p, SIGPIPE, STHREAD);
			KERNEL_UNLOCK();
		}
	}
	cnt -= uio->uio_resid;

	mtx_enter(&fp->f_mtx);
	fp->f_wxfer++;
	fp->f_wbytes += cnt;
	mtx_leave(&fp->f_mtx);
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt);
		free(ktriov, M_TEMP, iovlen);
	}
#endif
	*retval = cnt;
done:
	FRELE(fp, p);
	return (error);
}
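/*
 * Illustrative sketch (userland, not part of this file): a gather write
 * through dofilewritev() above.  Note that a write to a broken pipe both
 * fails with EPIPE and delivers SIGPIPE, as implemented above.  Names
 * are hypothetical.
 *
 *	char part1[] = "hello, ", part2[] = "world\n";
 *	struct iovec iov[2] = {
 *		{ .iov_base = part1, .iov_len = sizeof(part1) - 1 },
 *		{ .iov_base = part2, .iov_len = sizeof(part2) - 1 },
 *	};
 *	ssize_t n = writev(fd, iov, 2);
 */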
/*
 * Ioctl system call
 */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(void *) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	u_long com = SCARG(uap, com);
	int error = 0;
	u_int size = 0;
	caddr_t data, memp = NULL;
	int tmp;
#define STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL)
		return (EBADF);

	if (fp->f_type == DTYPE_SOCKET) {
		struct socket *so = fp->f_data;

		if (so->so_state & SS_DNS) {
			error = EINVAL;
			goto out;
		}
	}

	error = pledge_ioctl(p, com, fp);
	if (error)
		goto out;

	switch (com) {
	case FIONCLEX:
	case FIOCLEX:
		fdplock(fdp);
		if (com == FIONCLEX)
			fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		else
			fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		fdpunlock(fdp);
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	if (size > sizeof (stkbuf)) {
		memp = malloc(size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;

		if (fp->f_type == DTYPE_SOCKET || fp->f_type == DTYPE_PIPE) {
			/* nothing */
		} else if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct process *pr = prfind(tmp);
			if (pr == NULL) {
				error = ESRCH;
				break;
			}
			tmp = pr->ps_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		break;
	}
	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (com&IOC_OUT) && size)
		error = copyout(data, SCARG(uap, data), size);
out:
	FRELE(fp, p);
	free(memp, M_IOCTLOPS, size);
	return (error);
}
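/*
 * Illustrative sketch (userland, not part of this file): FIONBIO is one
 * of the requests handled generically above; its int argument toggles
 * FNONBLOCK on the file, and IOCPARM_LEN(FIONBIO) tells the kernel to
 * copy sizeof(int) bytes in.  The descriptor name is hypothetical.
 *
 *	int on = 1;
 *	if (ioctl(fd, FIONBIO, &on) == -1)
 *		err(1, "ioctl");
 */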
int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	int error;

	if (SCARG(uap, tv) != NULL) {
		struct timeval tv;
		if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0)
			return (error);
		if (tv.tv_sec < 0 || !timerisvalid(&tv))
			return (EINVAL);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimeval(p, &tv);
#endif
		TIMEVAL_TO_TIMESPEC(&tv, &ts);
		tsp = &ts;
	}

	return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou),
	    SCARG(uap, ex), tsp, NULL, retval));
}

int
sys_pselect(struct proc *p, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(const struct timespec *) ts;
		syscallarg(const sigset_t *) mask;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	sigset_t ss, *ssp = NULL;
	int error;

	if (SCARG(uap, ts) != NULL) {
		if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0)
			return (error);
		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
			return (EINVAL);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		tsp = &ts;
	}
	if (SCARG(uap, mask) != NULL) {
		if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0)
			return (error);
		ssp = &ss;
	}

	return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou),
	    SCARG(uap, ex), tsp, ssp, retval));
}
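/*
 * Illustrative sketch (userland, not part of this file): both entry
 * points above funnel into dopselect(); pselect(2) additionally installs
 * a temporary signal mask via dosigsuspend().  The descriptor and mask
 * names are hypothetical.
 *
 *	fd_set rfds;
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	int n = pselect(fd + 1, &rfds, NULL, NULL, &ts, &sigmask);
 */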
int
dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex,
    struct timespec *timeout, const sigset_t *sigmask, register_t *retval)
{
	fd_mask bits[6];
	fd_set *pibits[3], *pobits[3];
	struct timespec elapsed, start, stop;
	int s, ncoll, error = 0, timo;
	u_int ni;

	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni > sizeof(bits[0])) {
		caddr_t mbits;

		mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		memset(bits, 0, sizeof(bits));
		pibits[0] = (fd_set *)&bits[0];
		pibits[1] = (fd_set *)&bits[1];
		pibits[2] = (fd_set *)&bits[2];
		pobits[0] = (fd_set *)&bits[3];
		pobits[1] = (fd_set *)&bits[4];
		pobits[2] = (fd_set *)&bits[5];
	}

#define	getbits(name, x) \
	if (name && (error = copyin(name, pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
#ifdef KTRACE
	if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
		if (in) ktrfdset(p, pibits[0], ni);
		if (ou) ktrfdset(p, pibits[1], ni);
		if (ex) ktrfdset(p, pibits[2], ni);
	}
#endif

	if (sigmask)
		dosigsuspend(p, *sigmask &~ sigcantmask);

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
	if (error || *retval)
		goto done;
	if (timeout == NULL || timespecisset(timeout)) {
		timo = (timeout == NULL) ? 0 : tstohz(timeout);
		if (timeout != NULL)
			getnanouptime(&start);
		s = splhigh();
		if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
			splx(s);
			goto retry;
		}
		atomic_clearbits_int(&p->p_flag, P_SELECT);
		error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
		splx(s);
		if (timeout != NULL) {
			getnanouptime(&stop);
			timespecsub(&stop, &start, &elapsed);
			timespecsub(timeout, &elapsed, timeout);
			if (timeout->tv_sec < 0)
				timespecclear(timeout);
		}
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
	}
done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (name && (error2 = copyout(pobits[x], name, ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
#ifdef KTRACE
		if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) {
			if (in) ktrfdset(p, pobits[0], ni);
			if (ou) ktrfdset(p, pobits[1], ni);
			if (ex) ktrfdset(p, pobits[2], ni);
		}
#endif
	}

	if (pibits[0] != (fd_set *)&bits[0])
		free(pibits[0], M_TEMP, 6 * ni);
	return (error);
}

int
selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
    register_t *retval)
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI };

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
selfalse(dev_t dev, int events, struct proc *p)
{

	return (0);
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mytid;

	mytid = selector->p_tid;
	if (sip->si_seltid == mytid)
		return;
	if (sip->si_seltid && (p = tfind(sip->si_seltid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_seltid = mytid;
}
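/*
 * Illustrative sketch (not part of this file): a typical driver poll
 * routine pairs with selrecord()/selwakeup().  The softc layout and the
 * data_ready() helper below are hypothetical.
 *
 *	int
 *	foo_poll(dev_t dev, int events, struct proc *p)
 *	{
 *		struct foo_softc *sc = foo_lookup(dev);
 *		int revents = 0;
 *
 *		if (events & (POLLIN | POLLRDNORM)) {
 *			if (data_ready(sc))
 *				revents |= events & (POLLIN | POLLRDNORM);
 *			else
 *				selrecord(p, &sc->sc_rsel);
 *		}
 *		return (revents);
 *	}
 *
 * The driver then calls selwakeup(&sc->sc_rsel) when data arrives,
 * waking any select/poll sleeper recorded above.
 */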
/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	KNOTE(&sip->si_note, NOTE_SUBMIT);
	if (sip->si_seltid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup(&selwait);
	}
	p = tfind(sip->si_seltid);
	sip->si_seltid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			atomic_clearbits_int(&p->p_flag, P_SELECT);
		SCHED_UNLOCK(s);
	}
}

void
pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;
	int n = 0;

	for (i = 0; i < nfd; i++, pl++) {
		/* Check the file descriptor. */
		if (pl->fd < 0) {
			pl->revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
			pl->revents = POLLNVAL;
			n++;
			continue;
		}
		pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
		FRELE(fp, p);
		if (pl->revents != 0)
			n++;
	}
	*retval = n;
}

/*
 * Only copyout the revents field.
 */
int
pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds)
{
	int error = 0;
	u_int i = 0;

	while (!error && i++ < nfds) {
		error = copyout(&pl->revents, &upl->revents,
		    sizeof(upl->revents));
		pl++;
		upl++;
	}

	return (error);
}

/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;

	struct timespec ts, *tsp = NULL;
	int msec = SCARG(uap, timeout);

	if (msec != INFTIM) {
		if (msec < 0)
			return (EINVAL);
		ts.tv_sec = msec / 1000;
		ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000;
		tsp = &ts;
	}

	return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL,
	    retval));
}

int
sys_ppoll(struct proc *p, void *v, register_t *retval)
{
	struct sys_ppoll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(const struct timespec *) ts;
		syscallarg(const sigset_t *) mask;
	} */ *uap = v;

	int error;
	struct timespec ts, *tsp = NULL;
	sigset_t ss, *ssp = NULL;

	if (SCARG(uap, ts) != NULL) {
		if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0)
			return (error);
		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
			return (EINVAL);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		tsp = &ts;
	}

	if (SCARG(uap, mask) != NULL) {
		if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0)
			return (error);
		ssp = &ss;
	}

	return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp,
	    retval));
}
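/*
 * Illustrative sketch (userland, not part of this file): poll(2) takes a
 * timeout in milliseconds, which sys_poll() above converts to the
 * timespec consumed by doppoll().  The descriptor name is hypothetical.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	int n = poll(&pfd, 1, 1500);		1.5 second timeout
 *	if (n > 0 && (pfd.revents & POLLIN))
 *		...;				fd is readable
 */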
int
doppoll(struct proc *p, struct pollfd *fds, u_int nfds,
    struct timespec *timeout, const sigset_t *sigmask, register_t *retval)
{
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;
	struct timespec elapsed, start, stop;
	int timo, ncoll, i, s, error;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles))
		return (EINVAL);

	/* optimize for the default case, of a small nfds value */
	if (nfds > nitems(pfds)) {
		pl = mallocarray(nfds, sizeof(*pl), M_TEMP,
		    M_WAITOK | M_CANFAIL);
		if (pl == NULL)
			return (EINVAL);
	}

	sz = nfds * sizeof(*pl);

	if ((error = copyin(fds, pl, sz)) != 0)
		goto bad;

	for (i = 0; i < nfds; i++) {
		pl[i].events &= ~POLL_NOHUP;
		pl[i].revents = 0;
	}

	if (sigmask)
		dosigsuspend(p, *sigmask &~ sigcantmask);

retry:
	ncoll = nselcoll;
	atomic_setbits_int(&p->p_flag, P_SELECT);
	pollscan(p, pl, nfds, retval);
	if (*retval)
		goto done;
	if (timeout == NULL || timespecisset(timeout)) {
		timo = (timeout == NULL) ? 0 : tstohz(timeout);
		if (timeout != NULL)
			getnanouptime(&start);
		s = splhigh();
		if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
			splx(s);
			goto retry;
		}
		atomic_clearbits_int(&p->p_flag, P_SELECT);
		error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
		splx(s);
		if (timeout != NULL) {
			getnanouptime(&stop);
			timespecsub(&stop, &start, &elapsed);
			timespecsub(timeout, &elapsed, timeout);
			if (timeout->tv_sec < 0)
				timespecclear(timeout);
		}
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
	}

done:
	atomic_clearbits_int(&p->p_flag, P_SELECT);
	/*
	 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
	 * ignored (since the whole point is to see what would block).
	 */
	switch (error) {
	case ERESTART:
		error = pollout(pl, fds, nfds);
		if (error == 0)
			error = EINTR;
		break;
	case EWOULDBLOCK:
	case 0:
		error = pollout(pl, fds, nfds);
		break;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrpollfd(p, pl, nfds);
#endif /* KTRACE */
bad:
	if (pl != pfds)
		free(pl, M_TEMP, sz);
	return (error);
}

/*
 * utrace system call
 */
int
sys_utrace(struct proc *curp, void *v, register_t *retval)
{
#ifdef KTRACE
	struct sys_utrace_args /* {
		syscallarg(const char *) label;
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr),
	    SCARG(uap, len)));
#else
	return (0);
#endif
}
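/*
 * Illustrative sketch (userland, not part of this file): utrace(2) hands
 * an arbitrary record to ktrace via ktruser() above, so it only has an
 * effect while the process is being traced.  The record layout is
 * hypothetical; the output can be inspected with kdump(1).
 *
 *	struct trace_rec { int phase; long value; } r = { 1, 42 };
 *	utrace("myapp", &r, sizeof(r));
 */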