1 /* $OpenBSD: sys_generic.c,v 1.124 2019/06/21 09:39:48 visa Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/proc.h> 49 #include <sys/resourcevar.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/time.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 #include <sys/sched.h> 62 #include <sys/pledge.h> 63 64 #include <sys/mount.h> 65 #include <sys/syscallargs.h> 66 67 #include <uvm/uvm_extern.h> 68 69 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 70 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 71 int pollout(struct pollfd *, struct pollfd *, u_int); 72 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 73 struct timespec *, const sigset_t *, register_t *); 74 int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *, 75 const sigset_t *, register_t *); 76 77 int 78 iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, 79 unsigned int iovcnt, size_t *residp) 80 { 81 #ifdef KTRACE 82 struct proc *p = curproc; 83 #endif 84 struct iovec *iov; 85 int error, i; 86 size_t resid = 0; 87 88 if (iovcnt > UIO_SMALLIOV) { 89 if (iovcnt > IOV_MAX) 90 return (EINVAL); 91 iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); 92 } else if (iovcnt > 0) { 93 iov = aiov; 94 } else { 95 return (EINVAL); 96 } 97 *iovp = iov; 98 99 if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) 100 return (error); 101 102 #ifdef KTRACE 103 if (KTRPOINT(p, KTR_STRUCT)) 104 ktriovec(p, iov, iovcnt); 105 #endif 106 107 for (i = 0; i < iovcnt; i++) { 108 resid += iov->iov_len; 109 /* 110 * Writes return ssize_t because -1 is returned on error. 111 * Therefore we must restrict the length to SSIZE_MAX to 112 * avoid garbage return values. Note that the addition is 113 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 114 */ 115 if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) 116 return (EINVAL); 117 iov++; 118 } 119 120 if (residp != NULL) 121 *residp = resid; 122 123 return (0); 124 } 125 126 void 127 iovec_free(struct iovec *iov, unsigned int iovcnt) 128 { 129 if (iovcnt > UIO_SMALLIOV) 130 free(iov, M_IOV, iovcnt * sizeof(*iov)); 131 } 132 133 /* 134 * Read system call. 135 */ 136 int 137 sys_read(struct proc *p, void *v, register_t *retval) 138 { 139 struct sys_read_args /* { 140 syscallarg(int) fd; 141 syscallarg(void *) buf; 142 syscallarg(size_t) nbyte; 143 } */ *uap = v; 144 struct iovec iov; 145 struct uio auio; 146 147 iov.iov_base = SCARG(uap, buf); 148 iov.iov_len = SCARG(uap, nbyte); 149 if (iov.iov_len > SSIZE_MAX) 150 return (EINVAL); 151 152 auio.uio_iov = &iov; 153 auio.uio_iovcnt = 1; 154 auio.uio_resid = iov.iov_len; 155 156 return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); 157 } 158 159 /* 160 * Scatter read system call. 161 */ 162 int 163 sys_readv(struct proc *p, void *v, register_t *retval) 164 { 165 struct sys_readv_args /* { 166 syscallarg(int) fd; 167 syscallarg(const struct iovec *) iovp; 168 syscallarg(int) iovcnt; 169 } */ *uap = v; 170 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 171 int error, iovcnt = SCARG(uap, iovcnt); 172 struct uio auio; 173 size_t resid; 174 175 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 176 if (error) 177 goto done; 178 179 auio.uio_iov = iov; 180 auio.uio_iovcnt = iovcnt; 181 auio.uio_resid = resid; 182 183 error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); 184 done: 185 iovec_free(iov, iovcnt); 186 return (error); 187 } 188 189 int 190 dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, 191 register_t *retval) 192 { 193 struct filedesc *fdp = p->p_fd; 194 struct file *fp; 195 long cnt, error = 0; 196 u_int iovlen; 197 #ifdef KTRACE 198 struct iovec *ktriov = NULL; 199 #endif 200 201 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 202 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 203 204 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 205 return (EBADF); 206 207 /* Checks for positioned read. */ 208 if (flags & FO_POSITION) { 209 struct vnode *vp = fp->f_data; 210 211 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 212 (vp->v_flag & VISTTY)) { 213 error = ESPIPE; 214 goto done; 215 } 216 217 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 218 error = EINVAL; 219 goto done; 220 } 221 } 222 223 uio->uio_rw = UIO_READ; 224 uio->uio_segflg = UIO_USERSPACE; 225 uio->uio_procp = p; 226 #ifdef KTRACE 227 /* 228 * if tracing, save a copy of iovec 229 */ 230 if (KTRPOINT(p, KTR_GENIO)) { 231 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 232 memcpy(ktriov, uio->uio_iov, iovlen); 233 } 234 #endif 235 cnt = uio->uio_resid; 236 error = (*fp->f_ops->fo_read)(fp, uio, flags); 237 if (error) { 238 if (uio->uio_resid != cnt && (error == ERESTART || 239 error == EINTR || error == EWOULDBLOCK)) 240 error = 0; 241 } 242 cnt -= uio->uio_resid; 243 244 mtx_enter(&fp->f_mtx); 245 fp->f_rxfer++; 246 fp->f_rbytes += cnt; 247 mtx_leave(&fp->f_mtx); 248 #ifdef KTRACE 249 if (ktriov != NULL) { 250 if (error == 0) 251 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 252 free(ktriov, M_TEMP, iovlen); 253 } 254 #endif 255 *retval = cnt; 256 done: 257 FRELE(fp, p); 258 return (error); 259 } 260 261 /* 262 * Write system call 263 */ 264 int 265 sys_write(struct proc *p, void *v, register_t *retval) 266 { 267 struct sys_write_args /* { 268 syscallarg(int) fd; 269 syscallarg(const void *) buf; 270 syscallarg(size_t) nbyte; 271 } */ *uap = v; 272 struct iovec iov; 273 struct uio auio; 274 275 iov.iov_base = (void *)SCARG(uap, buf); 276 iov.iov_len = SCARG(uap, nbyte); 277 if (iov.iov_len > SSIZE_MAX) 278 return (EINVAL); 279 280 auio.uio_iov = &iov; 281 auio.uio_iovcnt = 1; 282 auio.uio_resid = iov.iov_len; 283 284 return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); 285 } 286 287 /* 288 * Gather write system call 289 */ 290 int 291 sys_writev(struct proc *p, void *v, register_t *retval) 292 { 293 struct sys_writev_args /* { 294 syscallarg(int) fd; 295 syscallarg(const struct iovec *) iovp; 296 syscallarg(int) iovcnt; 297 } */ *uap = v; 298 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 299 int error, iovcnt = SCARG(uap, iovcnt); 300 struct uio auio; 301 size_t resid; 302 303 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 304 if (error) 305 goto done; 306 307 auio.uio_iov = iov; 308 auio.uio_iovcnt = iovcnt; 309 auio.uio_resid = resid; 310 311 error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); 312 done: 313 iovec_free(iov, iovcnt); 314 return (error); 315 } 316 317 int 318 dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, 319 register_t *retval) 320 { 321 struct filedesc *fdp = p->p_fd; 322 struct file *fp; 323 long cnt, error = 0; 324 u_int iovlen; 325 #ifdef KTRACE 326 struct iovec *ktriov = NULL; 327 #endif 328 329 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 330 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 331 332 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 333 return (EBADF); 334 335 /* Checks for positioned write. */ 336 if (flags & FO_POSITION) { 337 struct vnode *vp = fp->f_data; 338 339 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 340 (vp->v_flag & VISTTY)) { 341 error = ESPIPE; 342 goto done; 343 } 344 345 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 346 error = EINVAL; 347 goto done; 348 } 349 } 350 351 uio->uio_rw = UIO_WRITE; 352 uio->uio_segflg = UIO_USERSPACE; 353 uio->uio_procp = p; 354 #ifdef KTRACE 355 /* 356 * if tracing, save a copy of iovec 357 */ 358 if (KTRPOINT(p, KTR_GENIO)) { 359 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 360 memcpy(ktriov, uio->uio_iov, iovlen); 361 } 362 #endif 363 cnt = uio->uio_resid; 364 error = (*fp->f_ops->fo_write)(fp, uio, flags); 365 if (error) { 366 if (uio->uio_resid != cnt && (error == ERESTART || 367 error == EINTR || error == EWOULDBLOCK)) 368 error = 0; 369 if (error == EPIPE) 370 ptsignal(p, SIGPIPE, STHREAD); 371 } 372 cnt -= uio->uio_resid; 373 374 mtx_enter(&fp->f_mtx); 375 fp->f_wxfer++; 376 fp->f_wbytes += cnt; 377 mtx_leave(&fp->f_mtx); 378 #ifdef KTRACE 379 if (ktriov != NULL) { 380 if (error == 0) 381 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 382 free(ktriov, M_TEMP, iovlen); 383 } 384 #endif 385 *retval = cnt; 386 done: 387 FRELE(fp, p); 388 return (error); 389 } 390 391 /* 392 * Ioctl system call 393 */ 394 int 395 sys_ioctl(struct proc *p, void *v, register_t *retval) 396 { 397 struct sys_ioctl_args /* { 398 syscallarg(int) fd; 399 syscallarg(u_long) com; 400 syscallarg(void *) data; 401 } */ *uap = v; 402 struct file *fp; 403 struct filedesc *fdp = p->p_fd; 404 u_long com = SCARG(uap, com); 405 int error = 0; 406 u_int size = 0; 407 caddr_t data, memp = NULL; 408 int tmp; 409 #define STK_PARAMS 128 410 long long stkbuf[STK_PARAMS / sizeof(long long)]; 411 412 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 413 return (EBADF); 414 415 if (fp->f_type == DTYPE_SOCKET) { 416 struct socket *so = fp->f_data; 417 418 if (so->so_state & SS_DNS) { 419 error = EINVAL; 420 goto out; 421 } 422 } 423 424 error = pledge_ioctl(p, com, fp); 425 if (error) 426 goto out; 427 428 switch (com) { 429 case FIONCLEX: 430 case FIOCLEX: 431 fdplock(fdp); 432 if (com == FIONCLEX) 433 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 434 else 435 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 436 fdpunlock(fdp); 437 goto out; 438 } 439 440 /* 441 * Interpret high order word to find amount of data to be 442 * copied to/from the user's address space. 443 */ 444 size = IOCPARM_LEN(com); 445 if (size > IOCPARM_MAX) { 446 error = ENOTTY; 447 goto out; 448 } 449 if (size > sizeof (stkbuf)) { 450 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 451 data = memp; 452 } else 453 data = (caddr_t)stkbuf; 454 if (com&IOC_IN) { 455 if (size) { 456 error = copyin(SCARG(uap, data), data, size); 457 if (error) { 458 goto out; 459 } 460 } else 461 *(caddr_t *)data = SCARG(uap, data); 462 } else if ((com&IOC_OUT) && size) 463 /* 464 * Zero the buffer so the user always 465 * gets back something deterministic. 466 */ 467 memset(data, 0, size); 468 else if (com&IOC_VOID) 469 *(caddr_t *)data = SCARG(uap, data); 470 471 switch (com) { 472 473 case FIONBIO: 474 if ((tmp = *(int *)data) != 0) 475 fp->f_flag |= FNONBLOCK; 476 else 477 fp->f_flag &= ~FNONBLOCK; 478 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 479 break; 480 481 case FIOASYNC: 482 if ((tmp = *(int *)data) != 0) 483 fp->f_flag |= FASYNC; 484 else 485 fp->f_flag &= ~FASYNC; 486 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 487 break; 488 489 case FIOSETOWN: 490 tmp = *(int *)data; 491 492 if (fp->f_type == DTYPE_SOCKET || fp->f_type == DTYPE_PIPE) { 493 /* nothing */ 494 } else if (tmp <= 0) { 495 tmp = -tmp; 496 } else { 497 struct process *pr = prfind(tmp); 498 if (pr == NULL) { 499 error = ESRCH; 500 break; 501 } 502 tmp = pr->ps_pgrp->pg_id; 503 } 504 error = (*fp->f_ops->fo_ioctl) 505 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 506 break; 507 508 case FIOGETOWN: 509 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 510 *(int *)data = -*(int *)data; 511 break; 512 513 default: 514 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 515 break; 516 } 517 /* 518 * Copy any data to user, size was 519 * already set and checked above. 520 */ 521 if (error == 0 && (com&IOC_OUT) && size) 522 error = copyout(data, SCARG(uap, data), size); 523 out: 524 FRELE(fp, p); 525 free(memp, M_IOCTLOPS, size); 526 return (error); 527 } 528 529 int selwait, nselcoll; 530 531 /* 532 * Select system call. 533 */ 534 int 535 sys_select(struct proc *p, void *v, register_t *retval) 536 { 537 struct sys_select_args /* { 538 syscallarg(int) nd; 539 syscallarg(fd_set *) in; 540 syscallarg(fd_set *) ou; 541 syscallarg(fd_set *) ex; 542 syscallarg(struct timeval *) tv; 543 } */ *uap = v; 544 545 struct timespec ts, *tsp = NULL; 546 int error; 547 548 if (SCARG(uap, tv) != NULL) { 549 struct timeval tv; 550 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 551 return (error); 552 if (tv.tv_sec < 0 || !timerisvalid(&tv)) 553 return (EINVAL); 554 #ifdef KTRACE 555 if (KTRPOINT(p, KTR_STRUCT)) 556 ktrreltimeval(p, &tv); 557 #endif 558 TIMEVAL_TO_TIMESPEC(&tv, &ts); 559 tsp = &ts; 560 } 561 562 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 563 SCARG(uap, ex), tsp, NULL, retval)); 564 } 565 566 int 567 sys_pselect(struct proc *p, void *v, register_t *retval) 568 { 569 struct sys_pselect_args /* { 570 syscallarg(int) nd; 571 syscallarg(fd_set *) in; 572 syscallarg(fd_set *) ou; 573 syscallarg(fd_set *) ex; 574 syscallarg(const struct timespec *) ts; 575 syscallarg(const sigset_t *) mask; 576 } */ *uap = v; 577 578 struct timespec ts, *tsp = NULL; 579 sigset_t ss, *ssp = NULL; 580 int error; 581 582 if (SCARG(uap, ts) != NULL) { 583 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 584 return (error); 585 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 586 return (EINVAL); 587 #ifdef KTRACE 588 if (KTRPOINT(p, KTR_STRUCT)) 589 ktrreltimespec(p, &ts); 590 #endif 591 tsp = &ts; 592 } 593 if (SCARG(uap, mask) != NULL) { 594 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 595 return (error); 596 ssp = &ss; 597 } 598 599 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 600 SCARG(uap, ex), tsp, ssp, retval)); 601 } 602 603 int 604 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 605 struct timespec *timeout, const sigset_t *sigmask, register_t *retval) 606 { 607 fd_mask bits[6]; 608 fd_set *pibits[3], *pobits[3]; 609 struct timespec elapsed, start, stop; 610 int s, ncoll, error = 0, timo; 611 u_int ni; 612 613 if (nd < 0) 614 return (EINVAL); 615 if (nd > p->p_fd->fd_nfiles) { 616 /* forgiving; slightly wrong */ 617 nd = p->p_fd->fd_nfiles; 618 } 619 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 620 if (ni > sizeof(bits[0])) { 621 caddr_t mbits; 622 623 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 624 pibits[0] = (fd_set *)&mbits[ni * 0]; 625 pibits[1] = (fd_set *)&mbits[ni * 1]; 626 pibits[2] = (fd_set *)&mbits[ni * 2]; 627 pobits[0] = (fd_set *)&mbits[ni * 3]; 628 pobits[1] = (fd_set *)&mbits[ni * 4]; 629 pobits[2] = (fd_set *)&mbits[ni * 5]; 630 } else { 631 memset(bits, 0, sizeof(bits)); 632 pibits[0] = (fd_set *)&bits[0]; 633 pibits[1] = (fd_set *)&bits[1]; 634 pibits[2] = (fd_set *)&bits[2]; 635 pobits[0] = (fd_set *)&bits[3]; 636 pobits[1] = (fd_set *)&bits[4]; 637 pobits[2] = (fd_set *)&bits[5]; 638 } 639 640 #define getbits(name, x) \ 641 if (name && (error = copyin(name, pibits[x], ni))) \ 642 goto done; 643 getbits(in, 0); 644 getbits(ou, 1); 645 getbits(ex, 2); 646 #undef getbits 647 #ifdef KTRACE 648 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 649 if (in) ktrfdset(p, pibits[0], ni); 650 if (ou) ktrfdset(p, pibits[1], ni); 651 if (ex) ktrfdset(p, pibits[2], ni); 652 } 653 #endif 654 655 if (sigmask) 656 dosigsuspend(p, *sigmask &~ sigcantmask); 657 658 retry: 659 ncoll = nselcoll; 660 atomic_setbits_int(&p->p_flag, P_SELECT); 661 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 662 if (error || *retval) 663 goto done; 664 while (timeout == NULL || timespecisset(timeout)) { 665 timo = (timeout == NULL) ? 0 : tstohz(timeout); 666 if (timeout != NULL) 667 getnanouptime(&start); 668 s = splhigh(); 669 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 670 splx(s); 671 goto retry; 672 } 673 atomic_clearbits_int(&p->p_flag, P_SELECT); 674 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 675 splx(s); 676 if (timeout != NULL) { 677 getnanouptime(&stop); 678 timespecsub(&stop, &start, &elapsed); 679 timespecsub(timeout, &elapsed, timeout); 680 if (timeout->tv_sec < 0) 681 timespecclear(timeout); 682 } 683 if (error == 0) 684 goto retry; 685 if (error != EWOULDBLOCK) 686 break; 687 } 688 done: 689 atomic_clearbits_int(&p->p_flag, P_SELECT); 690 /* select is not restarted after signals... */ 691 if (error == ERESTART) 692 error = EINTR; 693 if (error == EWOULDBLOCK) 694 error = 0; 695 #define putbits(name, x) \ 696 if (name && (error2 = copyout(pobits[x], name, ni))) \ 697 error = error2; 698 if (error == 0) { 699 int error2; 700 701 putbits(in, 0); 702 putbits(ou, 1); 703 putbits(ex, 2); 704 #undef putbits 705 #ifdef KTRACE 706 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 707 if (in) ktrfdset(p, pobits[0], ni); 708 if (ou) ktrfdset(p, pobits[1], ni); 709 if (ex) ktrfdset(p, pobits[2], ni); 710 } 711 #endif 712 } 713 714 if (pibits[0] != (fd_set *)&bits[0]) 715 free(pibits[0], M_TEMP, 6 * ni); 716 return (error); 717 } 718 719 int 720 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 721 register_t *retval) 722 { 723 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 724 struct filedesc *fdp = p->p_fd; 725 int msk, i, j, fd; 726 fd_mask bits; 727 struct file *fp; 728 int n = 0; 729 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 730 731 for (msk = 0; msk < 3; msk++) { 732 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 733 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 734 735 for (i = 0; i < nfd; i += NFDBITS) { 736 bits = pibits->fds_bits[i/NFDBITS]; 737 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 738 bits &= ~(1 << j); 739 if ((fp = fd_getfile(fdp, fd)) == NULL) 740 return (EBADF); 741 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 742 FD_SET(fd, pobits); 743 n++; 744 } 745 FRELE(fp, p); 746 } 747 } 748 } 749 *retval = n; 750 return (0); 751 } 752 753 int 754 seltrue(dev_t dev, int events, struct proc *p) 755 { 756 757 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 758 } 759 760 int 761 selfalse(dev_t dev, int events, struct proc *p) 762 { 763 764 return (0); 765 } 766 767 /* 768 * Record a select request. 769 */ 770 void 771 selrecord(struct proc *selector, struct selinfo *sip) 772 { 773 struct proc *p; 774 pid_t mytid; 775 776 mytid = selector->p_tid; 777 if (sip->si_seltid == mytid) 778 return; 779 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 780 p->p_wchan == (caddr_t)&selwait) 781 sip->si_flags |= SI_COLL; 782 else 783 sip->si_seltid = mytid; 784 } 785 786 /* 787 * Do a wakeup when a selectable event occurs. 788 */ 789 void 790 selwakeup(struct selinfo *sip) 791 { 792 struct proc *p; 793 int s; 794 795 KNOTE(&sip->si_note, NOTE_SUBMIT); 796 if (sip->si_seltid == 0) 797 return; 798 if (sip->si_flags & SI_COLL) { 799 nselcoll++; 800 sip->si_flags &= ~SI_COLL; 801 wakeup(&selwait); 802 } 803 p = tfind(sip->si_seltid); 804 sip->si_seltid = 0; 805 if (p != NULL) { 806 SCHED_LOCK(s); 807 if (p->p_wchan == (caddr_t)&selwait) { 808 if (p->p_stat == SSLEEP) 809 setrunnable(p); 810 else 811 unsleep(p); 812 } else if (p->p_flag & P_SELECT) 813 atomic_clearbits_int(&p->p_flag, P_SELECT); 814 SCHED_UNLOCK(s); 815 } 816 } 817 818 void 819 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 820 { 821 struct filedesc *fdp = p->p_fd; 822 struct file *fp; 823 u_int i; 824 int n = 0; 825 826 for (i = 0; i < nfd; i++, pl++) { 827 /* Check the file descriptor. */ 828 if (pl->fd < 0) { 829 pl->revents = 0; 830 continue; 831 } 832 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 833 pl->revents = POLLNVAL; 834 n++; 835 continue; 836 } 837 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 838 FRELE(fp, p); 839 if (pl->revents != 0) 840 n++; 841 } 842 *retval = n; 843 } 844 845 /* 846 * Only copyout the revents field. 847 */ 848 int 849 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 850 { 851 int error = 0; 852 u_int i = 0; 853 854 while (!error && i++ < nfds) { 855 error = copyout(&pl->revents, &upl->revents, 856 sizeof(upl->revents)); 857 pl++; 858 upl++; 859 } 860 861 return (error); 862 } 863 864 /* 865 * We are using the same mechanism as select only we encode/decode args 866 * differently. 867 */ 868 int 869 sys_poll(struct proc *p, void *v, register_t *retval) 870 { 871 struct sys_poll_args /* { 872 syscallarg(struct pollfd *) fds; 873 syscallarg(u_int) nfds; 874 syscallarg(int) timeout; 875 } */ *uap = v; 876 877 struct timespec ts, *tsp = NULL; 878 int msec = SCARG(uap, timeout); 879 880 if (msec != INFTIM) { 881 if (msec < 0) 882 return (EINVAL); 883 ts.tv_sec = msec / 1000; 884 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 885 tsp = &ts; 886 } 887 888 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 889 retval)); 890 } 891 892 int 893 sys_ppoll(struct proc *p, void *v, register_t *retval) 894 { 895 struct sys_ppoll_args /* { 896 syscallarg(struct pollfd *) fds; 897 syscallarg(u_int) nfds; 898 syscallarg(const struct timespec *) ts; 899 syscallarg(const sigset_t *) mask; 900 } */ *uap = v; 901 902 int error; 903 struct timespec ts, *tsp = NULL; 904 sigset_t ss, *ssp = NULL; 905 906 if (SCARG(uap, ts) != NULL) { 907 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 908 return (error); 909 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 910 return (EINVAL); 911 #ifdef KTRACE 912 if (KTRPOINT(p, KTR_STRUCT)) 913 ktrreltimespec(p, &ts); 914 #endif 915 tsp = &ts; 916 } 917 918 if (SCARG(uap, mask) != NULL) { 919 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 920 return (error); 921 ssp = &ss; 922 } 923 924 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 925 retval)); 926 } 927 928 int 929 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 930 struct timespec *timeout, const sigset_t *sigmask, register_t *retval) 931 { 932 size_t sz; 933 struct pollfd pfds[4], *pl = pfds; 934 struct timespec elapsed, start, stop; 935 int timo, ncoll, i, s, error; 936 937 /* Standards say no more than MAX_OPEN; this is possibly better. */ 938 if (nfds > min((int)lim_cur(RLIMIT_NOFILE), maxfiles)) 939 return (EINVAL); 940 941 /* optimize for the default case, of a small nfds value */ 942 if (nfds > nitems(pfds)) { 943 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 944 M_WAITOK | M_CANFAIL); 945 if (pl == NULL) 946 return (EINVAL); 947 } 948 949 sz = nfds * sizeof(*pl); 950 951 if ((error = copyin(fds, pl, sz)) != 0) 952 goto bad; 953 954 for (i = 0; i < nfds; i++) { 955 pl[i].events &= ~POLL_NOHUP; 956 pl[i].revents = 0; 957 } 958 959 if (sigmask) 960 dosigsuspend(p, *sigmask &~ sigcantmask); 961 962 retry: 963 ncoll = nselcoll; 964 atomic_setbits_int(&p->p_flag, P_SELECT); 965 pollscan(p, pl, nfds, retval); 966 if (*retval) 967 goto done; 968 while (timeout == NULL || timespecisset(timeout)) { 969 timo = (timeout == NULL) ? 0 : tstohz(timeout); 970 if (timeout != NULL) 971 getnanouptime(&start); 972 s = splhigh(); 973 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 974 splx(s); 975 goto retry; 976 } 977 atomic_clearbits_int(&p->p_flag, P_SELECT); 978 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 979 splx(s); 980 if (timeout != NULL) { 981 getnanouptime(&stop); 982 timespecsub(&stop, &start, &elapsed); 983 timespecsub(timeout, &elapsed, timeout); 984 if (timeout->tv_sec < 0) 985 timespecclear(timeout); 986 } 987 if (error == 0) 988 goto retry; 989 if (error != EWOULDBLOCK) 990 break; 991 } 992 993 done: 994 atomic_clearbits_int(&p->p_flag, P_SELECT); 995 /* 996 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 997 * ignored (since the whole point is to see what would block). 998 */ 999 switch (error) { 1000 case ERESTART: 1001 error = pollout(pl, fds, nfds); 1002 if (error == 0) 1003 error = EINTR; 1004 break; 1005 case EWOULDBLOCK: 1006 case 0: 1007 error = pollout(pl, fds, nfds); 1008 break; 1009 } 1010 #ifdef KTRACE 1011 if (KTRPOINT(p, KTR_STRUCT)) 1012 ktrpollfd(p, pl, nfds); 1013 #endif /* KTRACE */ 1014 bad: 1015 if (pl != pfds) 1016 free(pl, M_TEMP, sz); 1017 return (error); 1018 } 1019 1020 /* 1021 * utrace system call 1022 */ 1023 int 1024 sys_utrace(struct proc *curp, void *v, register_t *retval) 1025 { 1026 #ifdef KTRACE 1027 struct sys_utrace_args /* { 1028 syscallarg(const char *) label; 1029 syscallarg(const void *) addr; 1030 syscallarg(size_t) len; 1031 } */ *uap = v; 1032 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1033 SCARG(uap, len))); 1034 #else 1035 return (0); 1036 #endif 1037 } 1038