1 /* $OpenBSD: sys_generic.c,v 1.122 2018/08/20 16:00:22 mpi Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/proc.h> 49 #include <sys/resourcevar.h> 50 #include <sys/socketvar.h> 51 #include <sys/signalvar.h> 52 #include <sys/uio.h> 53 #include <sys/kernel.h> 54 #include <sys/stat.h> 55 #include <sys/malloc.h> 56 #include <sys/poll.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/sched.h> 61 #include <sys/pledge.h> 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 #include <uvm/uvm_extern.h> 67 68 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 69 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 70 int pollout(struct pollfd *, struct pollfd *, u_int); 71 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 72 const struct timespec *, const sigset_t *, register_t *); 73 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 74 const sigset_t *, register_t *); 75 76 int 77 iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, 78 unsigned int iovcnt, size_t *residp) 79 { 80 #ifdef KTRACE 81 struct proc *p = curproc; 82 #endif 83 struct iovec *iov; 84 int error, i; 85 size_t resid = 0; 86 87 if (iovcnt > UIO_SMALLIOV) { 88 if (iovcnt > IOV_MAX) 89 return (EINVAL); 90 iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); 91 } else if (iovcnt > 0) { 92 iov = aiov; 93 } else { 94 return (EINVAL); 95 } 96 *iovp = iov; 97 98 if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) 99 return (error); 100 101 #ifdef KTRACE 102 if (KTRPOINT(p, KTR_STRUCT)) 103 ktriovec(p, iov, iovcnt); 104 #endif 105 106 for (i = 0; i < iovcnt; i++) { 107 resid += iov->iov_len; 108 /* 109 * Writes return ssize_t because -1 is returned on error. 110 * Therefore we must restrict the length to SSIZE_MAX to 111 * avoid garbage return values. Note that the addition is 112 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 113 */ 114 if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) 115 return (EINVAL); 116 iov++; 117 } 118 119 if (residp != NULL) 120 *residp = resid; 121 122 return (0); 123 } 124 125 void 126 iovec_free(struct iovec *iov, unsigned int iovcnt) 127 { 128 if (iovcnt > UIO_SMALLIOV) 129 free(iov, M_IOV, iovcnt * sizeof(*iov)); 130 } 131 132 /* 133 * Read system call. 134 */ 135 int 136 sys_read(struct proc *p, void *v, register_t *retval) 137 { 138 struct sys_read_args /* { 139 syscallarg(int) fd; 140 syscallarg(void *) buf; 141 syscallarg(size_t) nbyte; 142 } */ *uap = v; 143 struct iovec iov; 144 struct uio auio; 145 146 iov.iov_base = SCARG(uap, buf); 147 iov.iov_len = SCARG(uap, nbyte); 148 if (iov.iov_len > SSIZE_MAX) 149 return (EINVAL); 150 151 auio.uio_iov = &iov; 152 auio.uio_iovcnt = 1; 153 auio.uio_resid = iov.iov_len; 154 155 return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); 156 } 157 158 /* 159 * Scatter read system call. 160 */ 161 int 162 sys_readv(struct proc *p, void *v, register_t *retval) 163 { 164 struct sys_readv_args /* { 165 syscallarg(int) fd; 166 syscallarg(const struct iovec *) iovp; 167 syscallarg(int) iovcnt; 168 } */ *uap = v; 169 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 170 int error, iovcnt = SCARG(uap, iovcnt); 171 struct uio auio; 172 size_t resid; 173 174 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 175 if (error) 176 goto done; 177 178 auio.uio_iov = iov; 179 auio.uio_iovcnt = iovcnt; 180 auio.uio_resid = resid; 181 182 error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); 183 done: 184 iovec_free(iov, iovcnt); 185 return (error); 186 } 187 188 int 189 dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, 190 register_t *retval) 191 { 192 struct filedesc *fdp = p->p_fd; 193 struct file *fp; 194 long cnt, error = 0; 195 u_int iovlen; 196 #ifdef KTRACE 197 struct iovec *ktriov = NULL; 198 #endif 199 200 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 201 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 202 203 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 204 return (EBADF); 205 206 /* Checks for positioned read. */ 207 if (flags & FO_POSITION) { 208 struct vnode *vp = fp->f_data; 209 210 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 211 (vp->v_flag & VISTTY)) { 212 error = ESPIPE; 213 goto done; 214 } 215 216 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 217 error = EINVAL; 218 goto done; 219 } 220 } 221 222 uio->uio_rw = UIO_READ; 223 uio->uio_segflg = UIO_USERSPACE; 224 uio->uio_procp = p; 225 #ifdef KTRACE 226 /* 227 * if tracing, save a copy of iovec 228 */ 229 if (KTRPOINT(p, KTR_GENIO)) { 230 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 231 memcpy(ktriov, uio->uio_iov, iovlen); 232 } 233 #endif 234 cnt = uio->uio_resid; 235 error = (*fp->f_ops->fo_read)(fp, uio, flags); 236 if (error) { 237 if (uio->uio_resid != cnt && (error == ERESTART || 238 error == EINTR || error == EWOULDBLOCK)) 239 error = 0; 240 } 241 cnt -= uio->uio_resid; 242 243 mtx_enter(&fp->f_mtx); 244 fp->f_rxfer++; 245 fp->f_rbytes += cnt; 246 mtx_leave(&fp->f_mtx); 247 #ifdef KTRACE 248 if (ktriov != NULL) { 249 if (error == 0) 250 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 251 free(ktriov, M_TEMP, iovlen); 252 } 253 #endif 254 *retval = cnt; 255 done: 256 FRELE(fp, p); 257 return (error); 258 } 259 260 /* 261 * Write system call 262 */ 263 int 264 sys_write(struct proc *p, void *v, register_t *retval) 265 { 266 struct sys_write_args /* { 267 syscallarg(int) fd; 268 syscallarg(const void *) buf; 269 syscallarg(size_t) nbyte; 270 } */ *uap = v; 271 struct iovec iov; 272 struct uio auio; 273 274 iov.iov_base = (void *)SCARG(uap, buf); 275 iov.iov_len = SCARG(uap, nbyte); 276 if (iov.iov_len > SSIZE_MAX) 277 return (EINVAL); 278 279 auio.uio_iov = &iov; 280 auio.uio_iovcnt = 1; 281 auio.uio_resid = iov.iov_len; 282 283 return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); 284 } 285 286 /* 287 * Gather write system call 288 */ 289 int 290 sys_writev(struct proc *p, void *v, register_t *retval) 291 { 292 struct sys_writev_args /* { 293 syscallarg(int) fd; 294 syscallarg(const struct iovec *) iovp; 295 syscallarg(int) iovcnt; 296 } */ *uap = v; 297 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 298 int error, iovcnt = SCARG(uap, iovcnt); 299 struct uio auio; 300 size_t resid; 301 302 error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); 303 if (error) 304 goto done; 305 306 auio.uio_iov = iov; 307 auio.uio_iovcnt = iovcnt; 308 auio.uio_resid = resid; 309 310 error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); 311 done: 312 iovec_free(iov, iovcnt); 313 return (error); 314 } 315 316 int 317 dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, 318 register_t *retval) 319 { 320 struct filedesc *fdp = p->p_fd; 321 struct file *fp; 322 long cnt, error = 0; 323 u_int iovlen; 324 #ifdef KTRACE 325 struct iovec *ktriov = NULL; 326 #endif 327 328 KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); 329 iovlen = uio->uio_iovcnt * sizeof(struct iovec); 330 331 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 332 return (EBADF); 333 334 /* Checks for positioned write. */ 335 if (flags & FO_POSITION) { 336 struct vnode *vp = fp->f_data; 337 338 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || 339 (vp->v_flag & VISTTY)) { 340 error = ESPIPE; 341 goto done; 342 } 343 344 if (uio->uio_offset < 0 && vp->v_type != VCHR) { 345 error = EINVAL; 346 goto done; 347 } 348 } 349 350 uio->uio_rw = UIO_WRITE; 351 uio->uio_segflg = UIO_USERSPACE; 352 uio->uio_procp = p; 353 #ifdef KTRACE 354 /* 355 * if tracing, save a copy of iovec 356 */ 357 if (KTRPOINT(p, KTR_GENIO)) { 358 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 359 memcpy(ktriov, uio->uio_iov, iovlen); 360 } 361 #endif 362 cnt = uio->uio_resid; 363 error = (*fp->f_ops->fo_write)(fp, uio, flags); 364 if (error) { 365 if (uio->uio_resid != cnt && (error == ERESTART || 366 error == EINTR || error == EWOULDBLOCK)) 367 error = 0; 368 if (error == EPIPE) 369 ptsignal(p, SIGPIPE, STHREAD); 370 } 371 cnt -= uio->uio_resid; 372 373 mtx_enter(&fp->f_mtx); 374 fp->f_wxfer++; 375 fp->f_wbytes += cnt; 376 mtx_leave(&fp->f_mtx); 377 #ifdef KTRACE 378 if (ktriov != NULL) { 379 if (error == 0) 380 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 381 free(ktriov, M_TEMP, iovlen); 382 } 383 #endif 384 *retval = cnt; 385 done: 386 FRELE(fp, p); 387 return (error); 388 } 389 390 /* 391 * Ioctl system call 392 */ 393 int 394 sys_ioctl(struct proc *p, void *v, register_t *retval) 395 { 396 struct sys_ioctl_args /* { 397 syscallarg(int) fd; 398 syscallarg(u_long) com; 399 syscallarg(void *) data; 400 } */ *uap = v; 401 struct file *fp; 402 struct filedesc *fdp = p->p_fd; 403 u_long com = SCARG(uap, com); 404 int error = 0; 405 u_int size = 0; 406 caddr_t data, memp = NULL; 407 int tmp; 408 #define STK_PARAMS 128 409 long long stkbuf[STK_PARAMS / sizeof(long long)]; 410 411 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 412 return (EBADF); 413 414 if (fp->f_type == DTYPE_SOCKET) { 415 struct socket *so = fp->f_data; 416 417 if (so->so_state & SS_DNS) { 418 error = EINVAL; 419 goto out; 420 } 421 } 422 423 error = pledge_ioctl(p, com, fp); 424 if (error) 425 goto out; 426 427 switch (com) { 428 case FIONCLEX: 429 case FIOCLEX: 430 fdplock(fdp); 431 if (com == FIONCLEX) 432 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 433 else 434 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 435 fdpunlock(fdp); 436 goto out; 437 } 438 439 /* 440 * Interpret high order word to find amount of data to be 441 * copied to/from the user's address space. 442 */ 443 size = IOCPARM_LEN(com); 444 if (size > IOCPARM_MAX) { 445 error = ENOTTY; 446 goto out; 447 } 448 if (size > sizeof (stkbuf)) { 449 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 450 data = memp; 451 } else 452 data = (caddr_t)stkbuf; 453 if (com&IOC_IN) { 454 if (size) { 455 error = copyin(SCARG(uap, data), data, size); 456 if (error) { 457 goto out; 458 } 459 } else 460 *(caddr_t *)data = SCARG(uap, data); 461 } else if ((com&IOC_OUT) && size) 462 /* 463 * Zero the buffer so the user always 464 * gets back something deterministic. 465 */ 466 memset(data, 0, size); 467 else if (com&IOC_VOID) 468 *(caddr_t *)data = SCARG(uap, data); 469 470 switch (com) { 471 472 case FIONBIO: 473 if ((tmp = *(int *)data) != 0) 474 fp->f_flag |= FNONBLOCK; 475 else 476 fp->f_flag &= ~FNONBLOCK; 477 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 478 break; 479 480 case FIOASYNC: 481 if ((tmp = *(int *)data) != 0) 482 fp->f_flag |= FASYNC; 483 else 484 fp->f_flag &= ~FASYNC; 485 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 486 break; 487 488 case FIOSETOWN: 489 tmp = *(int *)data; 490 491 if (fp->f_type == DTYPE_SOCKET || fp->f_type == DTYPE_PIPE) { 492 /* nothing */ 493 } else if (tmp <= 0) { 494 tmp = -tmp; 495 } else { 496 struct process *pr = prfind(tmp); 497 if (pr == NULL) { 498 error = ESRCH; 499 break; 500 } 501 tmp = pr->ps_pgrp->pg_id; 502 } 503 error = (*fp->f_ops->fo_ioctl) 504 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 505 break; 506 507 case FIOGETOWN: 508 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 509 *(int *)data = -*(int *)data; 510 break; 511 512 default: 513 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 514 break; 515 } 516 /* 517 * Copy any data to user, size was 518 * already set and checked above. 519 */ 520 if (error == 0 && (com&IOC_OUT) && size) 521 error = copyout(data, SCARG(uap, data), size); 522 out: 523 FRELE(fp, p); 524 free(memp, M_IOCTLOPS, size); 525 return (error); 526 } 527 528 int selwait, nselcoll; 529 530 /* 531 * Select system call. 532 */ 533 int 534 sys_select(struct proc *p, void *v, register_t *retval) 535 { 536 struct sys_select_args /* { 537 syscallarg(int) nd; 538 syscallarg(fd_set *) in; 539 syscallarg(fd_set *) ou; 540 syscallarg(fd_set *) ex; 541 syscallarg(struct timeval *) tv; 542 } */ *uap = v; 543 544 struct timespec ts, *tsp = NULL; 545 int error; 546 547 if (SCARG(uap, tv) != NULL) { 548 struct timeval tv; 549 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 550 return (error); 551 if ((error = itimerfix(&tv)) != 0) 552 return (error); 553 #ifdef KTRACE 554 if (KTRPOINT(p, KTR_STRUCT)) 555 ktrreltimeval(p, &tv); 556 #endif 557 TIMEVAL_TO_TIMESPEC(&tv, &ts); 558 tsp = &ts; 559 } 560 561 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 562 SCARG(uap, ex), tsp, NULL, retval)); 563 } 564 565 int 566 sys_pselect(struct proc *p, void *v, register_t *retval) 567 { 568 struct sys_pselect_args /* { 569 syscallarg(int) nd; 570 syscallarg(fd_set *) in; 571 syscallarg(fd_set *) ou; 572 syscallarg(fd_set *) ex; 573 syscallarg(const struct timespec *) ts; 574 syscallarg(const sigset_t *) mask; 575 } */ *uap = v; 576 577 struct timespec ts, *tsp = NULL; 578 sigset_t ss, *ssp = NULL; 579 int error; 580 581 if (SCARG(uap, ts) != NULL) { 582 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 583 return (error); 584 if ((error = timespecfix(&ts)) != 0) 585 return (error); 586 #ifdef KTRACE 587 if (KTRPOINT(p, KTR_STRUCT)) 588 ktrreltimespec(p, &ts); 589 #endif 590 tsp = &ts; 591 } 592 if (SCARG(uap, mask) != NULL) { 593 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 594 return (error); 595 ssp = &ss; 596 } 597 598 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 599 SCARG(uap, ex), tsp, ssp, retval)); 600 } 601 602 int 603 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 604 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 605 { 606 fd_mask bits[6]; 607 fd_set *pibits[3], *pobits[3]; 608 struct timespec ats, rts, tts; 609 int s, ncoll, error = 0, timo; 610 u_int ni; 611 612 if (nd < 0) 613 return (EINVAL); 614 if (nd > p->p_fd->fd_nfiles) { 615 /* forgiving; slightly wrong */ 616 nd = p->p_fd->fd_nfiles; 617 } 618 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 619 if (ni > sizeof(bits[0])) { 620 caddr_t mbits; 621 622 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 623 pibits[0] = (fd_set *)&mbits[ni * 0]; 624 pibits[1] = (fd_set *)&mbits[ni * 1]; 625 pibits[2] = (fd_set *)&mbits[ni * 2]; 626 pobits[0] = (fd_set *)&mbits[ni * 3]; 627 pobits[1] = (fd_set *)&mbits[ni * 4]; 628 pobits[2] = (fd_set *)&mbits[ni * 5]; 629 } else { 630 memset(bits, 0, sizeof(bits)); 631 pibits[0] = (fd_set *)&bits[0]; 632 pibits[1] = (fd_set *)&bits[1]; 633 pibits[2] = (fd_set *)&bits[2]; 634 pobits[0] = (fd_set *)&bits[3]; 635 pobits[1] = (fd_set *)&bits[4]; 636 pobits[2] = (fd_set *)&bits[5]; 637 } 638 639 #define getbits(name, x) \ 640 if (name && (error = copyin(name, pibits[x], ni))) \ 641 goto done; 642 getbits(in, 0); 643 getbits(ou, 1); 644 getbits(ex, 2); 645 #undef getbits 646 #ifdef KTRACE 647 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 648 if (in) ktrfdset(p, pibits[0], ni); 649 if (ou) ktrfdset(p, pibits[1], ni); 650 if (ex) ktrfdset(p, pibits[2], ni); 651 } 652 #endif 653 654 if (tsp) { 655 getnanouptime(&rts); 656 timespecadd(tsp, &rts, &ats); 657 } else { 658 ats.tv_sec = 0; 659 ats.tv_nsec = 0; 660 } 661 timo = 0; 662 663 if (sigmask) 664 dosigsuspend(p, *sigmask &~ sigcantmask); 665 666 retry: 667 ncoll = nselcoll; 668 atomic_setbits_int(&p->p_flag, P_SELECT); 669 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 670 if (error || *retval) 671 goto done; 672 if (tsp) { 673 getnanouptime(&rts); 674 if (timespeccmp(&rts, &ats, >=)) 675 goto done; 676 timespecsub(&ats, &rts, &tts); 677 timo = tts.tv_sec > 24 * 60 * 60 ? 678 24 * 60 * 60 * hz : tstohz(&tts); 679 } 680 s = splhigh(); 681 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 682 splx(s); 683 goto retry; 684 } 685 atomic_clearbits_int(&p->p_flag, P_SELECT); 686 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 687 splx(s); 688 if (error == 0) 689 goto retry; 690 done: 691 atomic_clearbits_int(&p->p_flag, P_SELECT); 692 /* select is not restarted after signals... */ 693 if (error == ERESTART) 694 error = EINTR; 695 if (error == EWOULDBLOCK) 696 error = 0; 697 #define putbits(name, x) \ 698 if (name && (error2 = copyout(pobits[x], name, ni))) \ 699 error = error2; 700 if (error == 0) { 701 int error2; 702 703 putbits(in, 0); 704 putbits(ou, 1); 705 putbits(ex, 2); 706 #undef putbits 707 #ifdef KTRACE 708 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 709 if (in) ktrfdset(p, pobits[0], ni); 710 if (ou) ktrfdset(p, pobits[1], ni); 711 if (ex) ktrfdset(p, pobits[2], ni); 712 } 713 #endif 714 } 715 716 if (pibits[0] != (fd_set *)&bits[0]) 717 free(pibits[0], M_TEMP, 6 * ni); 718 return (error); 719 } 720 721 int 722 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 723 register_t *retval) 724 { 725 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 726 struct filedesc *fdp = p->p_fd; 727 int msk, i, j, fd; 728 fd_mask bits; 729 struct file *fp; 730 int n = 0; 731 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 732 733 for (msk = 0; msk < 3; msk++) { 734 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 735 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 736 737 for (i = 0; i < nfd; i += NFDBITS) { 738 bits = pibits->fds_bits[i/NFDBITS]; 739 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 740 bits &= ~(1 << j); 741 if ((fp = fd_getfile(fdp, fd)) == NULL) 742 return (EBADF); 743 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 744 FD_SET(fd, pobits); 745 n++; 746 } 747 FRELE(fp, p); 748 } 749 } 750 } 751 *retval = n; 752 return (0); 753 } 754 755 int 756 seltrue(dev_t dev, int events, struct proc *p) 757 { 758 759 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 760 } 761 762 int 763 selfalse(dev_t dev, int events, struct proc *p) 764 { 765 766 return (0); 767 } 768 769 /* 770 * Record a select request. 771 */ 772 void 773 selrecord(struct proc *selector, struct selinfo *sip) 774 { 775 struct proc *p; 776 pid_t mytid; 777 778 mytid = selector->p_tid; 779 if (sip->si_seltid == mytid) 780 return; 781 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 782 p->p_wchan == (caddr_t)&selwait) 783 sip->si_flags |= SI_COLL; 784 else 785 sip->si_seltid = mytid; 786 } 787 788 /* 789 * Do a wakeup when a selectable event occurs. 790 */ 791 void 792 selwakeup(struct selinfo *sip) 793 { 794 struct proc *p; 795 int s; 796 797 KNOTE(&sip->si_note, NOTE_SUBMIT); 798 if (sip->si_seltid == 0) 799 return; 800 if (sip->si_flags & SI_COLL) { 801 nselcoll++; 802 sip->si_flags &= ~SI_COLL; 803 wakeup(&selwait); 804 } 805 p = tfind(sip->si_seltid); 806 sip->si_seltid = 0; 807 if (p != NULL) { 808 SCHED_LOCK(s); 809 if (p->p_wchan == (caddr_t)&selwait) { 810 if (p->p_stat == SSLEEP) 811 setrunnable(p); 812 else 813 unsleep(p); 814 } else if (p->p_flag & P_SELECT) 815 atomic_clearbits_int(&p->p_flag, P_SELECT); 816 SCHED_UNLOCK(s); 817 } 818 } 819 820 void 821 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 822 { 823 struct filedesc *fdp = p->p_fd; 824 struct file *fp; 825 u_int i; 826 int n = 0; 827 828 for (i = 0; i < nfd; i++, pl++) { 829 /* Check the file descriptor. */ 830 if (pl->fd < 0) { 831 pl->revents = 0; 832 continue; 833 } 834 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 835 pl->revents = POLLNVAL; 836 n++; 837 continue; 838 } 839 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 840 FRELE(fp, p); 841 if (pl->revents != 0) 842 n++; 843 } 844 *retval = n; 845 } 846 847 /* 848 * Only copyout the revents field. 849 */ 850 int 851 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 852 { 853 int error = 0; 854 u_int i = 0; 855 856 while (!error && i++ < nfds) { 857 error = copyout(&pl->revents, &upl->revents, 858 sizeof(upl->revents)); 859 pl++; 860 upl++; 861 } 862 863 return (error); 864 } 865 866 /* 867 * We are using the same mechanism as select only we encode/decode args 868 * differently. 869 */ 870 int 871 sys_poll(struct proc *p, void *v, register_t *retval) 872 { 873 struct sys_poll_args /* { 874 syscallarg(struct pollfd *) fds; 875 syscallarg(u_int) nfds; 876 syscallarg(int) timeout; 877 } */ *uap = v; 878 879 struct timespec ts, *tsp = NULL; 880 int msec = SCARG(uap, timeout); 881 882 if (msec != INFTIM) { 883 if (msec < 0) 884 return (EINVAL); 885 ts.tv_sec = msec / 1000; 886 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 887 tsp = &ts; 888 } 889 890 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 891 retval)); 892 } 893 894 int 895 sys_ppoll(struct proc *p, void *v, register_t *retval) 896 { 897 struct sys_ppoll_args /* { 898 syscallarg(struct pollfd *) fds; 899 syscallarg(u_int) nfds; 900 syscallarg(const struct timespec *) ts; 901 syscallarg(const sigset_t *) mask; 902 } */ *uap = v; 903 904 int error; 905 struct timespec ts, *tsp = NULL; 906 sigset_t ss, *ssp = NULL; 907 908 if (SCARG(uap, ts) != NULL) { 909 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 910 return (error); 911 if ((error = timespecfix(&ts)) != 0) 912 return (error); 913 #ifdef KTRACE 914 if (KTRPOINT(p, KTR_STRUCT)) 915 ktrreltimespec(p, &ts); 916 #endif 917 tsp = &ts; 918 } 919 920 if (SCARG(uap, mask) != NULL) { 921 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 922 return (error); 923 ssp = &ss; 924 } 925 926 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 927 retval)); 928 } 929 930 int 931 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 932 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 933 { 934 size_t sz; 935 struct pollfd pfds[4], *pl = pfds; 936 struct timespec ats, rts, tts; 937 int timo, ncoll, i, s, error; 938 939 /* Standards say no more than MAX_OPEN; this is possibly better. */ 940 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 941 return (EINVAL); 942 943 /* optimize for the default case, of a small nfds value */ 944 if (nfds > nitems(pfds)) { 945 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 946 M_WAITOK | M_CANFAIL); 947 if (pl == NULL) 948 return (EINVAL); 949 } 950 951 sz = nfds * sizeof(*pl); 952 953 if ((error = copyin(fds, pl, sz)) != 0) 954 goto bad; 955 956 for (i = 0; i < nfds; i++) { 957 pl[i].events &= ~POLL_NOHUP; 958 pl[i].revents = 0; 959 } 960 961 if (tsp != NULL) { 962 getnanouptime(&rts); 963 timespecadd(tsp, &rts, &ats); 964 } else { 965 ats.tv_sec = 0; 966 ats.tv_nsec = 0; 967 } 968 timo = 0; 969 970 if (sigmask) 971 dosigsuspend(p, *sigmask &~ sigcantmask); 972 973 retry: 974 ncoll = nselcoll; 975 atomic_setbits_int(&p->p_flag, P_SELECT); 976 pollscan(p, pl, nfds, retval); 977 if (*retval) 978 goto done; 979 if (tsp != NULL) { 980 getnanouptime(&rts); 981 if (timespeccmp(&rts, &ats, >=)) 982 goto done; 983 timespecsub(&ats, &rts, &tts); 984 timo = tts.tv_sec > 24 * 60 * 60 ? 985 24 * 60 * 60 * hz : tstohz(&tts); 986 } 987 s = splhigh(); 988 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 989 splx(s); 990 goto retry; 991 } 992 atomic_clearbits_int(&p->p_flag, P_SELECT); 993 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 994 splx(s); 995 if (error == 0) 996 goto retry; 997 998 done: 999 atomic_clearbits_int(&p->p_flag, P_SELECT); 1000 /* 1001 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1002 * ignored (since the whole point is to see what would block). 1003 */ 1004 switch (error) { 1005 case ERESTART: 1006 error = pollout(pl, fds, nfds); 1007 if (error == 0) 1008 error = EINTR; 1009 break; 1010 case EWOULDBLOCK: 1011 case 0: 1012 error = pollout(pl, fds, nfds); 1013 break; 1014 } 1015 #ifdef KTRACE 1016 if (KTRPOINT(p, KTR_STRUCT)) 1017 ktrpollfd(p, pl, nfds); 1018 #endif /* KTRACE */ 1019 bad: 1020 if (pl != pfds) 1021 free(pl, M_TEMP, sz); 1022 return (error); 1023 } 1024 1025 /* 1026 * utrace system call 1027 */ 1028 int 1029 sys_utrace(struct proc *curp, void *v, register_t *retval) 1030 { 1031 #ifdef KTRACE 1032 struct sys_utrace_args /* { 1033 syscallarg(const char *) label; 1034 syscallarg(const void *) addr; 1035 syscallarg(size_t) len; 1036 } */ *uap = v; 1037 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1038 SCARG(uap, len))); 1039 #else 1040 return (0); 1041 #endif 1042 } 1043