1 /* $OpenBSD: sys_generic.c,v 1.119 2018/05/08 08:53:41 mpi Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/resourcevar.h> 49 #include <sys/socketvar.h> 50 #include <sys/signalvar.h> 51 #include <sys/uio.h> 52 #include <sys/kernel.h> 53 #include <sys/stat.h> 54 #include <sys/malloc.h> 55 #include <sys/poll.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 #include <sys/sched.h> 60 #include <sys/pledge.h> 61 62 #include <sys/mount.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 68 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 69 int pollout(struct pollfd *, struct pollfd *, u_int); 70 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 71 const struct timespec *, const sigset_t *, register_t *); 72 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 73 const sigset_t *, register_t *); 74 75 /* 76 * Read system call. 77 */ 78 int 79 sys_read(struct proc *p, void *v, register_t *retval) 80 { 81 struct sys_read_args /* { 82 syscallarg(int) fd; 83 syscallarg(void *) buf; 84 syscallarg(size_t) nbyte; 85 } */ *uap = v; 86 struct iovec iov; 87 int fd = SCARG(uap, fd); 88 struct file *fp; 89 struct filedesc *fdp = p->p_fd; 90 91 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 92 return (EBADF); 93 94 iov.iov_base = SCARG(uap, buf); 95 iov.iov_len = SCARG(uap, nbyte); 96 97 /* dofilereadv() will FRELE the descriptor for us */ 98 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 99 } 100 101 /* 102 * Scatter read system call. 103 */ 104 int 105 sys_readv(struct proc *p, void *v, register_t *retval) 106 { 107 struct sys_readv_args /* { 108 syscallarg(int) fd; 109 syscallarg(const struct iovec *) iovp; 110 syscallarg(int) iovcnt; 111 } */ *uap = v; 112 int fd = SCARG(uap, fd); 113 struct file *fp; 114 struct filedesc *fdp = p->p_fd; 115 116 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 117 return (EBADF); 118 119 /* dofilereadv() will FRELE the descriptor for us */ 120 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 121 &fp->f_offset, retval)); 122 } 123 124 int 125 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 126 int iovcnt, int userspace, off_t *offset, register_t *retval) 127 { 128 struct iovec aiov[UIO_SMALLIOV]; 129 struct uio auio; 130 struct iovec *iov; 131 struct iovec *needfree = NULL; 132 long i, cnt, error = 0; 133 u_int iovlen; 134 #ifdef KTRACE 135 struct iovec *ktriov = NULL; 136 #endif 137 138 /* note: can't use iovlen until iovcnt is validated */ 139 iovlen = iovcnt * sizeof(struct iovec); 140 141 /* 142 * If the iovec array exists in userspace, it needs to be copied in; 143 * otherwise, it can be used directly. 144 */ 145 if (userspace) { 146 if ((u_int)iovcnt > UIO_SMALLIOV) { 147 if ((u_int)iovcnt > IOV_MAX) { 148 error = EINVAL; 149 goto out; 150 } 151 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 152 } else if ((u_int)iovcnt > 0) { 153 iov = aiov; 154 needfree = NULL; 155 } else { 156 error = EINVAL; 157 goto out; 158 } 159 if ((error = copyin(iovp, iov, iovlen))) 160 goto done; 161 #ifdef KTRACE 162 if (KTRPOINT(p, KTR_STRUCT)) 163 ktriovec(p, iov, iovcnt); 164 #endif 165 } else { 166 iov = (struct iovec *)iovp; /* de-constify */ 167 } 168 169 auio.uio_iov = iov; 170 auio.uio_iovcnt = iovcnt; 171 auio.uio_rw = UIO_READ; 172 auio.uio_segflg = UIO_USERSPACE; 173 auio.uio_procp = p; 174 auio.uio_resid = 0; 175 for (i = 0; i < iovcnt; i++) { 176 auio.uio_resid += iov->iov_len; 177 /* 178 * Reads return ssize_t because -1 is returned on error. 179 * Therefore we must restrict the length to SSIZE_MAX to 180 * avoid garbage return values. Note that the addition is 181 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 182 */ 183 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 184 error = EINVAL; 185 goto done; 186 } 187 iov++; 188 } 189 #ifdef KTRACE 190 /* 191 * if tracing, save a copy of iovec 192 */ 193 if (KTRPOINT(p, KTR_GENIO)) { 194 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 195 memcpy(ktriov, auio.uio_iov, iovlen); 196 } 197 #endif 198 cnt = auio.uio_resid; 199 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 200 if (error) 201 if (auio.uio_resid != cnt && (error == ERESTART || 202 error == EINTR || error == EWOULDBLOCK)) 203 error = 0; 204 cnt -= auio.uio_resid; 205 206 mtx_enter(&fp->f_mtx); 207 fp->f_rxfer++; 208 fp->f_rbytes += cnt; 209 mtx_leave(&fp->f_mtx); 210 #ifdef KTRACE 211 if (ktriov != NULL) { 212 if (error == 0) 213 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 214 free(ktriov, M_TEMP, iovlen); 215 } 216 #endif 217 *retval = cnt; 218 done: 219 if (needfree) 220 free(needfree, M_IOV, iovlen); 221 out: 222 FRELE(fp, p); 223 return (error); 224 } 225 226 /* 227 * Write system call 228 */ 229 int 230 sys_write(struct proc *p, void *v, register_t *retval) 231 { 232 struct sys_write_args /* { 233 syscallarg(int) fd; 234 syscallarg(const void *) buf; 235 syscallarg(size_t) nbyte; 236 } */ *uap = v; 237 struct iovec iov; 238 int fd = SCARG(uap, fd); 239 struct file *fp; 240 struct filedesc *fdp = p->p_fd; 241 242 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 243 return (EBADF); 244 245 iov.iov_base = (void *)SCARG(uap, buf); 246 iov.iov_len = SCARG(uap, nbyte); 247 248 /* dofilewritev() will FRELE the descriptor for us */ 249 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 250 } 251 252 /* 253 * Gather write system call 254 */ 255 int 256 sys_writev(struct proc *p, void *v, register_t *retval) 257 { 258 struct sys_writev_args /* { 259 syscallarg(int) fd; 260 syscallarg(const struct iovec *) iovp; 261 syscallarg(int) iovcnt; 262 } */ *uap = v; 263 int fd = SCARG(uap, fd); 264 struct file *fp; 265 struct filedesc *fdp = p->p_fd; 266 267 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 268 return (EBADF); 269 270 /* dofilewritev() will FRELE the descriptor for us */ 271 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 272 &fp->f_offset, retval)); 273 } 274 275 int 276 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 277 int iovcnt, int userspace, off_t *offset, register_t *retval) 278 { 279 struct iovec aiov[UIO_SMALLIOV]; 280 struct uio auio; 281 struct iovec *iov; 282 struct iovec *needfree = NULL; 283 long i, cnt, error = 0; 284 u_int iovlen; 285 #ifdef KTRACE 286 struct iovec *ktriov = NULL; 287 #endif 288 289 /* note: can't use iovlen until iovcnt is validated */ 290 iovlen = iovcnt * sizeof(struct iovec); 291 292 /* 293 * If the iovec array exists in userspace, it needs to be copied in; 294 * otherwise, it can be used directly. 295 */ 296 if (userspace) { 297 if ((u_int)iovcnt > UIO_SMALLIOV) { 298 if ((u_int)iovcnt > IOV_MAX) { 299 error = EINVAL; 300 goto out; 301 } 302 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 303 } else if ((u_int)iovcnt > 0) { 304 iov = aiov; 305 needfree = NULL; 306 } else { 307 error = EINVAL; 308 goto out; 309 } 310 if ((error = copyin(iovp, iov, iovlen))) 311 goto done; 312 #ifdef KTRACE 313 if (KTRPOINT(p, KTR_STRUCT)) 314 ktriovec(p, iov, iovcnt); 315 #endif 316 } else { 317 iov = (struct iovec *)iovp; /* de-constify */ 318 } 319 320 auio.uio_iov = iov; 321 auio.uio_iovcnt = iovcnt; 322 auio.uio_rw = UIO_WRITE; 323 auio.uio_segflg = UIO_USERSPACE; 324 auio.uio_procp = p; 325 auio.uio_resid = 0; 326 for (i = 0; i < iovcnt; i++) { 327 auio.uio_resid += iov->iov_len; 328 /* 329 * Writes return ssize_t because -1 is returned on error. 330 * Therefore we must restrict the length to SSIZE_MAX to 331 * avoid garbage return values. Note that the addition is 332 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 333 */ 334 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 335 error = EINVAL; 336 goto done; 337 } 338 iov++; 339 } 340 #ifdef KTRACE 341 /* 342 * if tracing, save a copy of iovec 343 */ 344 if (KTRPOINT(p, KTR_GENIO)) { 345 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 346 memcpy(ktriov, auio.uio_iov, iovlen); 347 } 348 #endif 349 cnt = auio.uio_resid; 350 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 351 if (error) { 352 if (auio.uio_resid != cnt && (error == ERESTART || 353 error == EINTR || error == EWOULDBLOCK)) 354 error = 0; 355 if (error == EPIPE) 356 ptsignal(p, SIGPIPE, STHREAD); 357 } 358 cnt -= auio.uio_resid; 359 360 mtx_enter(&fp->f_mtx); 361 fp->f_wxfer++; 362 fp->f_wbytes += cnt; 363 mtx_leave(&fp->f_mtx); 364 #ifdef KTRACE 365 if (ktriov != NULL) { 366 if (error == 0) 367 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 368 free(ktriov, M_TEMP, iovlen); 369 } 370 #endif 371 *retval = cnt; 372 done: 373 if (needfree) 374 free(needfree, M_IOV, iovlen); 375 out: 376 FRELE(fp, p); 377 return (error); 378 } 379 380 /* 381 * Ioctl system call 382 */ 383 int 384 sys_ioctl(struct proc *p, void *v, register_t *retval) 385 { 386 struct sys_ioctl_args /* { 387 syscallarg(int) fd; 388 syscallarg(u_long) com; 389 syscallarg(void *) data; 390 } */ *uap = v; 391 struct file *fp; 392 struct filedesc *fdp; 393 u_long com = SCARG(uap, com); 394 int error = 0; 395 u_int size; 396 caddr_t data, memp = NULL; 397 int tmp; 398 #define STK_PARAMS 128 399 long long stkbuf[STK_PARAMS / sizeof(long long)]; 400 401 fdp = p->p_fd; 402 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 403 return (EBADF); 404 405 if (fp->f_type == DTYPE_SOCKET) { 406 struct socket *so = fp->f_data; 407 408 if (so->so_state & SS_DNS) { 409 error = EINVAL; 410 goto out; 411 } 412 } 413 414 error = pledge_ioctl(p, com, fp); 415 if (error) 416 goto out; 417 418 switch (com) { 419 case FIONCLEX: 420 case FIOCLEX: 421 fdplock(fdp); 422 if (com == FIONCLEX) 423 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 424 else 425 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 426 fdpunlock(fdp); 427 goto out; 428 } 429 430 /* 431 * Interpret high order word to find amount of data to be 432 * copied to/from the user's address space. 433 */ 434 size = IOCPARM_LEN(com); 435 if (size > IOCPARM_MAX) { 436 error = ENOTTY; 437 goto out; 438 } 439 if (size > sizeof (stkbuf)) { 440 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 441 data = memp; 442 } else 443 data = (caddr_t)stkbuf; 444 if (com&IOC_IN) { 445 if (size) { 446 error = copyin(SCARG(uap, data), data, size); 447 if (error) { 448 goto out; 449 } 450 } else 451 *(caddr_t *)data = SCARG(uap, data); 452 } else if ((com&IOC_OUT) && size) 453 /* 454 * Zero the buffer so the user always 455 * gets back something deterministic. 456 */ 457 memset(data, 0, size); 458 else if (com&IOC_VOID) 459 *(caddr_t *)data = SCARG(uap, data); 460 461 switch (com) { 462 463 case FIONBIO: 464 if ((tmp = *(int *)data) != 0) 465 fp->f_flag |= FNONBLOCK; 466 else 467 fp->f_flag &= ~FNONBLOCK; 468 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 469 break; 470 471 case FIOASYNC: 472 if ((tmp = *(int *)data) != 0) 473 fp->f_flag |= FASYNC; 474 else 475 fp->f_flag &= ~FASYNC; 476 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 477 break; 478 479 case FIOSETOWN: 480 tmp = *(int *)data; 481 if (fp->f_type == DTYPE_SOCKET) { 482 struct socket *so = fp->f_data; 483 484 so->so_pgid = tmp; 485 so->so_siguid = p->p_ucred->cr_ruid; 486 so->so_sigeuid = p->p_ucred->cr_uid; 487 error = 0; 488 break; 489 } 490 if (tmp <= 0) { 491 tmp = -tmp; 492 } else { 493 struct process *pr = prfind(tmp); 494 if (pr == NULL) { 495 error = ESRCH; 496 break; 497 } 498 tmp = pr->ps_pgrp->pg_id; 499 } 500 error = (*fp->f_ops->fo_ioctl) 501 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 502 break; 503 504 case FIOGETOWN: 505 if (fp->f_type == DTYPE_SOCKET) { 506 error = 0; 507 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 508 break; 509 } 510 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 511 *(int *)data = -*(int *)data; 512 break; 513 514 default: 515 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 516 break; 517 } 518 /* 519 * Copy any data to user, size was 520 * already set and checked above. 521 */ 522 if (error == 0 && (com&IOC_OUT) && size) 523 error = copyout(data, SCARG(uap, data), size); 524 out: 525 FRELE(fp, p); 526 free(memp, M_IOCTLOPS, size); 527 return (error); 528 } 529 530 int selwait, nselcoll; 531 532 /* 533 * Select system call. 534 */ 535 int 536 sys_select(struct proc *p, void *v, register_t *retval) 537 { 538 struct sys_select_args /* { 539 syscallarg(int) nd; 540 syscallarg(fd_set *) in; 541 syscallarg(fd_set *) ou; 542 syscallarg(fd_set *) ex; 543 syscallarg(struct timeval *) tv; 544 } */ *uap = v; 545 546 struct timespec ts, *tsp = NULL; 547 int error; 548 549 if (SCARG(uap, tv) != NULL) { 550 struct timeval tv; 551 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 552 return (error); 553 if ((error = itimerfix(&tv)) != 0) 554 return (error); 555 #ifdef KTRACE 556 if (KTRPOINT(p, KTR_STRUCT)) 557 ktrreltimeval(p, &tv); 558 #endif 559 TIMEVAL_TO_TIMESPEC(&tv, &ts); 560 tsp = &ts; 561 } 562 563 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 564 SCARG(uap, ex), tsp, NULL, retval)); 565 } 566 567 int 568 sys_pselect(struct proc *p, void *v, register_t *retval) 569 { 570 struct sys_pselect_args /* { 571 syscallarg(int) nd; 572 syscallarg(fd_set *) in; 573 syscallarg(fd_set *) ou; 574 syscallarg(fd_set *) ex; 575 syscallarg(const struct timespec *) ts; 576 syscallarg(const sigset_t *) mask; 577 } */ *uap = v; 578 579 struct timespec ts, *tsp = NULL; 580 sigset_t ss, *ssp = NULL; 581 int error; 582 583 if (SCARG(uap, ts) != NULL) { 584 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 585 return (error); 586 if ((error = timespecfix(&ts)) != 0) 587 return (error); 588 #ifdef KTRACE 589 if (KTRPOINT(p, KTR_STRUCT)) 590 ktrreltimespec(p, &ts); 591 #endif 592 tsp = &ts; 593 } 594 if (SCARG(uap, mask) != NULL) { 595 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 596 return (error); 597 ssp = &ss; 598 } 599 600 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 601 SCARG(uap, ex), tsp, ssp, retval)); 602 } 603 604 int 605 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 606 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 607 { 608 fd_mask bits[6]; 609 fd_set *pibits[3], *pobits[3]; 610 struct timespec ats, rts, tts; 611 int s, ncoll, error = 0, timo; 612 u_int ni; 613 614 if (nd < 0) 615 return (EINVAL); 616 if (nd > p->p_fd->fd_nfiles) { 617 /* forgiving; slightly wrong */ 618 nd = p->p_fd->fd_nfiles; 619 } 620 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 621 if (ni > sizeof(bits[0])) { 622 caddr_t mbits; 623 624 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 625 pibits[0] = (fd_set *)&mbits[ni * 0]; 626 pibits[1] = (fd_set *)&mbits[ni * 1]; 627 pibits[2] = (fd_set *)&mbits[ni * 2]; 628 pobits[0] = (fd_set *)&mbits[ni * 3]; 629 pobits[1] = (fd_set *)&mbits[ni * 4]; 630 pobits[2] = (fd_set *)&mbits[ni * 5]; 631 } else { 632 memset(bits, 0, sizeof(bits)); 633 pibits[0] = (fd_set *)&bits[0]; 634 pibits[1] = (fd_set *)&bits[1]; 635 pibits[2] = (fd_set *)&bits[2]; 636 pobits[0] = (fd_set *)&bits[3]; 637 pobits[1] = (fd_set *)&bits[4]; 638 pobits[2] = (fd_set *)&bits[5]; 639 } 640 641 #define getbits(name, x) \ 642 if (name && (error = copyin(name, pibits[x], ni))) \ 643 goto done; 644 getbits(in, 0); 645 getbits(ou, 1); 646 getbits(ex, 2); 647 #undef getbits 648 #ifdef KTRACE 649 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 650 if (in) ktrfdset(p, pibits[0], ni); 651 if (ou) ktrfdset(p, pibits[1], ni); 652 if (ex) ktrfdset(p, pibits[2], ni); 653 } 654 #endif 655 656 if (tsp) { 657 getnanouptime(&rts); 658 timespecadd(tsp, &rts, &ats); 659 } else { 660 ats.tv_sec = 0; 661 ats.tv_nsec = 0; 662 } 663 timo = 0; 664 665 if (sigmask) 666 dosigsuspend(p, *sigmask &~ sigcantmask); 667 668 retry: 669 ncoll = nselcoll; 670 atomic_setbits_int(&p->p_flag, P_SELECT); 671 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 672 if (error || *retval) 673 goto done; 674 if (tsp) { 675 getnanouptime(&rts); 676 if (timespeccmp(&rts, &ats, >=)) 677 goto done; 678 timespecsub(&ats, &rts, &tts); 679 timo = tts.tv_sec > 24 * 60 * 60 ? 680 24 * 60 * 60 * hz : tstohz(&tts); 681 } 682 s = splhigh(); 683 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 684 splx(s); 685 goto retry; 686 } 687 atomic_clearbits_int(&p->p_flag, P_SELECT); 688 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 689 splx(s); 690 if (error == 0) 691 goto retry; 692 done: 693 atomic_clearbits_int(&p->p_flag, P_SELECT); 694 /* select is not restarted after signals... */ 695 if (error == ERESTART) 696 error = EINTR; 697 if (error == EWOULDBLOCK) 698 error = 0; 699 #define putbits(name, x) \ 700 if (name && (error2 = copyout(pobits[x], name, ni))) \ 701 error = error2; 702 if (error == 0) { 703 int error2; 704 705 putbits(in, 0); 706 putbits(ou, 1); 707 putbits(ex, 2); 708 #undef putbits 709 #ifdef KTRACE 710 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 711 if (in) ktrfdset(p, pobits[0], ni); 712 if (ou) ktrfdset(p, pobits[1], ni); 713 if (ex) ktrfdset(p, pobits[2], ni); 714 } 715 #endif 716 } 717 718 if (pibits[0] != (fd_set *)&bits[0]) 719 free(pibits[0], M_TEMP, 6 * ni); 720 return (error); 721 } 722 723 int 724 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 725 register_t *retval) 726 { 727 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 728 struct filedesc *fdp = p->p_fd; 729 int msk, i, j, fd; 730 fd_mask bits; 731 struct file *fp; 732 int n = 0; 733 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 734 735 for (msk = 0; msk < 3; msk++) { 736 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 737 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 738 739 for (i = 0; i < nfd; i += NFDBITS) { 740 bits = pibits->fds_bits[i/NFDBITS]; 741 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 742 bits &= ~(1 << j); 743 if ((fp = fd_getfile(fdp, fd)) == NULL) 744 return (EBADF); 745 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 746 FD_SET(fd, pobits); 747 n++; 748 } 749 FRELE(fp, p); 750 } 751 } 752 } 753 *retval = n; 754 return (0); 755 } 756 757 int 758 seltrue(dev_t dev, int events, struct proc *p) 759 { 760 761 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 762 } 763 764 int 765 selfalse(dev_t dev, int events, struct proc *p) 766 { 767 768 return (0); 769 } 770 771 /* 772 * Record a select request. 773 */ 774 void 775 selrecord(struct proc *selector, struct selinfo *sip) 776 { 777 struct proc *p; 778 pid_t mytid; 779 780 mytid = selector->p_tid; 781 if (sip->si_seltid == mytid) 782 return; 783 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 784 p->p_wchan == (caddr_t)&selwait) 785 sip->si_flags |= SI_COLL; 786 else 787 sip->si_seltid = mytid; 788 } 789 790 /* 791 * Do a wakeup when a selectable event occurs. 792 */ 793 void 794 selwakeup(struct selinfo *sip) 795 { 796 struct proc *p; 797 int s; 798 799 KNOTE(&sip->si_note, NOTE_SUBMIT); 800 if (sip->si_seltid == 0) 801 return; 802 if (sip->si_flags & SI_COLL) { 803 nselcoll++; 804 sip->si_flags &= ~SI_COLL; 805 wakeup(&selwait); 806 } 807 p = tfind(sip->si_seltid); 808 sip->si_seltid = 0; 809 if (p != NULL) { 810 SCHED_LOCK(s); 811 if (p->p_wchan == (caddr_t)&selwait) { 812 if (p->p_stat == SSLEEP) 813 setrunnable(p); 814 else 815 unsleep(p); 816 } else if (p->p_flag & P_SELECT) 817 atomic_clearbits_int(&p->p_flag, P_SELECT); 818 SCHED_UNLOCK(s); 819 } 820 } 821 822 void 823 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 824 { 825 struct filedesc *fdp = p->p_fd; 826 struct file *fp; 827 u_int i; 828 int n = 0; 829 830 for (i = 0; i < nfd; i++, pl++) { 831 /* Check the file descriptor. */ 832 if (pl->fd < 0) { 833 pl->revents = 0; 834 continue; 835 } 836 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 837 pl->revents = POLLNVAL; 838 n++; 839 continue; 840 } 841 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 842 FRELE(fp, p); 843 if (pl->revents != 0) 844 n++; 845 } 846 *retval = n; 847 } 848 849 /* 850 * Only copyout the revents field. 851 */ 852 int 853 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 854 { 855 int error = 0; 856 u_int i = 0; 857 858 while (!error && i++ < nfds) { 859 error = copyout(&pl->revents, &upl->revents, 860 sizeof(upl->revents)); 861 pl++; 862 upl++; 863 } 864 865 return (error); 866 } 867 868 /* 869 * We are using the same mechanism as select only we encode/decode args 870 * differently. 871 */ 872 int 873 sys_poll(struct proc *p, void *v, register_t *retval) 874 { 875 struct sys_poll_args /* { 876 syscallarg(struct pollfd *) fds; 877 syscallarg(u_int) nfds; 878 syscallarg(int) timeout; 879 } */ *uap = v; 880 881 struct timespec ts, *tsp = NULL; 882 int msec = SCARG(uap, timeout); 883 884 if (msec != INFTIM) { 885 if (msec < 0) 886 return (EINVAL); 887 ts.tv_sec = msec / 1000; 888 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 889 tsp = &ts; 890 } 891 892 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 893 retval)); 894 } 895 896 int 897 sys_ppoll(struct proc *p, void *v, register_t *retval) 898 { 899 struct sys_ppoll_args /* { 900 syscallarg(struct pollfd *) fds; 901 syscallarg(u_int) nfds; 902 syscallarg(const struct timespec *) ts; 903 syscallarg(const sigset_t *) mask; 904 } */ *uap = v; 905 906 int error; 907 struct timespec ts, *tsp = NULL; 908 sigset_t ss, *ssp = NULL; 909 910 if (SCARG(uap, ts) != NULL) { 911 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 912 return (error); 913 if ((error = timespecfix(&ts)) != 0) 914 return (error); 915 #ifdef KTRACE 916 if (KTRPOINT(p, KTR_STRUCT)) 917 ktrreltimespec(p, &ts); 918 #endif 919 tsp = &ts; 920 } 921 922 if (SCARG(uap, mask) != NULL) { 923 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 924 return (error); 925 ssp = &ss; 926 } 927 928 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 929 retval)); 930 } 931 932 int 933 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 934 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 935 { 936 size_t sz; 937 struct pollfd pfds[4], *pl = pfds; 938 struct timespec ats, rts, tts; 939 int timo, ncoll, i, s, error; 940 941 /* Standards say no more than MAX_OPEN; this is possibly better. */ 942 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 943 return (EINVAL); 944 945 /* optimize for the default case, of a small nfds value */ 946 if (nfds > nitems(pfds)) { 947 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 948 M_WAITOK | M_CANFAIL); 949 if (pl == NULL) 950 return (EINVAL); 951 } 952 953 sz = nfds * sizeof(*pl); 954 955 if ((error = copyin(fds, pl, sz)) != 0) 956 goto bad; 957 958 for (i = 0; i < nfds; i++) { 959 pl[i].events &= ~POLL_NOHUP; 960 pl[i].revents = 0; 961 } 962 963 if (tsp != NULL) { 964 getnanouptime(&rts); 965 timespecadd(tsp, &rts, &ats); 966 } else { 967 ats.tv_sec = 0; 968 ats.tv_nsec = 0; 969 } 970 timo = 0; 971 972 if (sigmask) 973 dosigsuspend(p, *sigmask &~ sigcantmask); 974 975 retry: 976 ncoll = nselcoll; 977 atomic_setbits_int(&p->p_flag, P_SELECT); 978 pollscan(p, pl, nfds, retval); 979 if (*retval) 980 goto done; 981 if (tsp != NULL) { 982 getnanouptime(&rts); 983 if (timespeccmp(&rts, &ats, >=)) 984 goto done; 985 timespecsub(&ats, &rts, &tts); 986 timo = tts.tv_sec > 24 * 60 * 60 ? 987 24 * 60 * 60 * hz : tstohz(&tts); 988 } 989 s = splhigh(); 990 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 991 splx(s); 992 goto retry; 993 } 994 atomic_clearbits_int(&p->p_flag, P_SELECT); 995 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 996 splx(s); 997 if (error == 0) 998 goto retry; 999 1000 done: 1001 atomic_clearbits_int(&p->p_flag, P_SELECT); 1002 /* 1003 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1004 * ignored (since the whole point is to see what would block). 1005 */ 1006 switch (error) { 1007 case ERESTART: 1008 error = pollout(pl, fds, nfds); 1009 if (error == 0) 1010 error = EINTR; 1011 break; 1012 case EWOULDBLOCK: 1013 case 0: 1014 error = pollout(pl, fds, nfds); 1015 break; 1016 } 1017 #ifdef KTRACE 1018 if (KTRPOINT(p, KTR_STRUCT)) 1019 ktrpollfd(p, pl, nfds); 1020 #endif /* KTRACE */ 1021 bad: 1022 if (pl != pfds) 1023 free(pl, M_TEMP, sz); 1024 return (error); 1025 } 1026 1027 /* 1028 * utrace system call 1029 */ 1030 int 1031 sys_utrace(struct proc *curp, void *v, register_t *retval) 1032 { 1033 #ifdef KTRACE 1034 struct sys_utrace_args /* { 1035 syscallarg(const char *) label; 1036 syscallarg(const void *) addr; 1037 syscallarg(size_t) len; 1038 } */ *uap = v; 1039 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1040 SCARG(uap, len))); 1041 #else 1042 return (0); 1043 #endif 1044 } 1045