1 /* $OpenBSD: sys_generic.c,v 1.118 2018/04/27 10:13:37 mpi Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/resourcevar.h> 49 #include <sys/socketvar.h> 50 #include <sys/signalvar.h> 51 #include <sys/uio.h> 52 #include <sys/kernel.h> 53 #include <sys/stat.h> 54 #include <sys/malloc.h> 55 #include <sys/poll.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 #include <sys/sched.h> 60 #include <sys/pledge.h> 61 62 #include <sys/mount.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 68 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 69 int pollout(struct pollfd *, struct pollfd *, u_int); 70 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 71 const struct timespec *, const sigset_t *, register_t *); 72 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 73 const sigset_t *, register_t *); 74 75 /* 76 * Read system call. 77 */ 78 int 79 sys_read(struct proc *p, void *v, register_t *retval) 80 { 81 struct sys_read_args /* { 82 syscallarg(int) fd; 83 syscallarg(void *) buf; 84 syscallarg(size_t) nbyte; 85 } */ *uap = v; 86 struct iovec iov; 87 int fd = SCARG(uap, fd); 88 struct file *fp; 89 struct filedesc *fdp = p->p_fd; 90 91 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 92 return (EBADF); 93 94 iov.iov_base = SCARG(uap, buf); 95 iov.iov_len = SCARG(uap, nbyte); 96 97 /* dofilereadv() will FRELE the descriptor for us */ 98 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 99 } 100 101 /* 102 * Scatter read system call. 103 */ 104 int 105 sys_readv(struct proc *p, void *v, register_t *retval) 106 { 107 struct sys_readv_args /* { 108 syscallarg(int) fd; 109 syscallarg(const struct iovec *) iovp; 110 syscallarg(int) iovcnt; 111 } */ *uap = v; 112 int fd = SCARG(uap, fd); 113 struct file *fp; 114 struct filedesc *fdp = p->p_fd; 115 116 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 117 return (EBADF); 118 119 /* dofilereadv() will FRELE the descriptor for us */ 120 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 121 &fp->f_offset, retval)); 122 } 123 124 int 125 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 126 int iovcnt, int userspace, off_t *offset, register_t *retval) 127 { 128 struct iovec aiov[UIO_SMALLIOV]; 129 struct uio auio; 130 struct iovec *iov; 131 struct iovec *needfree = NULL; 132 long i, cnt, error = 0; 133 u_int iovlen; 134 #ifdef KTRACE 135 struct iovec *ktriov = NULL; 136 #endif 137 138 /* note: can't use iovlen until iovcnt is validated */ 139 iovlen = iovcnt * sizeof(struct iovec); 140 141 /* 142 * If the iovec array exists in userspace, it needs to be copied in; 143 * otherwise, it can be used directly. 144 */ 145 if (userspace) { 146 if ((u_int)iovcnt > UIO_SMALLIOV) { 147 if ((u_int)iovcnt > IOV_MAX) { 148 error = EINVAL; 149 goto out; 150 } 151 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 152 } else if ((u_int)iovcnt > 0) { 153 iov = aiov; 154 needfree = NULL; 155 } else { 156 error = EINVAL; 157 goto out; 158 } 159 if ((error = copyin(iovp, iov, iovlen))) 160 goto done; 161 #ifdef KTRACE 162 if (KTRPOINT(p, KTR_STRUCT)) 163 ktriovec(p, iov, iovcnt); 164 #endif 165 } else { 166 iov = (struct iovec *)iovp; /* de-constify */ 167 } 168 169 auio.uio_iov = iov; 170 auio.uio_iovcnt = iovcnt; 171 auio.uio_rw = UIO_READ; 172 auio.uio_segflg = UIO_USERSPACE; 173 auio.uio_procp = p; 174 auio.uio_resid = 0; 175 for (i = 0; i < iovcnt; i++) { 176 auio.uio_resid += iov->iov_len; 177 /* 178 * Reads return ssize_t because -1 is returned on error. 179 * Therefore we must restrict the length to SSIZE_MAX to 180 * avoid garbage return values. Note that the addition is 181 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 182 */ 183 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 184 error = EINVAL; 185 goto done; 186 } 187 iov++; 188 } 189 #ifdef KTRACE 190 /* 191 * if tracing, save a copy of iovec 192 */ 193 if (KTRPOINT(p, KTR_GENIO)) { 194 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 195 memcpy(ktriov, auio.uio_iov, iovlen); 196 } 197 #endif 198 cnt = auio.uio_resid; 199 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 200 if (error) 201 if (auio.uio_resid != cnt && (error == ERESTART || 202 error == EINTR || error == EWOULDBLOCK)) 203 error = 0; 204 cnt -= auio.uio_resid; 205 206 fp->f_rxfer++; 207 fp->f_rbytes += cnt; 208 #ifdef KTRACE 209 if (ktriov != NULL) { 210 if (error == 0) 211 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 212 free(ktriov, M_TEMP, iovlen); 213 } 214 #endif 215 *retval = cnt; 216 done: 217 if (needfree) 218 free(needfree, M_IOV, iovlen); 219 out: 220 FRELE(fp, p); 221 return (error); 222 } 223 224 /* 225 * Write system call 226 */ 227 int 228 sys_write(struct proc *p, void *v, register_t *retval) 229 { 230 struct sys_write_args /* { 231 syscallarg(int) fd; 232 syscallarg(const void *) buf; 233 syscallarg(size_t) nbyte; 234 } */ *uap = v; 235 struct iovec iov; 236 int fd = SCARG(uap, fd); 237 struct file *fp; 238 struct filedesc *fdp = p->p_fd; 239 240 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 241 return (EBADF); 242 243 iov.iov_base = (void *)SCARG(uap, buf); 244 iov.iov_len = SCARG(uap, nbyte); 245 246 /* dofilewritev() will FRELE the descriptor for us */ 247 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 248 } 249 250 /* 251 * Gather write system call 252 */ 253 int 254 sys_writev(struct proc *p, void *v, register_t *retval) 255 { 256 struct sys_writev_args /* { 257 syscallarg(int) fd; 258 syscallarg(const struct iovec *) iovp; 259 syscallarg(int) iovcnt; 260 } */ *uap = v; 261 int fd = SCARG(uap, fd); 262 struct file *fp; 263 struct filedesc *fdp = p->p_fd; 264 265 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 266 return (EBADF); 267 268 /* dofilewritev() will FRELE the descriptor for us */ 269 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 270 &fp->f_offset, retval)); 271 } 272 273 int 274 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 275 int iovcnt, int userspace, off_t *offset, register_t *retval) 276 { 277 struct iovec aiov[UIO_SMALLIOV]; 278 struct uio auio; 279 struct iovec *iov; 280 struct iovec *needfree = NULL; 281 long i, cnt, error = 0; 282 u_int iovlen; 283 #ifdef KTRACE 284 struct iovec *ktriov = NULL; 285 #endif 286 287 /* note: can't use iovlen until iovcnt is validated */ 288 iovlen = iovcnt * sizeof(struct iovec); 289 290 /* 291 * If the iovec array exists in userspace, it needs to be copied in; 292 * otherwise, it can be used directly. 293 */ 294 if (userspace) { 295 if ((u_int)iovcnt > UIO_SMALLIOV) { 296 if ((u_int)iovcnt > IOV_MAX) { 297 error = EINVAL; 298 goto out; 299 } 300 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 301 } else if ((u_int)iovcnt > 0) { 302 iov = aiov; 303 needfree = NULL; 304 } else { 305 error = EINVAL; 306 goto out; 307 } 308 if ((error = copyin(iovp, iov, iovlen))) 309 goto done; 310 #ifdef KTRACE 311 if (KTRPOINT(p, KTR_STRUCT)) 312 ktriovec(p, iov, iovcnt); 313 #endif 314 } else { 315 iov = (struct iovec *)iovp; /* de-constify */ 316 } 317 318 auio.uio_iov = iov; 319 auio.uio_iovcnt = iovcnt; 320 auio.uio_rw = UIO_WRITE; 321 auio.uio_segflg = UIO_USERSPACE; 322 auio.uio_procp = p; 323 auio.uio_resid = 0; 324 for (i = 0; i < iovcnt; i++) { 325 auio.uio_resid += iov->iov_len; 326 /* 327 * Writes return ssize_t because -1 is returned on error. 328 * Therefore we must restrict the length to SSIZE_MAX to 329 * avoid garbage return values. Note that the addition is 330 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 331 */ 332 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 333 error = EINVAL; 334 goto done; 335 } 336 iov++; 337 } 338 #ifdef KTRACE 339 /* 340 * if tracing, save a copy of iovec 341 */ 342 if (KTRPOINT(p, KTR_GENIO)) { 343 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 344 memcpy(ktriov, auio.uio_iov, iovlen); 345 } 346 #endif 347 cnt = auio.uio_resid; 348 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 349 if (error) { 350 if (auio.uio_resid != cnt && (error == ERESTART || 351 error == EINTR || error == EWOULDBLOCK)) 352 error = 0; 353 if (error == EPIPE) 354 ptsignal(p, SIGPIPE, STHREAD); 355 } 356 cnt -= auio.uio_resid; 357 358 fp->f_wxfer++; 359 fp->f_wbytes += cnt; 360 #ifdef KTRACE 361 if (ktriov != NULL) { 362 if (error == 0) 363 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 364 free(ktriov, M_TEMP, iovlen); 365 } 366 #endif 367 *retval = cnt; 368 done: 369 if (needfree) 370 free(needfree, M_IOV, iovlen); 371 out: 372 FRELE(fp, p); 373 return (error); 374 } 375 376 /* 377 * Ioctl system call 378 */ 379 int 380 sys_ioctl(struct proc *p, void *v, register_t *retval) 381 { 382 struct sys_ioctl_args /* { 383 syscallarg(int) fd; 384 syscallarg(u_long) com; 385 syscallarg(void *) data; 386 } */ *uap = v; 387 struct file *fp; 388 struct filedesc *fdp; 389 u_long com = SCARG(uap, com); 390 int error = 0; 391 u_int size; 392 caddr_t data, memp = NULL; 393 int tmp; 394 #define STK_PARAMS 128 395 long long stkbuf[STK_PARAMS / sizeof(long long)]; 396 397 fdp = p->p_fd; 398 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 399 return (EBADF); 400 401 if (fp->f_type == DTYPE_SOCKET) { 402 struct socket *so = fp->f_data; 403 404 if (so->so_state & SS_DNS) { 405 error = EINVAL; 406 goto out; 407 } 408 } 409 410 error = pledge_ioctl(p, com, fp); 411 if (error) 412 goto out; 413 414 switch (com) { 415 case FIONCLEX: 416 case FIOCLEX: 417 fdplock(fdp); 418 if (com == FIONCLEX) 419 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 420 else 421 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 422 fdpunlock(fdp); 423 goto out; 424 } 425 426 /* 427 * Interpret high order word to find amount of data to be 428 * copied to/from the user's address space. 429 */ 430 size = IOCPARM_LEN(com); 431 if (size > IOCPARM_MAX) { 432 error = ENOTTY; 433 goto out; 434 } 435 if (size > sizeof (stkbuf)) { 436 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 437 data = memp; 438 } else 439 data = (caddr_t)stkbuf; 440 if (com&IOC_IN) { 441 if (size) { 442 error = copyin(SCARG(uap, data), data, size); 443 if (error) { 444 goto out; 445 } 446 } else 447 *(caddr_t *)data = SCARG(uap, data); 448 } else if ((com&IOC_OUT) && size) 449 /* 450 * Zero the buffer so the user always 451 * gets back something deterministic. 452 */ 453 memset(data, 0, size); 454 else if (com&IOC_VOID) 455 *(caddr_t *)data = SCARG(uap, data); 456 457 switch (com) { 458 459 case FIONBIO: 460 if ((tmp = *(int *)data) != 0) 461 fp->f_flag |= FNONBLOCK; 462 else 463 fp->f_flag &= ~FNONBLOCK; 464 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 465 break; 466 467 case FIOASYNC: 468 if ((tmp = *(int *)data) != 0) 469 fp->f_flag |= FASYNC; 470 else 471 fp->f_flag &= ~FASYNC; 472 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 473 break; 474 475 case FIOSETOWN: 476 tmp = *(int *)data; 477 if (fp->f_type == DTYPE_SOCKET) { 478 struct socket *so = fp->f_data; 479 480 so->so_pgid = tmp; 481 so->so_siguid = p->p_ucred->cr_ruid; 482 so->so_sigeuid = p->p_ucred->cr_uid; 483 error = 0; 484 break; 485 } 486 if (tmp <= 0) { 487 tmp = -tmp; 488 } else { 489 struct process *pr = prfind(tmp); 490 if (pr == NULL) { 491 error = ESRCH; 492 break; 493 } 494 tmp = pr->ps_pgrp->pg_id; 495 } 496 error = (*fp->f_ops->fo_ioctl) 497 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 498 break; 499 500 case FIOGETOWN: 501 if (fp->f_type == DTYPE_SOCKET) { 502 error = 0; 503 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 504 break; 505 } 506 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 507 *(int *)data = -*(int *)data; 508 break; 509 510 default: 511 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 512 break; 513 } 514 /* 515 * Copy any data to user, size was 516 * already set and checked above. 517 */ 518 if (error == 0 && (com&IOC_OUT) && size) 519 error = copyout(data, SCARG(uap, data), size); 520 out: 521 FRELE(fp, p); 522 free(memp, M_IOCTLOPS, size); 523 return (error); 524 } 525 526 int selwait, nselcoll; 527 528 /* 529 * Select system call. 530 */ 531 int 532 sys_select(struct proc *p, void *v, register_t *retval) 533 { 534 struct sys_select_args /* { 535 syscallarg(int) nd; 536 syscallarg(fd_set *) in; 537 syscallarg(fd_set *) ou; 538 syscallarg(fd_set *) ex; 539 syscallarg(struct timeval *) tv; 540 } */ *uap = v; 541 542 struct timespec ts, *tsp = NULL; 543 int error; 544 545 if (SCARG(uap, tv) != NULL) { 546 struct timeval tv; 547 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 548 return (error); 549 if ((error = itimerfix(&tv)) != 0) 550 return (error); 551 #ifdef KTRACE 552 if (KTRPOINT(p, KTR_STRUCT)) 553 ktrreltimeval(p, &tv); 554 #endif 555 TIMEVAL_TO_TIMESPEC(&tv, &ts); 556 tsp = &ts; 557 } 558 559 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 560 SCARG(uap, ex), tsp, NULL, retval)); 561 } 562 563 int 564 sys_pselect(struct proc *p, void *v, register_t *retval) 565 { 566 struct sys_pselect_args /* { 567 syscallarg(int) nd; 568 syscallarg(fd_set *) in; 569 syscallarg(fd_set *) ou; 570 syscallarg(fd_set *) ex; 571 syscallarg(const struct timespec *) ts; 572 syscallarg(const sigset_t *) mask; 573 } */ *uap = v; 574 575 struct timespec ts, *tsp = NULL; 576 sigset_t ss, *ssp = NULL; 577 int error; 578 579 if (SCARG(uap, ts) != NULL) { 580 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 581 return (error); 582 if ((error = timespecfix(&ts)) != 0) 583 return (error); 584 #ifdef KTRACE 585 if (KTRPOINT(p, KTR_STRUCT)) 586 ktrreltimespec(p, &ts); 587 #endif 588 tsp = &ts; 589 } 590 if (SCARG(uap, mask) != NULL) { 591 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 592 return (error); 593 ssp = &ss; 594 } 595 596 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 597 SCARG(uap, ex), tsp, ssp, retval)); 598 } 599 600 int 601 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 602 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 603 { 604 fd_mask bits[6]; 605 fd_set *pibits[3], *pobits[3]; 606 struct timespec ats, rts, tts; 607 int s, ncoll, error = 0, timo; 608 u_int ni; 609 610 if (nd < 0) 611 return (EINVAL); 612 if (nd > p->p_fd->fd_nfiles) { 613 /* forgiving; slightly wrong */ 614 nd = p->p_fd->fd_nfiles; 615 } 616 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 617 if (ni > sizeof(bits[0])) { 618 caddr_t mbits; 619 620 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 621 pibits[0] = (fd_set *)&mbits[ni * 0]; 622 pibits[1] = (fd_set *)&mbits[ni * 1]; 623 pibits[2] = (fd_set *)&mbits[ni * 2]; 624 pobits[0] = (fd_set *)&mbits[ni * 3]; 625 pobits[1] = (fd_set *)&mbits[ni * 4]; 626 pobits[2] = (fd_set *)&mbits[ni * 5]; 627 } else { 628 memset(bits, 0, sizeof(bits)); 629 pibits[0] = (fd_set *)&bits[0]; 630 pibits[1] = (fd_set *)&bits[1]; 631 pibits[2] = (fd_set *)&bits[2]; 632 pobits[0] = (fd_set *)&bits[3]; 633 pobits[1] = (fd_set *)&bits[4]; 634 pobits[2] = (fd_set *)&bits[5]; 635 } 636 637 #define getbits(name, x) \ 638 if (name && (error = copyin(name, pibits[x], ni))) \ 639 goto done; 640 getbits(in, 0); 641 getbits(ou, 1); 642 getbits(ex, 2); 643 #undef getbits 644 #ifdef KTRACE 645 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 646 if (in) ktrfdset(p, pibits[0], ni); 647 if (ou) ktrfdset(p, pibits[1], ni); 648 if (ex) ktrfdset(p, pibits[2], ni); 649 } 650 #endif 651 652 if (tsp) { 653 getnanouptime(&rts); 654 timespecadd(tsp, &rts, &ats); 655 } else { 656 ats.tv_sec = 0; 657 ats.tv_nsec = 0; 658 } 659 timo = 0; 660 661 if (sigmask) 662 dosigsuspend(p, *sigmask &~ sigcantmask); 663 664 retry: 665 ncoll = nselcoll; 666 atomic_setbits_int(&p->p_flag, P_SELECT); 667 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 668 if (error || *retval) 669 goto done; 670 if (tsp) { 671 getnanouptime(&rts); 672 if (timespeccmp(&rts, &ats, >=)) 673 goto done; 674 timespecsub(&ats, &rts, &tts); 675 timo = tts.tv_sec > 24 * 60 * 60 ? 676 24 * 60 * 60 * hz : tstohz(&tts); 677 } 678 s = splhigh(); 679 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 680 splx(s); 681 goto retry; 682 } 683 atomic_clearbits_int(&p->p_flag, P_SELECT); 684 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 685 splx(s); 686 if (error == 0) 687 goto retry; 688 done: 689 atomic_clearbits_int(&p->p_flag, P_SELECT); 690 /* select is not restarted after signals... */ 691 if (error == ERESTART) 692 error = EINTR; 693 if (error == EWOULDBLOCK) 694 error = 0; 695 #define putbits(name, x) \ 696 if (name && (error2 = copyout(pobits[x], name, ni))) \ 697 error = error2; 698 if (error == 0) { 699 int error2; 700 701 putbits(in, 0); 702 putbits(ou, 1); 703 putbits(ex, 2); 704 #undef putbits 705 #ifdef KTRACE 706 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 707 if (in) ktrfdset(p, pobits[0], ni); 708 if (ou) ktrfdset(p, pobits[1], ni); 709 if (ex) ktrfdset(p, pobits[2], ni); 710 } 711 #endif 712 } 713 714 if (pibits[0] != (fd_set *)&bits[0]) 715 free(pibits[0], M_TEMP, 6 * ni); 716 return (error); 717 } 718 719 int 720 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 721 register_t *retval) 722 { 723 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 724 struct filedesc *fdp = p->p_fd; 725 int msk, i, j, fd; 726 fd_mask bits; 727 struct file *fp; 728 int n = 0; 729 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 730 731 for (msk = 0; msk < 3; msk++) { 732 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 733 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 734 735 for (i = 0; i < nfd; i += NFDBITS) { 736 bits = pibits->fds_bits[i/NFDBITS]; 737 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 738 bits &= ~(1 << j); 739 if ((fp = fd_getfile(fdp, fd)) == NULL) 740 return (EBADF); 741 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 742 FD_SET(fd, pobits); 743 n++; 744 } 745 FRELE(fp, p); 746 } 747 } 748 } 749 *retval = n; 750 return (0); 751 } 752 753 int 754 seltrue(dev_t dev, int events, struct proc *p) 755 { 756 757 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 758 } 759 760 int 761 selfalse(dev_t dev, int events, struct proc *p) 762 { 763 764 return (0); 765 } 766 767 /* 768 * Record a select request. 769 */ 770 void 771 selrecord(struct proc *selector, struct selinfo *sip) 772 { 773 struct proc *p; 774 pid_t mytid; 775 776 mytid = selector->p_tid; 777 if (sip->si_seltid == mytid) 778 return; 779 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 780 p->p_wchan == (caddr_t)&selwait) 781 sip->si_flags |= SI_COLL; 782 else 783 sip->si_seltid = mytid; 784 } 785 786 /* 787 * Do a wakeup when a selectable event occurs. 788 */ 789 void 790 selwakeup(struct selinfo *sip) 791 { 792 struct proc *p; 793 int s; 794 795 KNOTE(&sip->si_note, NOTE_SUBMIT); 796 if (sip->si_seltid == 0) 797 return; 798 if (sip->si_flags & SI_COLL) { 799 nselcoll++; 800 sip->si_flags &= ~SI_COLL; 801 wakeup(&selwait); 802 } 803 p = tfind(sip->si_seltid); 804 sip->si_seltid = 0; 805 if (p != NULL) { 806 SCHED_LOCK(s); 807 if (p->p_wchan == (caddr_t)&selwait) { 808 if (p->p_stat == SSLEEP) 809 setrunnable(p); 810 else 811 unsleep(p); 812 } else if (p->p_flag & P_SELECT) 813 atomic_clearbits_int(&p->p_flag, P_SELECT); 814 SCHED_UNLOCK(s); 815 } 816 } 817 818 void 819 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 820 { 821 struct filedesc *fdp = p->p_fd; 822 struct file *fp; 823 u_int i; 824 int n = 0; 825 826 for (i = 0; i < nfd; i++, pl++) { 827 /* Check the file descriptor. */ 828 if (pl->fd < 0) { 829 pl->revents = 0; 830 continue; 831 } 832 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 833 pl->revents = POLLNVAL; 834 n++; 835 continue; 836 } 837 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 838 FRELE(fp, p); 839 if (pl->revents != 0) 840 n++; 841 } 842 *retval = n; 843 } 844 845 /* 846 * Only copyout the revents field. 847 */ 848 int 849 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 850 { 851 int error = 0; 852 u_int i = 0; 853 854 while (!error && i++ < nfds) { 855 error = copyout(&pl->revents, &upl->revents, 856 sizeof(upl->revents)); 857 pl++; 858 upl++; 859 } 860 861 return (error); 862 } 863 864 /* 865 * We are using the same mechanism as select only we encode/decode args 866 * differently. 867 */ 868 int 869 sys_poll(struct proc *p, void *v, register_t *retval) 870 { 871 struct sys_poll_args /* { 872 syscallarg(struct pollfd *) fds; 873 syscallarg(u_int) nfds; 874 syscallarg(int) timeout; 875 } */ *uap = v; 876 877 struct timespec ts, *tsp = NULL; 878 int msec = SCARG(uap, timeout); 879 880 if (msec != INFTIM) { 881 if (msec < 0) 882 return (EINVAL); 883 ts.tv_sec = msec / 1000; 884 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 885 tsp = &ts; 886 } 887 888 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 889 retval)); 890 } 891 892 int 893 sys_ppoll(struct proc *p, void *v, register_t *retval) 894 { 895 struct sys_ppoll_args /* { 896 syscallarg(struct pollfd *) fds; 897 syscallarg(u_int) nfds; 898 syscallarg(const struct timespec *) ts; 899 syscallarg(const sigset_t *) mask; 900 } */ *uap = v; 901 902 int error; 903 struct timespec ts, *tsp = NULL; 904 sigset_t ss, *ssp = NULL; 905 906 if (SCARG(uap, ts) != NULL) { 907 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 908 return (error); 909 if ((error = timespecfix(&ts)) != 0) 910 return (error); 911 #ifdef KTRACE 912 if (KTRPOINT(p, KTR_STRUCT)) 913 ktrreltimespec(p, &ts); 914 #endif 915 tsp = &ts; 916 } 917 918 if (SCARG(uap, mask) != NULL) { 919 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 920 return (error); 921 ssp = &ss; 922 } 923 924 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 925 retval)); 926 } 927 928 int 929 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 930 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 931 { 932 size_t sz; 933 struct pollfd pfds[4], *pl = pfds; 934 struct timespec ats, rts, tts; 935 int timo, ncoll, i, s, error; 936 937 /* Standards say no more than MAX_OPEN; this is possibly better. */ 938 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 939 return (EINVAL); 940 941 /* optimize for the default case, of a small nfds value */ 942 if (nfds > nitems(pfds)) { 943 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 944 M_WAITOK | M_CANFAIL); 945 if (pl == NULL) 946 return (EINVAL); 947 } 948 949 sz = nfds * sizeof(*pl); 950 951 if ((error = copyin(fds, pl, sz)) != 0) 952 goto bad; 953 954 for (i = 0; i < nfds; i++) { 955 pl[i].events &= ~POLL_NOHUP; 956 pl[i].revents = 0; 957 } 958 959 if (tsp != NULL) { 960 getnanouptime(&rts); 961 timespecadd(tsp, &rts, &ats); 962 } else { 963 ats.tv_sec = 0; 964 ats.tv_nsec = 0; 965 } 966 timo = 0; 967 968 if (sigmask) 969 dosigsuspend(p, *sigmask &~ sigcantmask); 970 971 retry: 972 ncoll = nselcoll; 973 atomic_setbits_int(&p->p_flag, P_SELECT); 974 pollscan(p, pl, nfds, retval); 975 if (*retval) 976 goto done; 977 if (tsp != NULL) { 978 getnanouptime(&rts); 979 if (timespeccmp(&rts, &ats, >=)) 980 goto done; 981 timespecsub(&ats, &rts, &tts); 982 timo = tts.tv_sec > 24 * 60 * 60 ? 983 24 * 60 * 60 * hz : tstohz(&tts); 984 } 985 s = splhigh(); 986 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 987 splx(s); 988 goto retry; 989 } 990 atomic_clearbits_int(&p->p_flag, P_SELECT); 991 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 992 splx(s); 993 if (error == 0) 994 goto retry; 995 996 done: 997 atomic_clearbits_int(&p->p_flag, P_SELECT); 998 /* 999 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1000 * ignored (since the whole point is to see what would block). 1001 */ 1002 switch (error) { 1003 case ERESTART: 1004 error = pollout(pl, fds, nfds); 1005 if (error == 0) 1006 error = EINTR; 1007 break; 1008 case EWOULDBLOCK: 1009 case 0: 1010 error = pollout(pl, fds, nfds); 1011 break; 1012 } 1013 #ifdef KTRACE 1014 if (KTRPOINT(p, KTR_STRUCT)) 1015 ktrpollfd(p, pl, nfds); 1016 #endif /* KTRACE */ 1017 bad: 1018 if (pl != pfds) 1019 free(pl, M_TEMP, sz); 1020 return (error); 1021 } 1022 1023 /* 1024 * utrace system call 1025 */ 1026 int 1027 sys_utrace(struct proc *curp, void *v, register_t *retval) 1028 { 1029 #ifdef KTRACE 1030 struct sys_utrace_args /* { 1031 syscallarg(const char *) label; 1032 syscallarg(const void *) addr; 1033 syscallarg(size_t) len; 1034 } */ *uap = v; 1035 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1036 SCARG(uap, len))); 1037 #else 1038 return (0); 1039 #endif 1040 } 1041