1 /* $OpenBSD: sys_generic.c,v 1.116 2018/01/02 06:38:45 guenther Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/fcntl.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/resourcevar.h> 49 #include <sys/socketvar.h> 50 #include <sys/signalvar.h> 51 #include <sys/uio.h> 52 #include <sys/kernel.h> 53 #include <sys/stat.h> 54 #include <sys/malloc.h> 55 #include <sys/poll.h> 56 #ifdef KTRACE 57 #include <sys/ktrace.h> 58 #endif 59 #include <sys/sched.h> 60 #include <sys/pledge.h> 61 62 #include <sys/mount.h> 63 #include <sys/syscallargs.h> 64 65 #include <uvm/uvm_extern.h> 66 67 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 68 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 69 int pollout(struct pollfd *, struct pollfd *, u_int); 70 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 71 const struct timespec *, const sigset_t *, register_t *); 72 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 73 const sigset_t *, register_t *); 74 75 /* 76 * Read system call. 77 */ 78 int 79 sys_read(struct proc *p, void *v, register_t *retval) 80 { 81 struct sys_read_args /* { 82 syscallarg(int) fd; 83 syscallarg(void *) buf; 84 syscallarg(size_t) nbyte; 85 } */ *uap = v; 86 struct iovec iov; 87 int fd = SCARG(uap, fd); 88 struct file *fp; 89 struct filedesc *fdp = p->p_fd; 90 91 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 92 return (EBADF); 93 94 iov.iov_base = SCARG(uap, buf); 95 iov.iov_len = SCARG(uap, nbyte); 96 97 FREF(fp); 98 99 /* dofilereadv() will FRELE the descriptor for us */ 100 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 101 } 102 103 /* 104 * Scatter read system call. 105 */ 106 int 107 sys_readv(struct proc *p, void *v, register_t *retval) 108 { 109 struct sys_readv_args /* { 110 syscallarg(int) fd; 111 syscallarg(const struct iovec *) iovp; 112 syscallarg(int) iovcnt; 113 } */ *uap = v; 114 int fd = SCARG(uap, fd); 115 struct file *fp; 116 struct filedesc *fdp = p->p_fd; 117 118 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 119 return (EBADF); 120 FREF(fp); 121 122 /* dofilereadv() will FRELE the descriptor for us */ 123 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 124 &fp->f_offset, retval)); 125 } 126 127 int 128 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 129 int iovcnt, int userspace, off_t *offset, register_t *retval) 130 { 131 struct iovec aiov[UIO_SMALLIOV]; 132 struct uio auio; 133 struct iovec *iov; 134 struct iovec *needfree = NULL; 135 long i, cnt, error = 0; 136 u_int iovlen; 137 #ifdef KTRACE 138 struct iovec *ktriov = NULL; 139 #endif 140 141 /* note: can't use iovlen until iovcnt is validated */ 142 iovlen = iovcnt * sizeof(struct iovec); 143 144 /* 145 * If the iovec array exists in userspace, it needs to be copied in; 146 * otherwise, it can be used directly. 147 */ 148 if (userspace) { 149 if ((u_int)iovcnt > UIO_SMALLIOV) { 150 if ((u_int)iovcnt > IOV_MAX) { 151 error = EINVAL; 152 goto out; 153 } 154 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 155 } else if ((u_int)iovcnt > 0) { 156 iov = aiov; 157 needfree = NULL; 158 } else { 159 error = EINVAL; 160 goto out; 161 } 162 if ((error = copyin(iovp, iov, iovlen))) 163 goto done; 164 #ifdef KTRACE 165 if (KTRPOINT(p, KTR_STRUCT)) 166 ktriovec(p, iov, iovcnt); 167 #endif 168 } else { 169 iov = (struct iovec *)iovp; /* de-constify */ 170 } 171 172 auio.uio_iov = iov; 173 auio.uio_iovcnt = iovcnt; 174 auio.uio_rw = UIO_READ; 175 auio.uio_segflg = UIO_USERSPACE; 176 auio.uio_procp = p; 177 auio.uio_resid = 0; 178 for (i = 0; i < iovcnt; i++) { 179 auio.uio_resid += iov->iov_len; 180 /* 181 * Reads return ssize_t because -1 is returned on error. 182 * Therefore we must restrict the length to SSIZE_MAX to 183 * avoid garbage return values. Note that the addition is 184 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 185 */ 186 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 187 error = EINVAL; 188 goto done; 189 } 190 iov++; 191 } 192 #ifdef KTRACE 193 /* 194 * if tracing, save a copy of iovec 195 */ 196 if (KTRPOINT(p, KTR_GENIO)) { 197 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 198 memcpy(ktriov, auio.uio_iov, iovlen); 199 } 200 #endif 201 cnt = auio.uio_resid; 202 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 203 if (error) 204 if (auio.uio_resid != cnt && (error == ERESTART || 205 error == EINTR || error == EWOULDBLOCK)) 206 error = 0; 207 cnt -= auio.uio_resid; 208 209 fp->f_rxfer++; 210 fp->f_rbytes += cnt; 211 #ifdef KTRACE 212 if (ktriov != NULL) { 213 if (error == 0) 214 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 215 free(ktriov, M_TEMP, iovlen); 216 } 217 #endif 218 *retval = cnt; 219 done: 220 if (needfree) 221 free(needfree, M_IOV, iovlen); 222 out: 223 FRELE(fp, p); 224 return (error); 225 } 226 227 /* 228 * Write system call 229 */ 230 int 231 sys_write(struct proc *p, void *v, register_t *retval) 232 { 233 struct sys_write_args /* { 234 syscallarg(int) fd; 235 syscallarg(const void *) buf; 236 syscallarg(size_t) nbyte; 237 } */ *uap = v; 238 struct iovec iov; 239 int fd = SCARG(uap, fd); 240 struct file *fp; 241 struct filedesc *fdp = p->p_fd; 242 243 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 244 return (EBADF); 245 246 iov.iov_base = (void *)SCARG(uap, buf); 247 iov.iov_len = SCARG(uap, nbyte); 248 249 FREF(fp); 250 251 /* dofilewritev() will FRELE the descriptor for us */ 252 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 253 } 254 255 /* 256 * Gather write system call 257 */ 258 int 259 sys_writev(struct proc *p, void *v, register_t *retval) 260 { 261 struct sys_writev_args /* { 262 syscallarg(int) fd; 263 syscallarg(const struct iovec *) iovp; 264 syscallarg(int) iovcnt; 265 } */ *uap = v; 266 int fd = SCARG(uap, fd); 267 struct file *fp; 268 struct filedesc *fdp = p->p_fd; 269 270 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 271 return (EBADF); 272 FREF(fp); 273 274 /* dofilewritev() will FRELE the descriptor for us */ 275 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 276 &fp->f_offset, retval)); 277 } 278 279 int 280 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 281 int iovcnt, int userspace, off_t *offset, register_t *retval) 282 { 283 struct iovec aiov[UIO_SMALLIOV]; 284 struct uio auio; 285 struct iovec *iov; 286 struct iovec *needfree = NULL; 287 long i, cnt, error = 0; 288 u_int iovlen; 289 #ifdef KTRACE 290 struct iovec *ktriov = NULL; 291 #endif 292 293 /* note: can't use iovlen until iovcnt is validated */ 294 iovlen = iovcnt * sizeof(struct iovec); 295 296 /* 297 * If the iovec array exists in userspace, it needs to be copied in; 298 * otherwise, it can be used directly. 299 */ 300 if (userspace) { 301 if ((u_int)iovcnt > UIO_SMALLIOV) { 302 if ((u_int)iovcnt > IOV_MAX) { 303 error = EINVAL; 304 goto out; 305 } 306 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 307 } else if ((u_int)iovcnt > 0) { 308 iov = aiov; 309 needfree = NULL; 310 } else { 311 error = EINVAL; 312 goto out; 313 } 314 if ((error = copyin(iovp, iov, iovlen))) 315 goto done; 316 #ifdef KTRACE 317 if (KTRPOINT(p, KTR_STRUCT)) 318 ktriovec(p, iov, iovcnt); 319 #endif 320 } else { 321 iov = (struct iovec *)iovp; /* de-constify */ 322 } 323 324 auio.uio_iov = iov; 325 auio.uio_iovcnt = iovcnt; 326 auio.uio_rw = UIO_WRITE; 327 auio.uio_segflg = UIO_USERSPACE; 328 auio.uio_procp = p; 329 auio.uio_resid = 0; 330 for (i = 0; i < iovcnt; i++) { 331 auio.uio_resid += iov->iov_len; 332 /* 333 * Writes return ssize_t because -1 is returned on error. 334 * Therefore we must restrict the length to SSIZE_MAX to 335 * avoid garbage return values. Note that the addition is 336 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 337 */ 338 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 339 error = EINVAL; 340 goto done; 341 } 342 iov++; 343 } 344 #ifdef KTRACE 345 /* 346 * if tracing, save a copy of iovec 347 */ 348 if (KTRPOINT(p, KTR_GENIO)) { 349 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 350 memcpy(ktriov, auio.uio_iov, iovlen); 351 } 352 #endif 353 cnt = auio.uio_resid; 354 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 355 if (error) { 356 if (auio.uio_resid != cnt && (error == ERESTART || 357 error == EINTR || error == EWOULDBLOCK)) 358 error = 0; 359 if (error == EPIPE) 360 ptsignal(p, SIGPIPE, STHREAD); 361 } 362 cnt -= auio.uio_resid; 363 364 fp->f_wxfer++; 365 fp->f_wbytes += cnt; 366 #ifdef KTRACE 367 if (ktriov != NULL) { 368 if (error == 0) 369 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 370 free(ktriov, M_TEMP, iovlen); 371 } 372 #endif 373 *retval = cnt; 374 done: 375 if (needfree) 376 free(needfree, M_IOV, iovlen); 377 out: 378 FRELE(fp, p); 379 return (error); 380 } 381 382 /* 383 * Ioctl system call 384 */ 385 int 386 sys_ioctl(struct proc *p, void *v, register_t *retval) 387 { 388 struct sys_ioctl_args /* { 389 syscallarg(int) fd; 390 syscallarg(u_long) com; 391 syscallarg(void *) data; 392 } */ *uap = v; 393 struct file *fp; 394 struct filedesc *fdp; 395 u_long com = SCARG(uap, com); 396 int error; 397 u_int size; 398 caddr_t data, memp; 399 int tmp; 400 #define STK_PARAMS 128 401 long long stkbuf[STK_PARAMS / sizeof(long long)]; 402 403 fdp = p->p_fd; 404 fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE); 405 406 if (fp == NULL) 407 return (EBADF); 408 409 if (fp->f_type == DTYPE_SOCKET) { 410 struct socket *so = fp->f_data; 411 412 if (so->so_state & SS_DNS) 413 return (EINVAL); 414 } 415 416 error = pledge_ioctl(p, com, fp); 417 if (error) 418 return (error); 419 420 switch (com) { 421 case FIONCLEX: 422 case FIOCLEX: 423 fdplock(fdp); 424 if (com == FIONCLEX) 425 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 426 else 427 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 428 fdpunlock(fdp); 429 return (0); 430 } 431 432 /* 433 * Interpret high order word to find amount of data to be 434 * copied to/from the user's address space. 435 */ 436 size = IOCPARM_LEN(com); 437 if (size > IOCPARM_MAX) 438 return (ENOTTY); 439 FREF(fp); 440 memp = NULL; 441 if (size > sizeof (stkbuf)) { 442 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 443 data = memp; 444 } else 445 data = (caddr_t)stkbuf; 446 if (com&IOC_IN) { 447 if (size) { 448 error = copyin(SCARG(uap, data), data, size); 449 if (error) { 450 goto out; 451 } 452 } else 453 *(caddr_t *)data = SCARG(uap, data); 454 } else if ((com&IOC_OUT) && size) 455 /* 456 * Zero the buffer so the user always 457 * gets back something deterministic. 458 */ 459 memset(data, 0, size); 460 else if (com&IOC_VOID) 461 *(caddr_t *)data = SCARG(uap, data); 462 463 switch (com) { 464 465 case FIONBIO: 466 if ((tmp = *(int *)data) != 0) 467 fp->f_flag |= FNONBLOCK; 468 else 469 fp->f_flag &= ~FNONBLOCK; 470 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 471 break; 472 473 case FIOASYNC: 474 if ((tmp = *(int *)data) != 0) 475 fp->f_flag |= FASYNC; 476 else 477 fp->f_flag &= ~FASYNC; 478 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 479 break; 480 481 case FIOSETOWN: 482 tmp = *(int *)data; 483 if (fp->f_type == DTYPE_SOCKET) { 484 struct socket *so = fp->f_data; 485 486 so->so_pgid = tmp; 487 so->so_siguid = p->p_ucred->cr_ruid; 488 so->so_sigeuid = p->p_ucred->cr_uid; 489 error = 0; 490 break; 491 } 492 if (tmp <= 0) { 493 tmp = -tmp; 494 } else { 495 struct process *pr = prfind(tmp); 496 if (pr == NULL) { 497 error = ESRCH; 498 break; 499 } 500 tmp = pr->ps_pgrp->pg_id; 501 } 502 error = (*fp->f_ops->fo_ioctl) 503 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 504 break; 505 506 case FIOGETOWN: 507 if (fp->f_type == DTYPE_SOCKET) { 508 error = 0; 509 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 510 break; 511 } 512 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 513 *(int *)data = -*(int *)data; 514 break; 515 516 default: 517 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 518 break; 519 } 520 /* 521 * Copy any data to user, size was 522 * already set and checked above. 523 */ 524 if (error == 0 && (com&IOC_OUT) && size) 525 error = copyout(data, SCARG(uap, data), size); 526 out: 527 FRELE(fp, p); 528 if (memp) 529 free(memp, M_IOCTLOPS, size); 530 return (error); 531 } 532 533 int selwait, nselcoll; 534 535 /* 536 * Select system call. 537 */ 538 int 539 sys_select(struct proc *p, void *v, register_t *retval) 540 { 541 struct sys_select_args /* { 542 syscallarg(int) nd; 543 syscallarg(fd_set *) in; 544 syscallarg(fd_set *) ou; 545 syscallarg(fd_set *) ex; 546 syscallarg(struct timeval *) tv; 547 } */ *uap = v; 548 549 struct timespec ts, *tsp = NULL; 550 int error; 551 552 if (SCARG(uap, tv) != NULL) { 553 struct timeval tv; 554 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 555 return (error); 556 if ((error = itimerfix(&tv)) != 0) 557 return (error); 558 #ifdef KTRACE 559 if (KTRPOINT(p, KTR_STRUCT)) 560 ktrreltimeval(p, &tv); 561 #endif 562 TIMEVAL_TO_TIMESPEC(&tv, &ts); 563 tsp = &ts; 564 } 565 566 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 567 SCARG(uap, ex), tsp, NULL, retval)); 568 } 569 570 int 571 sys_pselect(struct proc *p, void *v, register_t *retval) 572 { 573 struct sys_pselect_args /* { 574 syscallarg(int) nd; 575 syscallarg(fd_set *) in; 576 syscallarg(fd_set *) ou; 577 syscallarg(fd_set *) ex; 578 syscallarg(const struct timespec *) ts; 579 syscallarg(const sigset_t *) mask; 580 } */ *uap = v; 581 582 struct timespec ts, *tsp = NULL; 583 sigset_t ss, *ssp = NULL; 584 int error; 585 586 if (SCARG(uap, ts) != NULL) { 587 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 588 return (error); 589 if ((error = timespecfix(&ts)) != 0) 590 return (error); 591 #ifdef KTRACE 592 if (KTRPOINT(p, KTR_STRUCT)) 593 ktrreltimespec(p, &ts); 594 #endif 595 tsp = &ts; 596 } 597 if (SCARG(uap, mask) != NULL) { 598 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 599 return (error); 600 ssp = &ss; 601 } 602 603 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 604 SCARG(uap, ex), tsp, ssp, retval)); 605 } 606 607 int 608 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 609 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 610 { 611 fd_mask bits[6]; 612 fd_set *pibits[3], *pobits[3]; 613 struct timespec ats, rts, tts; 614 int s, ncoll, error = 0, timo; 615 u_int ni; 616 617 if (nd < 0) 618 return (EINVAL); 619 if (nd > p->p_fd->fd_nfiles) { 620 /* forgiving; slightly wrong */ 621 nd = p->p_fd->fd_nfiles; 622 } 623 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 624 if (ni > sizeof(bits[0])) { 625 caddr_t mbits; 626 627 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 628 pibits[0] = (fd_set *)&mbits[ni * 0]; 629 pibits[1] = (fd_set *)&mbits[ni * 1]; 630 pibits[2] = (fd_set *)&mbits[ni * 2]; 631 pobits[0] = (fd_set *)&mbits[ni * 3]; 632 pobits[1] = (fd_set *)&mbits[ni * 4]; 633 pobits[2] = (fd_set *)&mbits[ni * 5]; 634 } else { 635 memset(bits, 0, sizeof(bits)); 636 pibits[0] = (fd_set *)&bits[0]; 637 pibits[1] = (fd_set *)&bits[1]; 638 pibits[2] = (fd_set *)&bits[2]; 639 pobits[0] = (fd_set *)&bits[3]; 640 pobits[1] = (fd_set *)&bits[4]; 641 pobits[2] = (fd_set *)&bits[5]; 642 } 643 644 #define getbits(name, x) \ 645 if (name && (error = copyin(name, pibits[x], ni))) \ 646 goto done; 647 getbits(in, 0); 648 getbits(ou, 1); 649 getbits(ex, 2); 650 #undef getbits 651 #ifdef KTRACE 652 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 653 if (in) ktrfdset(p, pibits[0], ni); 654 if (ou) ktrfdset(p, pibits[1], ni); 655 if (ex) ktrfdset(p, pibits[2], ni); 656 } 657 #endif 658 659 if (tsp) { 660 getnanouptime(&rts); 661 timespecadd(tsp, &rts, &ats); 662 } else { 663 ats.tv_sec = 0; 664 ats.tv_nsec = 0; 665 } 666 timo = 0; 667 668 if (sigmask) 669 dosigsuspend(p, *sigmask &~ sigcantmask); 670 671 retry: 672 ncoll = nselcoll; 673 atomic_setbits_int(&p->p_flag, P_SELECT); 674 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 675 if (error || *retval) 676 goto done; 677 if (tsp) { 678 getnanouptime(&rts); 679 if (timespeccmp(&rts, &ats, >=)) 680 goto done; 681 timespecsub(&ats, &rts, &tts); 682 timo = tts.tv_sec > 24 * 60 * 60 ? 683 24 * 60 * 60 * hz : tstohz(&tts); 684 } 685 s = splhigh(); 686 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 687 splx(s); 688 goto retry; 689 } 690 atomic_clearbits_int(&p->p_flag, P_SELECT); 691 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 692 splx(s); 693 if (error == 0) 694 goto retry; 695 done: 696 atomic_clearbits_int(&p->p_flag, P_SELECT); 697 /* select is not restarted after signals... */ 698 if (error == ERESTART) 699 error = EINTR; 700 if (error == EWOULDBLOCK) 701 error = 0; 702 #define putbits(name, x) \ 703 if (name && (error2 = copyout(pobits[x], name, ni))) \ 704 error = error2; 705 if (error == 0) { 706 int error2; 707 708 putbits(in, 0); 709 putbits(ou, 1); 710 putbits(ex, 2); 711 #undef putbits 712 #ifdef KTRACE 713 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 714 if (in) ktrfdset(p, pobits[0], ni); 715 if (ou) ktrfdset(p, pobits[1], ni); 716 if (ex) ktrfdset(p, pobits[2], ni); 717 } 718 #endif 719 } 720 721 if (pibits[0] != (fd_set *)&bits[0]) 722 free(pibits[0], M_TEMP, 6 * ni); 723 return (error); 724 } 725 726 int 727 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 728 register_t *retval) 729 { 730 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 731 struct filedesc *fdp = p->p_fd; 732 int msk, i, j, fd; 733 fd_mask bits; 734 struct file *fp; 735 int n = 0; 736 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 737 738 for (msk = 0; msk < 3; msk++) { 739 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 740 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 741 742 for (i = 0; i < nfd; i += NFDBITS) { 743 bits = pibits->fds_bits[i/NFDBITS]; 744 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 745 bits &= ~(1 << j); 746 if ((fp = fd_getfile(fdp, fd)) == NULL) 747 return (EBADF); 748 FREF(fp); 749 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 750 FD_SET(fd, pobits); 751 n++; 752 } 753 FRELE(fp, p); 754 } 755 } 756 } 757 *retval = n; 758 return (0); 759 } 760 761 int 762 seltrue(dev_t dev, int events, struct proc *p) 763 { 764 765 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 766 } 767 768 int 769 selfalse(dev_t dev, int events, struct proc *p) 770 { 771 772 return (0); 773 } 774 775 /* 776 * Record a select request. 777 */ 778 void 779 selrecord(struct proc *selector, struct selinfo *sip) 780 { 781 struct proc *p; 782 pid_t mytid; 783 784 mytid = selector->p_tid; 785 if (sip->si_seltid == mytid) 786 return; 787 if (sip->si_seltid && (p = tfind(sip->si_seltid)) && 788 p->p_wchan == (caddr_t)&selwait) 789 sip->si_flags |= SI_COLL; 790 else 791 sip->si_seltid = mytid; 792 } 793 794 /* 795 * Do a wakeup when a selectable event occurs. 796 */ 797 void 798 selwakeup(struct selinfo *sip) 799 { 800 struct proc *p; 801 int s; 802 803 KNOTE(&sip->si_note, NOTE_SUBMIT); 804 if (sip->si_seltid == 0) 805 return; 806 if (sip->si_flags & SI_COLL) { 807 nselcoll++; 808 sip->si_flags &= ~SI_COLL; 809 wakeup(&selwait); 810 } 811 p = tfind(sip->si_seltid); 812 sip->si_seltid = 0; 813 if (p != NULL) { 814 SCHED_LOCK(s); 815 if (p->p_wchan == (caddr_t)&selwait) { 816 if (p->p_stat == SSLEEP) 817 setrunnable(p); 818 else 819 unsleep(p); 820 } else if (p->p_flag & P_SELECT) 821 atomic_clearbits_int(&p->p_flag, P_SELECT); 822 SCHED_UNLOCK(s); 823 } 824 } 825 826 void 827 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 828 { 829 struct filedesc *fdp = p->p_fd; 830 struct file *fp; 831 u_int i; 832 int n = 0; 833 834 for (i = 0; i < nfd; i++, pl++) { 835 /* Check the file descriptor. */ 836 if (pl->fd < 0) { 837 pl->revents = 0; 838 continue; 839 } 840 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 841 pl->revents = POLLNVAL; 842 n++; 843 continue; 844 } 845 FREF(fp); 846 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 847 FRELE(fp, p); 848 if (pl->revents != 0) 849 n++; 850 } 851 *retval = n; 852 } 853 854 /* 855 * Only copyout the revents field. 856 */ 857 int 858 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 859 { 860 int error = 0; 861 u_int i = 0; 862 863 while (!error && i++ < nfds) { 864 error = copyout(&pl->revents, &upl->revents, 865 sizeof(upl->revents)); 866 pl++; 867 upl++; 868 } 869 870 return (error); 871 } 872 873 /* 874 * We are using the same mechanism as select only we encode/decode args 875 * differently. 876 */ 877 int 878 sys_poll(struct proc *p, void *v, register_t *retval) 879 { 880 struct sys_poll_args /* { 881 syscallarg(struct pollfd *) fds; 882 syscallarg(u_int) nfds; 883 syscallarg(int) timeout; 884 } */ *uap = v; 885 886 struct timespec ts, *tsp = NULL; 887 int msec = SCARG(uap, timeout); 888 889 if (msec != INFTIM) { 890 if (msec < 0) 891 return (EINVAL); 892 ts.tv_sec = msec / 1000; 893 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 894 tsp = &ts; 895 } 896 897 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 898 retval)); 899 } 900 901 int 902 sys_ppoll(struct proc *p, void *v, register_t *retval) 903 { 904 struct sys_ppoll_args /* { 905 syscallarg(struct pollfd *) fds; 906 syscallarg(u_int) nfds; 907 syscallarg(const struct timespec *) ts; 908 syscallarg(const sigset_t *) mask; 909 } */ *uap = v; 910 911 int error; 912 struct timespec ts, *tsp = NULL; 913 sigset_t ss, *ssp = NULL; 914 915 if (SCARG(uap, ts) != NULL) { 916 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 917 return (error); 918 if ((error = timespecfix(&ts)) != 0) 919 return (error); 920 #ifdef KTRACE 921 if (KTRPOINT(p, KTR_STRUCT)) 922 ktrreltimespec(p, &ts); 923 #endif 924 tsp = &ts; 925 } 926 927 if (SCARG(uap, mask) != NULL) { 928 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 929 return (error); 930 ssp = &ss; 931 } 932 933 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 934 retval)); 935 } 936 937 int 938 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 939 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 940 { 941 size_t sz; 942 struct pollfd pfds[4], *pl = pfds; 943 struct timespec ats, rts, tts; 944 int timo, ncoll, i, s, error; 945 946 /* Standards say no more than MAX_OPEN; this is possibly better. */ 947 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 948 return (EINVAL); 949 950 /* optimize for the default case, of a small nfds value */ 951 if (nfds > nitems(pfds)) { 952 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 953 M_WAITOK | M_CANFAIL); 954 if (pl == NULL) 955 return (EINVAL); 956 } 957 958 sz = nfds * sizeof(*pl); 959 960 if ((error = copyin(fds, pl, sz)) != 0) 961 goto bad; 962 963 for (i = 0; i < nfds; i++) { 964 pl[i].events &= ~POLL_NOHUP; 965 pl[i].revents = 0; 966 } 967 968 if (tsp != NULL) { 969 getnanouptime(&rts); 970 timespecadd(tsp, &rts, &ats); 971 } else { 972 ats.tv_sec = 0; 973 ats.tv_nsec = 0; 974 } 975 timo = 0; 976 977 if (sigmask) 978 dosigsuspend(p, *sigmask &~ sigcantmask); 979 980 retry: 981 ncoll = nselcoll; 982 atomic_setbits_int(&p->p_flag, P_SELECT); 983 pollscan(p, pl, nfds, retval); 984 if (*retval) 985 goto done; 986 if (tsp != NULL) { 987 getnanouptime(&rts); 988 if (timespeccmp(&rts, &ats, >=)) 989 goto done; 990 timespecsub(&ats, &rts, &tts); 991 timo = tts.tv_sec > 24 * 60 * 60 ? 992 24 * 60 * 60 * hz : tstohz(&tts); 993 } 994 s = splhigh(); 995 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 996 splx(s); 997 goto retry; 998 } 999 atomic_clearbits_int(&p->p_flag, P_SELECT); 1000 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 1001 splx(s); 1002 if (error == 0) 1003 goto retry; 1004 1005 done: 1006 atomic_clearbits_int(&p->p_flag, P_SELECT); 1007 /* 1008 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1009 * ignored (since the whole point is to see what would block). 1010 */ 1011 switch (error) { 1012 case ERESTART: 1013 error = pollout(pl, fds, nfds); 1014 if (error == 0) 1015 error = EINTR; 1016 break; 1017 case EWOULDBLOCK: 1018 case 0: 1019 error = pollout(pl, fds, nfds); 1020 break; 1021 } 1022 #ifdef KTRACE 1023 if (KTRPOINT(p, KTR_STRUCT)) 1024 ktrpollfd(p, pl, nfds); 1025 #endif /* KTRACE */ 1026 bad: 1027 if (pl != pfds) 1028 free(pl, M_TEMP, sz); 1029 return (error); 1030 } 1031 1032 /* 1033 * utrace system call 1034 */ 1035 int 1036 sys_utrace(struct proc *curp, void *v, register_t *retval) 1037 { 1038 #ifdef KTRACE 1039 struct sys_utrace_args /* { 1040 syscallarg(const char *) label; 1041 syscallarg(const void *) addr; 1042 syscallarg(size_t) len; 1043 } */ *uap = v; 1044 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1045 SCARG(uap, len))); 1046 #else 1047 return (0); 1048 #endif 1049 } 1050