1 /* $OpenBSD: sys_generic.c,v 1.112 2016/07/05 00:35:09 tedu Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 #include <sys/pledge.h> 60 61 #include <sys/mount.h> 62 #include <sys/syscallargs.h> 63 64 #include <uvm/uvm_extern.h> 65 66 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 67 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 68 int pollout(struct pollfd *, struct pollfd *, u_int); 69 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 70 const struct timespec *, const sigset_t *, register_t *); 71 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 72 const sigset_t *, register_t *); 73 74 /* 75 * Read system call. 76 */ 77 int 78 sys_read(struct proc *p, void *v, register_t *retval) 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 struct iovec iov; 86 int fd = SCARG(uap, fd); 87 struct file *fp; 88 struct filedesc *fdp = p->p_fd; 89 90 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 91 return (EBADF); 92 93 iov.iov_base = SCARG(uap, buf); 94 iov.iov_len = SCARG(uap, nbyte); 95 96 FREF(fp); 97 98 /* dofilereadv() will FRELE the descriptor for us */ 99 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 100 } 101 102 /* 103 * Scatter read system call. 104 */ 105 int 106 sys_readv(struct proc *p, void *v, register_t *retval) 107 { 108 struct sys_readv_args /* { 109 syscallarg(int) fd; 110 syscallarg(const struct iovec *) iovp; 111 syscallarg(int) iovcnt; 112 } */ *uap = v; 113 int fd = SCARG(uap, fd); 114 struct file *fp; 115 struct filedesc *fdp = p->p_fd; 116 117 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 118 return (EBADF); 119 FREF(fp); 120 121 /* dofilereadv() will FRELE the descriptor for us */ 122 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 123 &fp->f_offset, retval)); 124 } 125 126 int 127 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 128 int iovcnt, int userspace, off_t *offset, register_t *retval) 129 { 130 struct iovec aiov[UIO_SMALLIOV]; 131 struct uio auio; 132 struct iovec *iov; 133 struct iovec *needfree = NULL; 134 long i, cnt, error = 0; 135 u_int iovlen; 136 #ifdef KTRACE 137 struct iovec *ktriov = NULL; 138 #endif 139 140 /* note: can't use iovlen until iovcnt is validated */ 141 iovlen = iovcnt * sizeof(struct iovec); 142 143 /* 144 * If the iovec array exists in userspace, it needs to be copied in; 145 * otherwise, it can be used directly. 146 */ 147 if (userspace) { 148 if ((u_int)iovcnt > UIO_SMALLIOV) { 149 if ((u_int)iovcnt > IOV_MAX) { 150 error = EINVAL; 151 goto out; 152 } 153 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 154 } else if ((u_int)iovcnt > 0) { 155 iov = aiov; 156 needfree = NULL; 157 } else { 158 error = EINVAL; 159 goto out; 160 } 161 if ((error = copyin(iovp, iov, iovlen))) 162 goto done; 163 #ifdef KTRACE 164 if (KTRPOINT(p, KTR_STRUCT)) 165 ktriovec(p, iov, iovcnt); 166 #endif 167 } else { 168 iov = (struct iovec *)iovp; /* de-constify */ 169 } 170 171 auio.uio_iov = iov; 172 auio.uio_iovcnt = iovcnt; 173 auio.uio_rw = UIO_READ; 174 auio.uio_segflg = UIO_USERSPACE; 175 auio.uio_procp = p; 176 auio.uio_resid = 0; 177 for (i = 0; i < iovcnt; i++) { 178 auio.uio_resid += iov->iov_len; 179 /* 180 * Reads return ssize_t because -1 is returned on error. 181 * Therefore we must restrict the length to SSIZE_MAX to 182 * avoid garbage return values. Note that the addition is 183 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 184 */ 185 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 186 error = EINVAL; 187 goto done; 188 } 189 iov++; 190 } 191 #ifdef KTRACE 192 /* 193 * if tracing, save a copy of iovec 194 */ 195 if (KTRPOINT(p, KTR_GENIO)) { 196 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 197 memcpy(ktriov, auio.uio_iov, iovlen); 198 } 199 #endif 200 cnt = auio.uio_resid; 201 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 202 if (error) 203 if (auio.uio_resid != cnt && (error == ERESTART || 204 error == EINTR || error == EWOULDBLOCK)) 205 error = 0; 206 cnt -= auio.uio_resid; 207 208 fp->f_rxfer++; 209 fp->f_rbytes += cnt; 210 #ifdef KTRACE 211 if (ktriov != NULL) { 212 if (error == 0) 213 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 214 free(ktriov, M_TEMP, iovlen); 215 } 216 #endif 217 *retval = cnt; 218 done: 219 if (needfree) 220 free(needfree, M_IOV, iovlen); 221 out: 222 FRELE(fp, p); 223 return (error); 224 } 225 226 /* 227 * Write system call 228 */ 229 int 230 sys_write(struct proc *p, void *v, register_t *retval) 231 { 232 struct sys_write_args /* { 233 syscallarg(int) fd; 234 syscallarg(const void *) buf; 235 syscallarg(size_t) nbyte; 236 } */ *uap = v; 237 struct iovec iov; 238 int fd = SCARG(uap, fd); 239 struct file *fp; 240 struct filedesc *fdp = p->p_fd; 241 242 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 243 return (EBADF); 244 245 iov.iov_base = (void *)SCARG(uap, buf); 246 iov.iov_len = SCARG(uap, nbyte); 247 248 FREF(fp); 249 250 /* dofilewritev() will FRELE the descriptor for us */ 251 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 252 } 253 254 /* 255 * Gather write system call 256 */ 257 int 258 sys_writev(struct proc *p, void *v, register_t *retval) 259 { 260 struct sys_writev_args /* { 261 syscallarg(int) fd; 262 syscallarg(const struct iovec *) iovp; 263 syscallarg(int) iovcnt; 264 } */ *uap = v; 265 int fd = SCARG(uap, fd); 266 struct file *fp; 267 struct filedesc *fdp = p->p_fd; 268 269 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 270 return (EBADF); 271 FREF(fp); 272 273 /* dofilewritev() will FRELE the descriptor for us */ 274 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 275 &fp->f_offset, retval)); 276 } 277 278 int 279 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 280 int iovcnt, int userspace, off_t *offset, register_t *retval) 281 { 282 struct iovec aiov[UIO_SMALLIOV]; 283 struct uio auio; 284 struct iovec *iov; 285 struct iovec *needfree = NULL; 286 long i, cnt, error = 0; 287 u_int iovlen; 288 #ifdef KTRACE 289 struct iovec *ktriov = NULL; 290 #endif 291 292 /* note: can't use iovlen until iovcnt is validated */ 293 iovlen = iovcnt * sizeof(struct iovec); 294 295 /* 296 * If the iovec array exists in userspace, it needs to be copied in; 297 * otherwise, it can be used directly. 298 */ 299 if (userspace) { 300 if ((u_int)iovcnt > UIO_SMALLIOV) { 301 if ((u_int)iovcnt > IOV_MAX) { 302 error = EINVAL; 303 goto out; 304 } 305 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 306 } else if ((u_int)iovcnt > 0) { 307 iov = aiov; 308 needfree = NULL; 309 } else { 310 error = EINVAL; 311 goto out; 312 } 313 if ((error = copyin(iovp, iov, iovlen))) 314 goto done; 315 #ifdef KTRACE 316 if (KTRPOINT(p, KTR_STRUCT)) 317 ktriovec(p, iov, iovcnt); 318 #endif 319 } else { 320 iov = (struct iovec *)iovp; /* de-constify */ 321 } 322 323 auio.uio_iov = iov; 324 auio.uio_iovcnt = iovcnt; 325 auio.uio_rw = UIO_WRITE; 326 auio.uio_segflg = UIO_USERSPACE; 327 auio.uio_procp = p; 328 auio.uio_resid = 0; 329 for (i = 0; i < iovcnt; i++) { 330 auio.uio_resid += iov->iov_len; 331 /* 332 * Writes return ssize_t because -1 is returned on error. 333 * Therefore we must restrict the length to SSIZE_MAX to 334 * avoid garbage return values. Note that the addition is 335 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 336 */ 337 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 338 error = EINVAL; 339 goto done; 340 } 341 iov++; 342 } 343 #ifdef KTRACE 344 /* 345 * if tracing, save a copy of iovec 346 */ 347 if (KTRPOINT(p, KTR_GENIO)) { 348 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 349 memcpy(ktriov, auio.uio_iov, iovlen); 350 } 351 #endif 352 cnt = auio.uio_resid; 353 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 354 if (error) { 355 if (auio.uio_resid != cnt && (error == ERESTART || 356 error == EINTR || error == EWOULDBLOCK)) 357 error = 0; 358 if (error == EPIPE) 359 ptsignal(p, SIGPIPE, STHREAD); 360 } 361 cnt -= auio.uio_resid; 362 363 fp->f_wxfer++; 364 fp->f_wbytes += cnt; 365 #ifdef KTRACE 366 if (ktriov != NULL) { 367 if (error == 0) 368 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 369 free(ktriov, M_TEMP, iovlen); 370 } 371 #endif 372 *retval = cnt; 373 done: 374 if (needfree) 375 free(needfree, M_IOV, iovlen); 376 out: 377 FRELE(fp, p); 378 return (error); 379 } 380 381 /* 382 * Ioctl system call 383 */ 384 int 385 sys_ioctl(struct proc *p, void *v, register_t *retval) 386 { 387 struct sys_ioctl_args /* { 388 syscallarg(int) fd; 389 syscallarg(u_long) com; 390 syscallarg(void *) data; 391 } */ *uap = v; 392 struct file *fp; 393 struct filedesc *fdp; 394 u_long com = SCARG(uap, com); 395 int error; 396 u_int size; 397 caddr_t data, memp; 398 int tmp; 399 #define STK_PARAMS 128 400 long long stkbuf[STK_PARAMS / sizeof(long long)]; 401 402 fdp = p->p_fd; 403 fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE); 404 405 if (fp == NULL) 406 return (EBADF); 407 408 if (fp->f_type == DTYPE_SOCKET) { 409 struct socket *so = fp->f_data; 410 411 if (so->so_state & SS_DNS) 412 return (EINVAL); 413 } 414 415 error = pledge_ioctl(p, com, fp); 416 if (error) 417 return (error); 418 419 switch (com) { 420 case FIONCLEX: 421 case FIOCLEX: 422 fdplock(fdp); 423 if (com == FIONCLEX) 424 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 425 else 426 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 427 fdpunlock(fdp); 428 return (0); 429 } 430 431 /* 432 * Interpret high order word to find amount of data to be 433 * copied to/from the user's address space. 434 */ 435 size = IOCPARM_LEN(com); 436 if (size > IOCPARM_MAX) 437 return (ENOTTY); 438 FREF(fp); 439 memp = NULL; 440 if (size > sizeof (stkbuf)) { 441 memp = malloc(size, M_IOCTLOPS, M_WAITOK); 442 data = memp; 443 } else 444 data = (caddr_t)stkbuf; 445 if (com&IOC_IN) { 446 if (size) { 447 error = copyin(SCARG(uap, data), data, size); 448 if (error) { 449 goto out; 450 } 451 } else 452 *(caddr_t *)data = SCARG(uap, data); 453 } else if ((com&IOC_OUT) && size) 454 /* 455 * Zero the buffer so the user always 456 * gets back something deterministic. 457 */ 458 memset(data, 0, size); 459 else if (com&IOC_VOID) 460 *(caddr_t *)data = SCARG(uap, data); 461 462 switch (com) { 463 464 case FIONBIO: 465 if ((tmp = *(int *)data) != 0) 466 fp->f_flag |= FNONBLOCK; 467 else 468 fp->f_flag &= ~FNONBLOCK; 469 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 470 break; 471 472 case FIOASYNC: 473 if ((tmp = *(int *)data) != 0) 474 fp->f_flag |= FASYNC; 475 else 476 fp->f_flag &= ~FASYNC; 477 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 478 break; 479 480 case FIOSETOWN: 481 tmp = *(int *)data; 482 if (fp->f_type == DTYPE_SOCKET) { 483 struct socket *so = fp->f_data; 484 485 so->so_pgid = tmp; 486 so->so_siguid = p->p_ucred->cr_ruid; 487 so->so_sigeuid = p->p_ucred->cr_uid; 488 error = 0; 489 break; 490 } 491 if (tmp <= 0) { 492 tmp = -tmp; 493 } else { 494 struct process *pr = prfind(tmp); 495 if (pr == NULL) { 496 error = ESRCH; 497 break; 498 } 499 tmp = pr->ps_pgrp->pg_id; 500 } 501 error = (*fp->f_ops->fo_ioctl) 502 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 503 break; 504 505 case FIOGETOWN: 506 if (fp->f_type == DTYPE_SOCKET) { 507 error = 0; 508 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 509 break; 510 } 511 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 512 *(int *)data = -*(int *)data; 513 break; 514 515 default: 516 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 517 break; 518 } 519 /* 520 * Copy any data to user, size was 521 * already set and checked above. 522 */ 523 if (error == 0 && (com&IOC_OUT) && size) 524 error = copyout(data, SCARG(uap, data), size); 525 out: 526 FRELE(fp, p); 527 if (memp) 528 free(memp, M_IOCTLOPS, size); 529 return (error); 530 } 531 532 int selwait, nselcoll; 533 534 /* 535 * Select system call. 536 */ 537 int 538 sys_select(struct proc *p, void *v, register_t *retval) 539 { 540 struct sys_select_args /* { 541 syscallarg(int) nd; 542 syscallarg(fd_set *) in; 543 syscallarg(fd_set *) ou; 544 syscallarg(fd_set *) ex; 545 syscallarg(struct timeval *) tv; 546 } */ *uap = v; 547 548 struct timespec ts, *tsp = NULL; 549 int error; 550 551 if (SCARG(uap, tv) != NULL) { 552 struct timeval tv; 553 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 554 return (error); 555 if ((error = itimerfix(&tv)) != 0) 556 return (error); 557 #ifdef KTRACE 558 if (KTRPOINT(p, KTR_STRUCT)) 559 ktrreltimeval(p, &tv); 560 #endif 561 TIMEVAL_TO_TIMESPEC(&tv, &ts); 562 tsp = &ts; 563 } 564 565 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 566 SCARG(uap, ex), tsp, NULL, retval)); 567 } 568 569 int 570 sys_pselect(struct proc *p, void *v, register_t *retval) 571 { 572 struct sys_pselect_args /* { 573 syscallarg(int) nd; 574 syscallarg(fd_set *) in; 575 syscallarg(fd_set *) ou; 576 syscallarg(fd_set *) ex; 577 syscallarg(const struct timespec *) ts; 578 syscallarg(const sigset_t *) mask; 579 } */ *uap = v; 580 581 struct timespec ts, *tsp = NULL; 582 sigset_t ss, *ssp = NULL; 583 int error; 584 585 if (SCARG(uap, ts) != NULL) { 586 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 587 return (error); 588 if ((error = timespecfix(&ts)) != 0) 589 return (error); 590 #ifdef KTRACE 591 if (KTRPOINT(p, KTR_STRUCT)) 592 ktrreltimespec(p, &ts); 593 #endif 594 tsp = &ts; 595 } 596 if (SCARG(uap, mask) != NULL) { 597 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 598 return (error); 599 ssp = &ss; 600 } 601 602 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 603 SCARG(uap, ex), tsp, ssp, retval)); 604 } 605 606 int 607 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 608 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 609 { 610 fd_mask bits[6]; 611 fd_set *pibits[3], *pobits[3]; 612 struct timespec ats, rts, tts; 613 int s, ncoll, error = 0, timo; 614 u_int ni; 615 616 if (nd < 0) 617 return (EINVAL); 618 if (nd > p->p_fd->fd_nfiles) { 619 /* forgiving; slightly wrong */ 620 nd = p->p_fd->fd_nfiles; 621 } 622 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 623 if (ni > sizeof(bits[0])) { 624 caddr_t mbits; 625 626 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 627 pibits[0] = (fd_set *)&mbits[ni * 0]; 628 pibits[1] = (fd_set *)&mbits[ni * 1]; 629 pibits[2] = (fd_set *)&mbits[ni * 2]; 630 pobits[0] = (fd_set *)&mbits[ni * 3]; 631 pobits[1] = (fd_set *)&mbits[ni * 4]; 632 pobits[2] = (fd_set *)&mbits[ni * 5]; 633 } else { 634 memset(bits, 0, sizeof(bits)); 635 pibits[0] = (fd_set *)&bits[0]; 636 pibits[1] = (fd_set *)&bits[1]; 637 pibits[2] = (fd_set *)&bits[2]; 638 pobits[0] = (fd_set *)&bits[3]; 639 pobits[1] = (fd_set *)&bits[4]; 640 pobits[2] = (fd_set *)&bits[5]; 641 } 642 643 #define getbits(name, x) \ 644 if (name && (error = copyin(name, pibits[x], ni))) \ 645 goto done; 646 getbits(in, 0); 647 getbits(ou, 1); 648 getbits(ex, 2); 649 #undef getbits 650 #ifdef KTRACE 651 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 652 if (in) ktrfdset(p, pibits[0], ni); 653 if (ou) ktrfdset(p, pibits[1], ni); 654 if (ex) ktrfdset(p, pibits[2], ni); 655 } 656 #endif 657 658 if (tsp) { 659 getnanouptime(&rts); 660 timespecadd(tsp, &rts, &ats); 661 } else { 662 ats.tv_sec = 0; 663 ats.tv_nsec = 0; 664 } 665 timo = 0; 666 667 if (sigmask) 668 dosigsuspend(p, *sigmask &~ sigcantmask); 669 670 retry: 671 ncoll = nselcoll; 672 atomic_setbits_int(&p->p_flag, P_SELECT); 673 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 674 if (error || *retval) 675 goto done; 676 if (tsp) { 677 getnanouptime(&rts); 678 if (timespeccmp(&rts, &ats, >=)) 679 goto done; 680 timespecsub(&ats, &rts, &tts); 681 timo = tts.tv_sec > 24 * 60 * 60 ? 682 24 * 60 * 60 * hz : tstohz(&tts); 683 } 684 s = splhigh(); 685 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 686 splx(s); 687 goto retry; 688 } 689 atomic_clearbits_int(&p->p_flag, P_SELECT); 690 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 691 splx(s); 692 if (error == 0) 693 goto retry; 694 done: 695 atomic_clearbits_int(&p->p_flag, P_SELECT); 696 /* select is not restarted after signals... */ 697 if (error == ERESTART) 698 error = EINTR; 699 if (error == EWOULDBLOCK) 700 error = 0; 701 #define putbits(name, x) \ 702 if (name && (error2 = copyout(pobits[x], name, ni))) \ 703 error = error2; 704 if (error == 0) { 705 int error2; 706 707 putbits(in, 0); 708 putbits(ou, 1); 709 putbits(ex, 2); 710 #undef putbits 711 #ifdef KTRACE 712 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 713 if (in) ktrfdset(p, pobits[0], ni); 714 if (ou) ktrfdset(p, pobits[1], ni); 715 if (ex) ktrfdset(p, pobits[2], ni); 716 } 717 #endif 718 } 719 720 if (pibits[0] != (fd_set *)&bits[0]) 721 free(pibits[0], M_TEMP, 6 * ni); 722 return (error); 723 } 724 725 int 726 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 727 register_t *retval) 728 { 729 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 730 struct filedesc *fdp = p->p_fd; 731 int msk, i, j, fd; 732 fd_mask bits; 733 struct file *fp; 734 int n = 0; 735 static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; 736 737 for (msk = 0; msk < 3; msk++) { 738 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 739 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 740 741 for (i = 0; i < nfd; i += NFDBITS) { 742 bits = pibits->fds_bits[i/NFDBITS]; 743 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 744 bits &= ~(1 << j); 745 if ((fp = fd_getfile(fdp, fd)) == NULL) 746 return (EBADF); 747 FREF(fp); 748 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 749 FD_SET(fd, pobits); 750 n++; 751 } 752 FRELE(fp, p); 753 } 754 } 755 } 756 *retval = n; 757 return (0); 758 } 759 760 int 761 seltrue(dev_t dev, int events, struct proc *p) 762 { 763 764 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 765 } 766 767 int 768 selfalse(dev_t dev, int events, struct proc *p) 769 { 770 771 return (0); 772 } 773 774 /* 775 * Record a select request. 776 */ 777 void 778 selrecord(struct proc *selector, struct selinfo *sip) 779 { 780 struct proc *p; 781 pid_t mypid; 782 783 mypid = selector->p_pid; 784 if (sip->si_selpid == mypid) 785 return; 786 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 787 p->p_wchan == (caddr_t)&selwait) 788 sip->si_flags |= SI_COLL; 789 else 790 sip->si_selpid = mypid; 791 } 792 793 /* 794 * Do a wakeup when a selectable event occurs. 795 */ 796 void 797 selwakeup(struct selinfo *sip) 798 { 799 struct proc *p; 800 int s; 801 802 KNOTE(&sip->si_note, 0); 803 if (sip->si_selpid == 0) 804 return; 805 if (sip->si_flags & SI_COLL) { 806 nselcoll++; 807 sip->si_flags &= ~SI_COLL; 808 wakeup(&selwait); 809 } 810 p = pfind(sip->si_selpid); 811 sip->si_selpid = 0; 812 if (p != NULL) { 813 SCHED_LOCK(s); 814 if (p->p_wchan == (caddr_t)&selwait) { 815 if (p->p_stat == SSLEEP) 816 setrunnable(p); 817 else 818 unsleep(p); 819 } else if (p->p_flag & P_SELECT) 820 atomic_clearbits_int(&p->p_flag, P_SELECT); 821 SCHED_UNLOCK(s); 822 } 823 } 824 825 void 826 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 827 { 828 struct filedesc *fdp = p->p_fd; 829 struct file *fp; 830 u_int i; 831 int n = 0; 832 833 for (i = 0; i < nfd; i++, pl++) { 834 /* Check the file descriptor. */ 835 if (pl->fd < 0) { 836 pl->revents = 0; 837 continue; 838 } 839 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 840 pl->revents = POLLNVAL; 841 n++; 842 continue; 843 } 844 FREF(fp); 845 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 846 FRELE(fp, p); 847 if (pl->revents != 0) 848 n++; 849 } 850 *retval = n; 851 } 852 853 /* 854 * Only copyout the revents field. 855 */ 856 int 857 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 858 { 859 int error = 0; 860 u_int i = 0; 861 862 while (!error && i++ < nfds) { 863 error = copyout(&pl->revents, &upl->revents, 864 sizeof(upl->revents)); 865 pl++; 866 upl++; 867 } 868 869 return (error); 870 } 871 872 /* 873 * We are using the same mechanism as select only we encode/decode args 874 * differently. 875 */ 876 int 877 sys_poll(struct proc *p, void *v, register_t *retval) 878 { 879 struct sys_poll_args /* { 880 syscallarg(struct pollfd *) fds; 881 syscallarg(u_int) nfds; 882 syscallarg(int) timeout; 883 } */ *uap = v; 884 885 struct timespec ts, *tsp = NULL; 886 int msec = SCARG(uap, timeout); 887 888 if (msec != INFTIM) { 889 if (msec < 0) 890 return (EINVAL); 891 ts.tv_sec = msec / 1000; 892 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 893 tsp = &ts; 894 } 895 896 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 897 retval)); 898 } 899 900 int 901 sys_ppoll(struct proc *p, void *v, register_t *retval) 902 { 903 struct sys_ppoll_args /* { 904 syscallarg(struct pollfd *) fds; 905 syscallarg(u_int) nfds; 906 syscallarg(const struct timespec *) ts; 907 syscallarg(const sigset_t *) mask; 908 } */ *uap = v; 909 910 int error; 911 struct timespec ts, *tsp = NULL; 912 sigset_t ss, *ssp = NULL; 913 914 if (SCARG(uap, ts) != NULL) { 915 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 916 return (error); 917 if ((error = timespecfix(&ts)) != 0) 918 return (error); 919 #ifdef KTRACE 920 if (KTRPOINT(p, KTR_STRUCT)) 921 ktrreltimespec(p, &ts); 922 #endif 923 tsp = &ts; 924 } 925 926 if (SCARG(uap, mask) != NULL) { 927 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 928 return (error); 929 ssp = &ss; 930 } 931 932 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 933 retval)); 934 } 935 936 int 937 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 938 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 939 { 940 size_t sz; 941 struct pollfd pfds[4], *pl = pfds; 942 struct timespec ats, rts, tts; 943 int timo, ncoll, i, s, error; 944 945 /* Standards say no more than MAX_OPEN; this is possibly better. */ 946 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 947 return (EINVAL); 948 949 /* optimize for the default case, of a small nfds value */ 950 if (nfds > nitems(pfds)) { 951 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 952 M_WAITOK | M_CANFAIL); 953 if (pl == NULL) 954 return (EINVAL); 955 } 956 957 sz = nfds * sizeof(*pl); 958 959 if ((error = copyin(fds, pl, sz)) != 0) 960 goto bad; 961 962 for (i = 0; i < nfds; i++) { 963 pl[i].events &= ~POLL_NOHUP; 964 pl[i].revents = 0; 965 } 966 967 if (tsp != NULL) { 968 getnanouptime(&rts); 969 timespecadd(tsp, &rts, &ats); 970 } else { 971 ats.tv_sec = 0; 972 ats.tv_nsec = 0; 973 } 974 timo = 0; 975 976 if (sigmask) 977 dosigsuspend(p, *sigmask &~ sigcantmask); 978 979 retry: 980 ncoll = nselcoll; 981 atomic_setbits_int(&p->p_flag, P_SELECT); 982 pollscan(p, pl, nfds, retval); 983 if (*retval) 984 goto done; 985 if (tsp != NULL) { 986 getnanouptime(&rts); 987 if (timespeccmp(&rts, &ats, >=)) 988 goto done; 989 timespecsub(&ats, &rts, &tts); 990 timo = tts.tv_sec > 24 * 60 * 60 ? 991 24 * 60 * 60 * hz : tstohz(&tts); 992 } 993 s = splhigh(); 994 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 995 splx(s); 996 goto retry; 997 } 998 atomic_clearbits_int(&p->p_flag, P_SELECT); 999 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 1000 splx(s); 1001 if (error == 0) 1002 goto retry; 1003 1004 done: 1005 atomic_clearbits_int(&p->p_flag, P_SELECT); 1006 /* 1007 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1008 * ignored (since the whole point is to see what would block). 1009 */ 1010 switch (error) { 1011 case ERESTART: 1012 error = pollout(pl, fds, nfds); 1013 if (error == 0) 1014 error = EINTR; 1015 break; 1016 case EWOULDBLOCK: 1017 case 0: 1018 error = pollout(pl, fds, nfds); 1019 break; 1020 } 1021 #ifdef KTRACE 1022 if (KTRPOINT(p, KTR_STRUCT)) 1023 ktrpollfd(p, pl, nfds); 1024 #endif /* KTRACE */ 1025 bad: 1026 if (pl != pfds) 1027 free(pl, M_TEMP, sz); 1028 return (error); 1029 } 1030 1031 /* 1032 * utrace system call 1033 */ 1034 int 1035 sys_utrace(struct proc *curp, void *v, register_t *retval) 1036 { 1037 #ifdef KTRACE 1038 struct sys_utrace_args /* { 1039 syscallarg(const char *) label; 1040 syscallarg(const void *) addr; 1041 syscallarg(size_t) len; 1042 } */ *uap = v; 1043 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1044 SCARG(uap, len))); 1045 #else 1046 return (0); 1047 #endif 1048 } 1049