1 /* $OpenBSD: sys_generic.c,v 1.81 2013/06/01 16:27:37 tedu Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 66 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 67 int pollout(struct pollfd *, struct pollfd *, u_int); 68 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 69 const struct timespec *, const sigset_t *, register_t *); 70 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 71 const sigset_t *, register_t *); 72 73 /* 74 * Read system call. 75 */ 76 /* ARGSUSED */ 77 int 78 sys_read(struct proc *p, void *v, register_t *retval) 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 struct iovec iov; 86 int fd = SCARG(uap, fd); 87 struct file *fp; 88 struct filedesc *fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 if ((fp->f_flag & FREAD) == 0) 93 return (EBADF); 94 95 iov.iov_base = SCARG(uap, buf); 96 iov.iov_len = SCARG(uap, nbyte); 97 98 FREF(fp); 99 100 /* dofilereadv() will FRELE the descriptor for us */ 101 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 102 } 103 104 /* 105 * Scatter read system call. 106 */ 107 int 108 sys_readv(struct proc *p, void *v, register_t *retval) 109 { 110 struct sys_readv_args /* { 111 syscallarg(int) fd; 112 syscallarg(const struct iovec *) iovp; 113 syscallarg(int) iovcnt; 114 } */ *uap = v; 115 int fd = SCARG(uap, fd); 116 struct file *fp; 117 struct filedesc *fdp = p->p_fd; 118 119 if ((fp = fd_getfile(fdp, fd)) == NULL) 120 return (EBADF); 121 if ((fp->f_flag & FREAD) == 0) 122 return (EBADF); 123 124 FREF(fp); 125 126 /* dofilereadv() will FRELE the descriptor for us */ 127 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 128 &fp->f_offset, retval)); 129 } 130 131 int 132 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 133 int iovcnt, int userspace, off_t *offset, register_t *retval) 134 { 135 struct iovec aiov[UIO_SMALLIOV]; 136 struct uio auio; 137 struct iovec *iov; 138 struct iovec *needfree = NULL; 139 long i, cnt, error = 0; 140 u_int iovlen; 141 #ifdef KTRACE 142 struct iovec *ktriov = NULL; 143 #endif 144 145 /* note: can't use iovlen until iovcnt is validated */ 146 iovlen = iovcnt * sizeof(struct iovec); 147 148 /* 149 * If the iovec array exists in userspace, it needs to be copied in; 150 * otherwise, it can be used directly. 151 */ 152 if (userspace) { 153 if ((u_int)iovcnt > UIO_SMALLIOV) { 154 if ((u_int)iovcnt > IOV_MAX) { 155 error = EINVAL; 156 goto out; 157 } 158 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 159 } else if ((u_int)iovcnt > 0) { 160 iov = aiov; 161 needfree = NULL; 162 } else { 163 error = EINVAL; 164 goto out; 165 } 166 if ((error = copyin(iovp, iov, iovlen))) 167 goto done; 168 } else { 169 iov = (struct iovec *)iovp; /* de-constify */ 170 } 171 172 auio.uio_iov = iov; 173 auio.uio_iovcnt = iovcnt; 174 auio.uio_rw = UIO_READ; 175 auio.uio_segflg = UIO_USERSPACE; 176 auio.uio_procp = p; 177 auio.uio_resid = 0; 178 for (i = 0; i < iovcnt; i++) { 179 auio.uio_resid += iov->iov_len; 180 /* 181 * Reads return ssize_t because -1 is returned on error. 182 * Therefore we must restrict the length to SSIZE_MAX to 183 * avoid garbage return values. Note that the addition is 184 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 185 */ 186 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 187 error = EINVAL; 188 goto done; 189 } 190 iov++; 191 } 192 #ifdef KTRACE 193 /* 194 * if tracing, save a copy of iovec 195 */ 196 if (KTRPOINT(p, KTR_GENIO)) { 197 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 198 bcopy(auio.uio_iov, ktriov, iovlen); 199 } 200 #endif 201 cnt = auio.uio_resid; 202 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 203 if (error) 204 if (auio.uio_resid != cnt && (error == ERESTART || 205 error == EINTR || error == EWOULDBLOCK)) 206 error = 0; 207 cnt -= auio.uio_resid; 208 209 fp->f_rxfer++; 210 fp->f_rbytes += cnt; 211 #ifdef KTRACE 212 if (ktriov != NULL) { 213 if (error == 0) 214 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 215 error); 216 free(ktriov, M_TEMP); 217 } 218 #endif 219 *retval = cnt; 220 done: 221 if (needfree) 222 free(needfree, M_IOV); 223 out: 224 FRELE(fp, p); 225 return (error); 226 } 227 228 /* 229 * Write system call 230 */ 231 int 232 sys_write(struct proc *p, void *v, register_t *retval) 233 { 234 struct sys_write_args /* { 235 syscallarg(int) fd; 236 syscallarg(const void *) buf; 237 syscallarg(size_t) nbyte; 238 } */ *uap = v; 239 struct iovec iov; 240 int fd = SCARG(uap, fd); 241 struct file *fp; 242 struct filedesc *fdp = p->p_fd; 243 244 if ((fp = fd_getfile(fdp, fd)) == NULL) 245 return (EBADF); 246 if ((fp->f_flag & FWRITE) == 0) 247 return (EBADF); 248 249 iov.iov_base = (void *)SCARG(uap, buf); 250 iov.iov_len = SCARG(uap, nbyte); 251 252 FREF(fp); 253 254 /* dofilewritev() will FRELE the descriptor for us */ 255 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 256 } 257 258 /* 259 * Gather write system call 260 */ 261 int 262 sys_writev(struct proc *p, void *v, register_t *retval) 263 { 264 struct sys_writev_args /* { 265 syscallarg(int) fd; 266 syscallarg(const struct iovec *) iovp; 267 syscallarg(int) iovcnt; 268 } */ *uap = v; 269 int fd = SCARG(uap, fd); 270 struct file *fp; 271 struct filedesc *fdp = p->p_fd; 272 273 if ((fp = fd_getfile(fdp, fd)) == NULL) 274 return (EBADF); 275 if ((fp->f_flag & FWRITE) == 0) 276 return (EBADF); 277 278 FREF(fp); 279 280 /* dofilewritev() will FRELE the descriptor for us */ 281 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 282 &fp->f_offset, retval)); 283 } 284 285 int 286 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 287 int iovcnt, int userspace, off_t *offset, register_t *retval) 288 { 289 struct iovec aiov[UIO_SMALLIOV]; 290 struct uio auio; 291 struct iovec *iov; 292 struct iovec *needfree = NULL; 293 long i, cnt, error = 0; 294 u_int iovlen; 295 #ifdef KTRACE 296 struct iovec *ktriov = NULL; 297 #endif 298 299 /* note: can't use iovlen until iovcnt is validated */ 300 iovlen = iovcnt * sizeof(struct iovec); 301 302 /* 303 * If the iovec array exists in userspace, it needs to be copied in; 304 * otherwise, it can be used directly. 305 */ 306 if (userspace) { 307 if ((u_int)iovcnt > UIO_SMALLIOV) { 308 if ((u_int)iovcnt > IOV_MAX) { 309 error = EINVAL; 310 goto out; 311 } 312 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 313 } else if ((u_int)iovcnt > 0) { 314 iov = aiov; 315 needfree = NULL; 316 } else { 317 error = EINVAL; 318 goto out; 319 } 320 if ((error = copyin(iovp, iov, iovlen))) 321 goto done; 322 } else { 323 iov = (struct iovec *)iovp; /* de-constify */ 324 } 325 326 auio.uio_iov = iov; 327 auio.uio_iovcnt = iovcnt; 328 auio.uio_rw = UIO_WRITE; 329 auio.uio_segflg = UIO_USERSPACE; 330 auio.uio_procp = p; 331 auio.uio_resid = 0; 332 for (i = 0; i < iovcnt; i++) { 333 auio.uio_resid += iov->iov_len; 334 /* 335 * Writes return ssize_t because -1 is returned on error. 336 * Therefore we must restrict the length to SSIZE_MAX to 337 * avoid garbage return values. Note that the addition is 338 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 339 */ 340 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 341 error = EINVAL; 342 goto done; 343 } 344 iov++; 345 } 346 #ifdef KTRACE 347 /* 348 * if tracing, save a copy of iovec 349 */ 350 if (KTRPOINT(p, KTR_GENIO)) { 351 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 352 bcopy(auio.uio_iov, ktriov, iovlen); 353 } 354 #endif 355 cnt = auio.uio_resid; 356 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 357 if (error) { 358 if (auio.uio_resid != cnt && (error == ERESTART || 359 error == EINTR || error == EWOULDBLOCK)) 360 error = 0; 361 if (error == EPIPE) 362 ptsignal(p, SIGPIPE, STHREAD); 363 } 364 cnt -= auio.uio_resid; 365 366 fp->f_wxfer++; 367 fp->f_wbytes += cnt; 368 #ifdef KTRACE 369 if (ktriov != NULL) { 370 if (error == 0) 371 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 372 free(ktriov, M_TEMP); 373 } 374 #endif 375 *retval = cnt; 376 done: 377 if (needfree) 378 free(needfree, M_IOV); 379 out: 380 FRELE(fp, p); 381 return (error); 382 } 383 384 /* 385 * Ioctl system call 386 */ 387 /* ARGSUSED */ 388 int 389 sys_ioctl(struct proc *p, void *v, register_t *retval) 390 { 391 struct sys_ioctl_args /* { 392 syscallarg(int) fd; 393 syscallarg(u_long) com; 394 syscallarg(void *) data; 395 } */ *uap = v; 396 struct file *fp; 397 struct filedesc *fdp; 398 u_long com; 399 int error; 400 u_int size; 401 caddr_t data, memp; 402 int tmp; 403 #define STK_PARAMS 128 404 long long stkbuf[STK_PARAMS / sizeof(long long)]; 405 406 fdp = p->p_fd; 407 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 408 return (EBADF); 409 410 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 411 return (EBADF); 412 413 switch (com = SCARG(uap, com)) { 414 case FIONCLEX: 415 case FIOCLEX: 416 fdplock(fdp); 417 if (com == FIONCLEX) 418 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 419 else 420 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 421 fdpunlock(fdp); 422 return (0); 423 } 424 425 /* 426 * Interpret high order word to find amount of data to be 427 * copied to/from the user's address space. 428 */ 429 size = IOCPARM_LEN(com); 430 if (size > IOCPARM_MAX) 431 return (ENOTTY); 432 FREF(fp); 433 memp = NULL; 434 if (size > sizeof (stkbuf)) { 435 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 436 data = memp; 437 } else 438 data = (caddr_t)stkbuf; 439 if (com&IOC_IN) { 440 if (size) { 441 error = copyin(SCARG(uap, data), data, (u_int)size); 442 if (error) { 443 goto out; 444 } 445 } else 446 *(caddr_t *)data = SCARG(uap, data); 447 } else if ((com&IOC_OUT) && size) 448 /* 449 * Zero the buffer so the user always 450 * gets back something deterministic. 451 */ 452 bzero(data, size); 453 else if (com&IOC_VOID) 454 *(caddr_t *)data = SCARG(uap, data); 455 456 switch (com) { 457 458 case FIONBIO: 459 if ((tmp = *(int *)data) != 0) 460 fp->f_flag |= FNONBLOCK; 461 else 462 fp->f_flag &= ~FNONBLOCK; 463 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 464 break; 465 466 case FIOASYNC: 467 if ((tmp = *(int *)data) != 0) 468 fp->f_flag |= FASYNC; 469 else 470 fp->f_flag &= ~FASYNC; 471 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 472 break; 473 474 case FIOSETOWN: 475 tmp = *(int *)data; 476 if (fp->f_type == DTYPE_SOCKET) { 477 struct socket *so = (struct socket *)fp->f_data; 478 479 so->so_pgid = tmp; 480 so->so_siguid = p->p_cred->p_ruid; 481 so->so_sigeuid = p->p_ucred->cr_uid; 482 error = 0; 483 break; 484 } 485 if (tmp <= 0) { 486 tmp = -tmp; 487 } else { 488 struct process *pr = prfind(tmp); 489 if (pr == NULL) { 490 error = ESRCH; 491 break; 492 } 493 tmp = pr->ps_pgrp->pg_id; 494 } 495 error = (*fp->f_ops->fo_ioctl) 496 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 497 break; 498 499 case FIOGETOWN: 500 if (fp->f_type == DTYPE_SOCKET) { 501 error = 0; 502 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 503 break; 504 } 505 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 506 *(int *)data = -*(int *)data; 507 break; 508 509 default: 510 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 511 break; 512 } 513 /* 514 * Copy any data to user, size was 515 * already set and checked above. 516 */ 517 if (error == 0 && (com&IOC_OUT) && size) 518 error = copyout(data, SCARG(uap, data), (u_int)size); 519 out: 520 FRELE(fp, p); 521 if (memp) 522 free(memp, M_IOCTLOPS); 523 return (error); 524 } 525 526 int selwait, nselcoll; 527 528 /* 529 * Select system call. 530 */ 531 int 532 sys_select(struct proc *p, void *v, register_t *retval) 533 { 534 struct sys_select_args /* { 535 syscallarg(int) nd; 536 syscallarg(fd_set *) in; 537 syscallarg(fd_set *) ou; 538 syscallarg(fd_set *) ex; 539 syscallarg(struct timeval *) tv; 540 } */ *uap = v; 541 542 struct timespec ts, *tsp = NULL; 543 int error; 544 545 if (SCARG(uap, tv) != NULL) { 546 struct timeval tv; 547 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 548 return (error); 549 if ((error = itimerfix(&tv)) != 0) 550 return (error); 551 #ifdef KTRACE 552 if (KTRPOINT(p, KTR_STRUCT)) 553 ktrreltimeval(p, &tv); 554 #endif 555 TIMEVAL_TO_TIMESPEC(&tv, &ts); 556 tsp = &ts; 557 } 558 559 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 560 SCARG(uap, ex), tsp, NULL, retval)); 561 } 562 563 int 564 sys_pselect(struct proc *p, void *v, register_t *retval) 565 { 566 struct sys_pselect_args /* { 567 syscallarg(int) nd; 568 syscallarg(fd_set *) in; 569 syscallarg(fd_set *) ou; 570 syscallarg(fd_set *) ex; 571 syscallarg(const struct timespec *) ts; 572 syscallarg(const sigset_t *) mask; 573 } */ *uap = v; 574 575 struct timespec ts, *tsp = NULL; 576 sigset_t ss, *ssp = NULL; 577 int error; 578 579 if (SCARG(uap, ts) != NULL) { 580 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 581 return (error); 582 if ((error = timespecfix(&ts)) != 0) 583 return (error); 584 #ifdef KTRACE 585 if (KTRPOINT(p, KTR_STRUCT)) 586 ktrreltimespec(p, &ts); 587 #endif 588 tsp = &ts; 589 } 590 if (SCARG(uap, mask) != NULL) { 591 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 592 return (error); 593 ssp = &ss; 594 } 595 596 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 597 SCARG(uap, ex), tsp, ssp, retval)); 598 } 599 600 int 601 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 602 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 603 { 604 fd_mask bits[6]; 605 fd_set *pibits[3], *pobits[3]; 606 struct timespec ats, rts, tts; 607 int s, ncoll, error = 0, timo; 608 u_int ni; 609 610 if (nd < 0) 611 return (EINVAL); 612 if (nd > p->p_fd->fd_nfiles) { 613 /* forgiving; slightly wrong */ 614 nd = p->p_fd->fd_nfiles; 615 } 616 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 617 if (ni > sizeof(bits[0])) { 618 caddr_t mbits; 619 620 mbits = malloc(ni * 6, M_TEMP, M_WAITOK|M_ZERO); 621 pibits[0] = (fd_set *)&mbits[ni * 0]; 622 pibits[1] = (fd_set *)&mbits[ni * 1]; 623 pibits[2] = (fd_set *)&mbits[ni * 2]; 624 pobits[0] = (fd_set *)&mbits[ni * 3]; 625 pobits[1] = (fd_set *)&mbits[ni * 4]; 626 pobits[2] = (fd_set *)&mbits[ni * 5]; 627 } else { 628 bzero(bits, sizeof(bits)); 629 pibits[0] = (fd_set *)&bits[0]; 630 pibits[1] = (fd_set *)&bits[1]; 631 pibits[2] = (fd_set *)&bits[2]; 632 pobits[0] = (fd_set *)&bits[3]; 633 pobits[1] = (fd_set *)&bits[4]; 634 pobits[2] = (fd_set *)&bits[5]; 635 } 636 637 #define getbits(name, x) \ 638 if (name && (error = copyin(name, pibits[x], ni))) \ 639 goto done; 640 getbits(in, 0); 641 getbits(ou, 1); 642 getbits(ex, 2); 643 #undef getbits 644 #ifdef KTRACE 645 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 646 if (in) ktrfdset(p, pibits[0], ni); 647 if (ou) ktrfdset(p, pibits[1], ni); 648 if (ex) ktrfdset(p, pibits[2], ni); 649 } 650 #endif 651 652 if (tsp) { 653 getnanouptime(&rts); 654 timespecadd(tsp, &rts, &ats); 655 } else { 656 ats.tv_sec = 0; 657 ats.tv_nsec = 0; 658 } 659 timo = 0; 660 661 if (sigmask) { 662 p->p_oldmask = p->p_sigmask; 663 atomic_setbits_int(&p->p_flag, P_SIGSUSPEND); 664 p->p_sigmask = *sigmask &~ sigcantmask; 665 } 666 667 retry: 668 ncoll = nselcoll; 669 atomic_setbits_int(&p->p_flag, P_SELECT); 670 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 671 if (error || *retval) 672 goto done; 673 if (tsp) { 674 getnanouptime(&rts); 675 if (timespeccmp(&rts, &ats, >=)) 676 goto done; 677 timespecsub(&ats, &rts, &tts); 678 timo = tts.tv_sec > 24 * 60 * 60 ? 679 24 * 60 * 60 * hz : tstohz(&tts); 680 } 681 s = splhigh(); 682 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 683 splx(s); 684 goto retry; 685 } 686 atomic_clearbits_int(&p->p_flag, P_SELECT); 687 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 688 splx(s); 689 if (error == 0) 690 goto retry; 691 done: 692 atomic_clearbits_int(&p->p_flag, P_SELECT); 693 /* select is not restarted after signals... */ 694 if (error == ERESTART) 695 error = EINTR; 696 if (error == EWOULDBLOCK) 697 error = 0; 698 #define putbits(name, x) \ 699 if (name && (error2 = copyout(pobits[x], name, ni))) \ 700 error = error2; 701 if (error == 0) { 702 int error2; 703 704 putbits(in, 0); 705 putbits(ou, 1); 706 putbits(ex, 2); 707 #undef putbits 708 #ifdef KTRACE 709 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 710 if (in) ktrfdset(p, pobits[0], ni); 711 if (ou) ktrfdset(p, pobits[1], ni); 712 if (ex) ktrfdset(p, pobits[2], ni); 713 } 714 #endif 715 } 716 717 if (pibits[0] != (fd_set *)&bits[0]) 718 free(pibits[0], M_TEMP); 719 return (error); 720 } 721 722 int 723 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 724 register_t *retval) 725 { 726 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 727 struct filedesc *fdp = p->p_fd; 728 int msk, i, j, fd; 729 fd_mask bits; 730 struct file *fp; 731 int n = 0; 732 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI }; 733 734 for (msk = 0; msk < 3; msk++) { 735 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 736 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 737 738 for (i = 0; i < nfd; i += NFDBITS) { 739 bits = pibits->fds_bits[i/NFDBITS]; 740 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 741 bits &= ~(1 << j); 742 if ((fp = fd_getfile(fdp, fd)) == NULL) 743 return (EBADF); 744 FREF(fp); 745 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 746 FD_SET(fd, pobits); 747 n++; 748 } 749 FRELE(fp, p); 750 } 751 } 752 } 753 *retval = n; 754 return (0); 755 } 756 757 /*ARGSUSED*/ 758 int 759 seltrue(dev_t dev, int events, struct proc *p) 760 { 761 762 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 763 } 764 765 int 766 selfalse(dev_t dev, int events, struct proc *p) 767 { 768 769 return (0); 770 } 771 772 /* 773 * Record a select request. 774 */ 775 void 776 selrecord(struct proc *selector, struct selinfo *sip) 777 { 778 struct proc *p; 779 pid_t mypid; 780 781 mypid = selector->p_pid; 782 if (sip->si_selpid == mypid) 783 return; 784 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 785 p->p_wchan == (caddr_t)&selwait) 786 sip->si_flags |= SI_COLL; 787 else 788 sip->si_selpid = mypid; 789 } 790 791 /* 792 * Do a wakeup when a selectable event occurs. 793 */ 794 void 795 selwakeup(struct selinfo *sip) 796 { 797 struct proc *p; 798 int s; 799 800 KNOTE(&sip->si_note, 0); 801 if (sip->si_selpid == 0) 802 return; 803 if (sip->si_flags & SI_COLL) { 804 nselcoll++; 805 sip->si_flags &= ~SI_COLL; 806 wakeup(&selwait); 807 } 808 p = pfind(sip->si_selpid); 809 sip->si_selpid = 0; 810 if (p != NULL) { 811 SCHED_LOCK(s); 812 if (p->p_wchan == (caddr_t)&selwait) { 813 if (p->p_stat == SSLEEP) 814 setrunnable(p); 815 else 816 unsleep(p); 817 } else if (p->p_flag & P_SELECT) 818 atomic_clearbits_int(&p->p_flag, P_SELECT); 819 SCHED_UNLOCK(s); 820 } 821 } 822 823 void 824 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 825 { 826 struct filedesc *fdp = p->p_fd; 827 struct file *fp; 828 u_int i; 829 int n = 0; 830 831 for (i = 0; i < nfd; i++, pl++) { 832 /* Check the file descriptor. */ 833 if (pl->fd < 0) { 834 pl->revents = 0; 835 continue; 836 } 837 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 838 pl->revents = POLLNVAL; 839 n++; 840 continue; 841 } 842 FREF(fp); 843 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 844 FRELE(fp, p); 845 if (pl->revents != 0) 846 n++; 847 } 848 *retval = n; 849 } 850 851 /* 852 * Only copyout the revents field. 853 */ 854 int 855 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 856 { 857 int error = 0; 858 u_int i = 0; 859 860 while (!error && i++ < nfds) { 861 error = copyout(&pl->revents, &upl->revents, 862 sizeof(upl->revents)); 863 pl++; 864 upl++; 865 } 866 867 return (error); 868 } 869 870 /* 871 * We are using the same mechanism as select only we encode/decode args 872 * differently. 873 */ 874 int 875 sys_poll(struct proc *p, void *v, register_t *retval) 876 { 877 struct sys_poll_args /* { 878 syscallarg(struct pollfd *) fds; 879 syscallarg(u_int) nfds; 880 syscallarg(int) timeout; 881 } */ *uap = v; 882 883 struct timespec ts, *tsp = NULL; 884 int msec = SCARG(uap, timeout); 885 886 if (msec != INFTIM) { 887 if (msec < 0) 888 return (EINVAL); 889 ts.tv_sec = msec / 1000; 890 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 891 tsp = &ts; 892 } 893 894 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 895 retval)); 896 } 897 898 int 899 sys_ppoll(struct proc *p, void *v, register_t *retval) 900 { 901 struct sys_ppoll_args /* { 902 syscallarg(struct pollfd *) fds; 903 syscallarg(u_int) nfds; 904 syscallarg(const struct timespec *) ts; 905 syscallarg(const sigset_t *) mask; 906 } */ *uap = v; 907 908 int error; 909 struct timespec ts, *tsp = NULL; 910 sigset_t ss, *ssp = NULL; 911 912 if (SCARG(uap, ts) != NULL) { 913 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 914 return (error); 915 if ((error = timespecfix(&ts)) != 0) 916 return (error); 917 #ifdef KTRACE 918 if (KTRPOINT(p, KTR_STRUCT)) 919 ktrreltimespec(p, &ts); 920 #endif 921 tsp = &ts; 922 } 923 924 if (SCARG(uap, mask) != NULL) { 925 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 926 return (error); 927 ssp = &ss; 928 } 929 930 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 931 retval)); 932 } 933 934 int 935 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 936 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 937 { 938 size_t sz; 939 struct pollfd pfds[4], *pl = pfds; 940 struct timespec ats, rts, tts; 941 int timo, ncoll, i, s, error; 942 extern int nselcoll, selwait; 943 944 /* Standards say no more than MAX_OPEN; this is possibly better. */ 945 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 946 return (EINVAL); 947 948 sz = sizeof(struct pollfd) * nfds; 949 950 /* optimize for the default case, of a small nfds value */ 951 if (sz > sizeof(pfds)) 952 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 953 954 if ((error = copyin(fds, pl, sz)) != 0) 955 goto bad; 956 957 for (i = 0; i < nfds; i++) 958 pl[i].revents = 0; 959 960 if (tsp != NULL) { 961 getnanouptime(&rts); 962 timespecadd(tsp, &rts, &ats); 963 } else { 964 ats.tv_sec = 0; 965 ats.tv_nsec = 0; 966 } 967 timo = 0; 968 969 if (sigmask) { 970 p->p_oldmask = p->p_sigmask; 971 atomic_setbits_int(&p->p_flag, P_SIGSUSPEND); 972 p->p_sigmask = *sigmask &~ sigcantmask; 973 } 974 975 retry: 976 ncoll = nselcoll; 977 atomic_setbits_int(&p->p_flag, P_SELECT); 978 pollscan(p, pl, nfds, retval); 979 if (*retval) 980 goto done; 981 if (tsp != NULL) { 982 getnanouptime(&rts); 983 if (timespeccmp(&rts, &ats, >=)) 984 goto done; 985 timespecsub(&ats, &rts, &tts); 986 timo = tts.tv_sec > 24 * 60 * 60 ? 987 24 * 60 * 60 * hz : tstohz(&tts); 988 } 989 s = splhigh(); 990 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 991 splx(s); 992 goto retry; 993 } 994 atomic_clearbits_int(&p->p_flag, P_SELECT); 995 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 996 splx(s); 997 if (error == 0) 998 goto retry; 999 1000 done: 1001 atomic_clearbits_int(&p->p_flag, P_SELECT); 1002 /* 1003 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1004 * ignored (since the whole point is to see what would block). 1005 */ 1006 switch (error) { 1007 case ERESTART: 1008 error = pollout(pl, fds, nfds); 1009 if (error == 0) 1010 error = EINTR; 1011 break; 1012 case EWOULDBLOCK: 1013 case 0: 1014 error = pollout(pl, fds, nfds); 1015 break; 1016 } 1017 bad: 1018 if (pl != pfds) 1019 free(pl, M_TEMP); 1020 return (error); 1021 } 1022 1023 /* 1024 * utrace system call 1025 */ 1026 /* ARGSUSED */ 1027 int 1028 sys_utrace(struct proc *curp, void *v, register_t *retval) 1029 { 1030 #ifdef KTRACE 1031 struct sys_utrace_args /* { 1032 syscallarg(const char *) label; 1033 syscallarg(const void *) addr; 1034 syscallarg(size_t) len; 1035 } */ *uap = v; 1036 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1037 SCARG(uap, len))); 1038 #else 1039 return (0); 1040 #endif 1041 } 1042