1 /* $OpenBSD: sys_generic.c,v 1.94 2014/11/03 03:08:00 deraadt Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 66 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 67 int pollout(struct pollfd *, struct pollfd *, u_int); 68 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 69 const struct timespec *, const sigset_t *, register_t *); 70 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 71 const sigset_t *, register_t *); 72 73 /* 74 * Read system call. 75 */ 76 /* ARGSUSED */ 77 int 78 sys_read(struct proc *p, void *v, register_t *retval) 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 struct iovec iov; 86 int fd = SCARG(uap, fd); 87 struct file *fp; 88 struct filedesc *fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 if ((fp->f_flag & FREAD) == 0) 93 return (EBADF); 94 95 iov.iov_base = SCARG(uap, buf); 96 iov.iov_len = SCARG(uap, nbyte); 97 98 FREF(fp); 99 100 /* dofilereadv() will FRELE the descriptor for us */ 101 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 102 } 103 104 /* 105 * Scatter read system call. 106 */ 107 int 108 sys_readv(struct proc *p, void *v, register_t *retval) 109 { 110 struct sys_readv_args /* { 111 syscallarg(int) fd; 112 syscallarg(const struct iovec *) iovp; 113 syscallarg(int) iovcnt; 114 } */ *uap = v; 115 int fd = SCARG(uap, fd); 116 struct file *fp; 117 struct filedesc *fdp = p->p_fd; 118 119 if ((fp = fd_getfile(fdp, fd)) == NULL) 120 return (EBADF); 121 if ((fp->f_flag & FREAD) == 0) 122 return (EBADF); 123 124 FREF(fp); 125 126 /* dofilereadv() will FRELE the descriptor for us */ 127 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 128 &fp->f_offset, retval)); 129 } 130 131 int 132 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 133 int iovcnt, int userspace, off_t *offset, register_t *retval) 134 { 135 struct iovec aiov[UIO_SMALLIOV]; 136 struct uio auio; 137 struct iovec *iov; 138 struct iovec *needfree = NULL; 139 long i, cnt, error = 0; 140 u_int iovlen; 141 #ifdef KTRACE 142 struct iovec *ktriov = NULL; 143 #endif 144 145 /* note: can't use iovlen until iovcnt is validated */ 146 iovlen = iovcnt * sizeof(struct iovec); 147 148 /* 149 * If the iovec array exists in userspace, it needs to be copied in; 150 * otherwise, it can be used directly. 151 */ 152 if (userspace) { 153 if ((u_int)iovcnt > UIO_SMALLIOV) { 154 if ((u_int)iovcnt > IOV_MAX) { 155 error = EINVAL; 156 goto out; 157 } 158 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 159 } else if ((u_int)iovcnt > 0) { 160 iov = aiov; 161 needfree = NULL; 162 } else { 163 error = EINVAL; 164 goto out; 165 } 166 if ((error = copyin(iovp, iov, iovlen))) 167 goto done; 168 } else { 169 iov = (struct iovec *)iovp; /* de-constify */ 170 } 171 172 auio.uio_iov = iov; 173 auio.uio_iovcnt = iovcnt; 174 auio.uio_rw = UIO_READ; 175 auio.uio_segflg = UIO_USERSPACE; 176 auio.uio_procp = p; 177 auio.uio_resid = 0; 178 for (i = 0; i < iovcnt; i++) { 179 auio.uio_resid += iov->iov_len; 180 /* 181 * Reads return ssize_t because -1 is returned on error. 182 * Therefore we must restrict the length to SSIZE_MAX to 183 * avoid garbage return values. Note that the addition is 184 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 185 */ 186 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 187 error = EINVAL; 188 goto done; 189 } 190 iov++; 191 } 192 #ifdef KTRACE 193 /* 194 * if tracing, save a copy of iovec 195 */ 196 if (KTRPOINT(p, KTR_GENIO)) { 197 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 198 bcopy(auio.uio_iov, ktriov, iovlen); 199 } 200 #endif 201 cnt = auio.uio_resid; 202 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 203 if (error) 204 if (auio.uio_resid != cnt && (error == ERESTART || 205 error == EINTR || error == EWOULDBLOCK)) 206 error = 0; 207 cnt -= auio.uio_resid; 208 209 fp->f_rxfer++; 210 fp->f_rbytes += cnt; 211 #ifdef KTRACE 212 if (ktriov != NULL) { 213 if (error == 0) 214 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 215 free(ktriov, M_TEMP, iovlen); 216 } 217 #endif 218 *retval = cnt; 219 done: 220 if (needfree) 221 free(needfree, M_IOV, iovlen); 222 out: 223 FRELE(fp, p); 224 return (error); 225 } 226 227 /* 228 * Write system call 229 */ 230 int 231 sys_write(struct proc *p, void *v, register_t *retval) 232 { 233 struct sys_write_args /* { 234 syscallarg(int) fd; 235 syscallarg(const void *) buf; 236 syscallarg(size_t) nbyte; 237 } */ *uap = v; 238 struct iovec iov; 239 int fd = SCARG(uap, fd); 240 struct file *fp; 241 struct filedesc *fdp = p->p_fd; 242 243 if ((fp = fd_getfile(fdp, fd)) == NULL) 244 return (EBADF); 245 if ((fp->f_flag & FWRITE) == 0) 246 return (EBADF); 247 248 iov.iov_base = (void *)SCARG(uap, buf); 249 iov.iov_len = SCARG(uap, nbyte); 250 251 FREF(fp); 252 253 /* dofilewritev() will FRELE the descriptor for us */ 254 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 255 } 256 257 /* 258 * Gather write system call 259 */ 260 int 261 sys_writev(struct proc *p, void *v, register_t *retval) 262 { 263 struct sys_writev_args /* { 264 syscallarg(int) fd; 265 syscallarg(const struct iovec *) iovp; 266 syscallarg(int) iovcnt; 267 } */ *uap = v; 268 int fd = SCARG(uap, fd); 269 struct file *fp; 270 struct filedesc *fdp = p->p_fd; 271 272 if ((fp = fd_getfile(fdp, fd)) == NULL) 273 return (EBADF); 274 if ((fp->f_flag & FWRITE) == 0) 275 return (EBADF); 276 277 FREF(fp); 278 279 /* dofilewritev() will FRELE the descriptor for us */ 280 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 281 &fp->f_offset, retval)); 282 } 283 284 int 285 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 286 int iovcnt, int userspace, off_t *offset, register_t *retval) 287 { 288 struct iovec aiov[UIO_SMALLIOV]; 289 struct uio auio; 290 struct iovec *iov; 291 struct iovec *needfree = NULL; 292 long i, cnt, error = 0; 293 u_int iovlen; 294 #ifdef KTRACE 295 struct iovec *ktriov = NULL; 296 #endif 297 298 /* note: can't use iovlen until iovcnt is validated */ 299 iovlen = iovcnt * sizeof(struct iovec); 300 301 /* 302 * If the iovec array exists in userspace, it needs to be copied in; 303 * otherwise, it can be used directly. 304 */ 305 if (userspace) { 306 if ((u_int)iovcnt > UIO_SMALLIOV) { 307 if ((u_int)iovcnt > IOV_MAX) { 308 error = EINVAL; 309 goto out; 310 } 311 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 312 } else if ((u_int)iovcnt > 0) { 313 iov = aiov; 314 needfree = NULL; 315 } else { 316 error = EINVAL; 317 goto out; 318 } 319 if ((error = copyin(iovp, iov, iovlen))) 320 goto done; 321 } else { 322 iov = (struct iovec *)iovp; /* de-constify */ 323 } 324 325 auio.uio_iov = iov; 326 auio.uio_iovcnt = iovcnt; 327 auio.uio_rw = UIO_WRITE; 328 auio.uio_segflg = UIO_USERSPACE; 329 auio.uio_procp = p; 330 auio.uio_resid = 0; 331 for (i = 0; i < iovcnt; i++) { 332 auio.uio_resid += iov->iov_len; 333 /* 334 * Writes return ssize_t because -1 is returned on error. 335 * Therefore we must restrict the length to SSIZE_MAX to 336 * avoid garbage return values. Note that the addition is 337 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 338 */ 339 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 340 error = EINVAL; 341 goto done; 342 } 343 iov++; 344 } 345 #ifdef KTRACE 346 /* 347 * if tracing, save a copy of iovec 348 */ 349 if (KTRPOINT(p, KTR_GENIO)) { 350 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 351 bcopy(auio.uio_iov, ktriov, iovlen); 352 } 353 #endif 354 cnt = auio.uio_resid; 355 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 356 if (error) { 357 if (auio.uio_resid != cnt && (error == ERESTART || 358 error == EINTR || error == EWOULDBLOCK)) 359 error = 0; 360 if (error == EPIPE) 361 ptsignal(p, SIGPIPE, STHREAD); 362 } 363 cnt -= auio.uio_resid; 364 365 fp->f_wxfer++; 366 fp->f_wbytes += cnt; 367 #ifdef KTRACE 368 if (ktriov != NULL) { 369 if (error == 0) 370 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 371 free(ktriov, M_TEMP, iovlen); 372 } 373 #endif 374 *retval = cnt; 375 done: 376 if (needfree) 377 free(needfree, M_IOV, iovlen); 378 out: 379 FRELE(fp, p); 380 return (error); 381 } 382 383 /* 384 * Ioctl system call 385 */ 386 /* ARGSUSED */ 387 int 388 sys_ioctl(struct proc *p, void *v, register_t *retval) 389 { 390 struct sys_ioctl_args /* { 391 syscallarg(int) fd; 392 syscallarg(u_long) com; 393 syscallarg(void *) data; 394 } */ *uap = v; 395 struct file *fp; 396 struct filedesc *fdp; 397 u_long com; 398 int error; 399 u_int size; 400 caddr_t data, memp; 401 int tmp; 402 #define STK_PARAMS 128 403 long long stkbuf[STK_PARAMS / sizeof(long long)]; 404 405 fdp = p->p_fd; 406 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 407 return (EBADF); 408 409 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 410 return (EBADF); 411 412 switch (com = SCARG(uap, com)) { 413 case FIONCLEX: 414 case FIOCLEX: 415 fdplock(fdp); 416 if (com == FIONCLEX) 417 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 418 else 419 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 420 fdpunlock(fdp); 421 return (0); 422 } 423 424 /* 425 * Interpret high order word to find amount of data to be 426 * copied to/from the user's address space. 427 */ 428 size = IOCPARM_LEN(com); 429 if (size > IOCPARM_MAX) 430 return (ENOTTY); 431 FREF(fp); 432 memp = NULL; 433 if (size > sizeof (stkbuf)) { 434 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 435 data = memp; 436 } else 437 data = (caddr_t)stkbuf; 438 if (com&IOC_IN) { 439 if (size) { 440 error = copyin(SCARG(uap, data), data, (u_int)size); 441 if (error) { 442 goto out; 443 } 444 } else 445 *(caddr_t *)data = SCARG(uap, data); 446 } else if ((com&IOC_OUT) && size) 447 /* 448 * Zero the buffer so the user always 449 * gets back something deterministic. 450 */ 451 memset(data, 0, size); 452 else if (com&IOC_VOID) 453 *(caddr_t *)data = SCARG(uap, data); 454 455 switch (com) { 456 457 case FIONBIO: 458 if ((tmp = *(int *)data) != 0) 459 fp->f_flag |= FNONBLOCK; 460 else 461 fp->f_flag &= ~FNONBLOCK; 462 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 463 break; 464 465 case FIOASYNC: 466 if ((tmp = *(int *)data) != 0) 467 fp->f_flag |= FASYNC; 468 else 469 fp->f_flag &= ~FASYNC; 470 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 471 break; 472 473 case FIOSETOWN: 474 tmp = *(int *)data; 475 if (fp->f_type == DTYPE_SOCKET) { 476 struct socket *so = (struct socket *)fp->f_data; 477 478 so->so_pgid = tmp; 479 so->so_siguid = p->p_ucred->cr_ruid; 480 so->so_sigeuid = p->p_ucred->cr_uid; 481 error = 0; 482 break; 483 } 484 if (tmp <= 0) { 485 tmp = -tmp; 486 } else { 487 struct process *pr = prfind(tmp); 488 if (pr == NULL) { 489 error = ESRCH; 490 break; 491 } 492 tmp = pr->ps_pgrp->pg_id; 493 } 494 error = (*fp->f_ops->fo_ioctl) 495 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 496 break; 497 498 case FIOGETOWN: 499 if (fp->f_type == DTYPE_SOCKET) { 500 error = 0; 501 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 502 break; 503 } 504 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 505 *(int *)data = -*(int *)data; 506 break; 507 508 default: 509 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 510 break; 511 } 512 /* 513 * Copy any data to user, size was 514 * already set and checked above. 515 */ 516 if (error == 0 && (com&IOC_OUT) && size) 517 error = copyout(data, SCARG(uap, data), (u_int)size); 518 out: 519 FRELE(fp, p); 520 if (memp) 521 free(memp, M_IOCTLOPS, size); 522 return (error); 523 } 524 525 int selwait, nselcoll; 526 527 /* 528 * Select system call. 529 */ 530 int 531 sys_select(struct proc *p, void *v, register_t *retval) 532 { 533 struct sys_select_args /* { 534 syscallarg(int) nd; 535 syscallarg(fd_set *) in; 536 syscallarg(fd_set *) ou; 537 syscallarg(fd_set *) ex; 538 syscallarg(struct timeval *) tv; 539 } */ *uap = v; 540 541 struct timespec ts, *tsp = NULL; 542 int error; 543 544 if (SCARG(uap, tv) != NULL) { 545 struct timeval tv; 546 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 547 return (error); 548 if ((error = itimerfix(&tv)) != 0) 549 return (error); 550 #ifdef KTRACE 551 if (KTRPOINT(p, KTR_STRUCT)) 552 ktrreltimeval(p, &tv); 553 #endif 554 TIMEVAL_TO_TIMESPEC(&tv, &ts); 555 tsp = &ts; 556 } 557 558 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 559 SCARG(uap, ex), tsp, NULL, retval)); 560 } 561 562 int 563 sys_pselect(struct proc *p, void *v, register_t *retval) 564 { 565 struct sys_pselect_args /* { 566 syscallarg(int) nd; 567 syscallarg(fd_set *) in; 568 syscallarg(fd_set *) ou; 569 syscallarg(fd_set *) ex; 570 syscallarg(const struct timespec *) ts; 571 syscallarg(const sigset_t *) mask; 572 } */ *uap = v; 573 574 struct timespec ts, *tsp = NULL; 575 sigset_t ss, *ssp = NULL; 576 int error; 577 578 if (SCARG(uap, ts) != NULL) { 579 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 580 return (error); 581 if ((error = timespecfix(&ts)) != 0) 582 return (error); 583 #ifdef KTRACE 584 if (KTRPOINT(p, KTR_STRUCT)) 585 ktrreltimespec(p, &ts); 586 #endif 587 tsp = &ts; 588 } 589 if (SCARG(uap, mask) != NULL) { 590 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 591 return (error); 592 ssp = &ss; 593 } 594 595 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 596 SCARG(uap, ex), tsp, ssp, retval)); 597 } 598 599 int 600 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 601 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 602 { 603 fd_mask bits[6]; 604 fd_set *pibits[3], *pobits[3]; 605 struct timespec ats, rts, tts; 606 int s, ncoll, error = 0, timo; 607 u_int ni; 608 609 if (nd < 0) 610 return (EINVAL); 611 if (nd > p->p_fd->fd_nfiles) { 612 /* forgiving; slightly wrong */ 613 nd = p->p_fd->fd_nfiles; 614 } 615 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 616 if (ni > sizeof(bits[0])) { 617 caddr_t mbits; 618 619 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 620 pibits[0] = (fd_set *)&mbits[ni * 0]; 621 pibits[1] = (fd_set *)&mbits[ni * 1]; 622 pibits[2] = (fd_set *)&mbits[ni * 2]; 623 pobits[0] = (fd_set *)&mbits[ni * 3]; 624 pobits[1] = (fd_set *)&mbits[ni * 4]; 625 pobits[2] = (fd_set *)&mbits[ni * 5]; 626 } else { 627 memset(bits, 0, sizeof(bits)); 628 pibits[0] = (fd_set *)&bits[0]; 629 pibits[1] = (fd_set *)&bits[1]; 630 pibits[2] = (fd_set *)&bits[2]; 631 pobits[0] = (fd_set *)&bits[3]; 632 pobits[1] = (fd_set *)&bits[4]; 633 pobits[2] = (fd_set *)&bits[5]; 634 } 635 636 #define getbits(name, x) \ 637 if (name && (error = copyin(name, pibits[x], ni))) \ 638 goto done; 639 getbits(in, 0); 640 getbits(ou, 1); 641 getbits(ex, 2); 642 #undef getbits 643 #ifdef KTRACE 644 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 645 if (in) ktrfdset(p, pibits[0], ni); 646 if (ou) ktrfdset(p, pibits[1], ni); 647 if (ex) ktrfdset(p, pibits[2], ni); 648 } 649 #endif 650 651 if (tsp) { 652 getnanouptime(&rts); 653 timespecadd(tsp, &rts, &ats); 654 } else { 655 ats.tv_sec = 0; 656 ats.tv_nsec = 0; 657 } 658 timo = 0; 659 660 if (sigmask) 661 dosigsuspend(p, *sigmask &~ sigcantmask); 662 663 retry: 664 ncoll = nselcoll; 665 atomic_setbits_int(&p->p_flag, P_SELECT); 666 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 667 if (error || *retval) 668 goto done; 669 if (tsp) { 670 getnanouptime(&rts); 671 if (timespeccmp(&rts, &ats, >=)) 672 goto done; 673 timespecsub(&ats, &rts, &tts); 674 timo = tts.tv_sec > 24 * 60 * 60 ? 675 24 * 60 * 60 * hz : tstohz(&tts); 676 } 677 s = splhigh(); 678 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 679 splx(s); 680 goto retry; 681 } 682 atomic_clearbits_int(&p->p_flag, P_SELECT); 683 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 684 splx(s); 685 if (error == 0) 686 goto retry; 687 done: 688 atomic_clearbits_int(&p->p_flag, P_SELECT); 689 /* select is not restarted after signals... */ 690 if (error == ERESTART) 691 error = EINTR; 692 if (error == EWOULDBLOCK) 693 error = 0; 694 #define putbits(name, x) \ 695 if (name && (error2 = copyout(pobits[x], name, ni))) \ 696 error = error2; 697 if (error == 0) { 698 int error2; 699 700 putbits(in, 0); 701 putbits(ou, 1); 702 putbits(ex, 2); 703 #undef putbits 704 #ifdef KTRACE 705 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 706 if (in) ktrfdset(p, pobits[0], ni); 707 if (ou) ktrfdset(p, pobits[1], ni); 708 if (ex) ktrfdset(p, pobits[2], ni); 709 } 710 #endif 711 } 712 713 if (pibits[0] != (fd_set *)&bits[0]) 714 free(pibits[0], M_TEMP, 6 * ni); 715 return (error); 716 } 717 718 int 719 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 720 register_t *retval) 721 { 722 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 723 struct filedesc *fdp = p->p_fd; 724 int msk, i, j, fd; 725 fd_mask bits; 726 struct file *fp; 727 int n = 0; 728 static const int flag[3] = { POLLIN, POLLOUT, POLLPRI }; 729 730 for (msk = 0; msk < 3; msk++) { 731 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 732 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 733 734 for (i = 0; i < nfd; i += NFDBITS) { 735 bits = pibits->fds_bits[i/NFDBITS]; 736 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 737 bits &= ~(1 << j); 738 if ((fp = fd_getfile(fdp, fd)) == NULL) 739 return (EBADF); 740 FREF(fp); 741 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 742 FD_SET(fd, pobits); 743 n++; 744 } 745 FRELE(fp, p); 746 } 747 } 748 } 749 *retval = n; 750 return (0); 751 } 752 753 /*ARGSUSED*/ 754 int 755 seltrue(dev_t dev, int events, struct proc *p) 756 { 757 758 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 759 } 760 761 int 762 selfalse(dev_t dev, int events, struct proc *p) 763 { 764 765 return (0); 766 } 767 768 /* 769 * Record a select request. 770 */ 771 void 772 selrecord(struct proc *selector, struct selinfo *sip) 773 { 774 struct proc *p; 775 pid_t mypid; 776 777 mypid = selector->p_pid; 778 if (sip->si_selpid == mypid) 779 return; 780 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 781 p->p_wchan == (caddr_t)&selwait) 782 sip->si_flags |= SI_COLL; 783 else 784 sip->si_selpid = mypid; 785 } 786 787 /* 788 * Do a wakeup when a selectable event occurs. 789 */ 790 void 791 selwakeup(struct selinfo *sip) 792 { 793 struct proc *p; 794 int s; 795 796 KNOTE(&sip->si_note, 0); 797 if (sip->si_selpid == 0) 798 return; 799 if (sip->si_flags & SI_COLL) { 800 nselcoll++; 801 sip->si_flags &= ~SI_COLL; 802 wakeup(&selwait); 803 } 804 p = pfind(sip->si_selpid); 805 sip->si_selpid = 0; 806 if (p != NULL) { 807 SCHED_LOCK(s); 808 if (p->p_wchan == (caddr_t)&selwait) { 809 if (p->p_stat == SSLEEP) 810 setrunnable(p); 811 else 812 unsleep(p); 813 } else if (p->p_flag & P_SELECT) 814 atomic_clearbits_int(&p->p_flag, P_SELECT); 815 SCHED_UNLOCK(s); 816 } 817 } 818 819 void 820 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 821 { 822 struct filedesc *fdp = p->p_fd; 823 struct file *fp; 824 u_int i; 825 int n = 0; 826 827 for (i = 0; i < nfd; i++, pl++) { 828 /* Check the file descriptor. */ 829 if (pl->fd < 0) { 830 pl->revents = 0; 831 continue; 832 } 833 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 834 pl->revents = POLLNVAL; 835 n++; 836 continue; 837 } 838 FREF(fp); 839 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 840 FRELE(fp, p); 841 if (pl->revents != 0) 842 n++; 843 } 844 *retval = n; 845 } 846 847 /* 848 * Only copyout the revents field. 849 */ 850 int 851 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 852 { 853 int error = 0; 854 u_int i = 0; 855 856 while (!error && i++ < nfds) { 857 error = copyout(&pl->revents, &upl->revents, 858 sizeof(upl->revents)); 859 pl++; 860 upl++; 861 } 862 863 return (error); 864 } 865 866 /* 867 * We are using the same mechanism as select only we encode/decode args 868 * differently. 869 */ 870 int 871 sys_poll(struct proc *p, void *v, register_t *retval) 872 { 873 struct sys_poll_args /* { 874 syscallarg(struct pollfd *) fds; 875 syscallarg(u_int) nfds; 876 syscallarg(int) timeout; 877 } */ *uap = v; 878 879 struct timespec ts, *tsp = NULL; 880 int msec = SCARG(uap, timeout); 881 882 if (msec != INFTIM) { 883 if (msec < 0) 884 return (EINVAL); 885 ts.tv_sec = msec / 1000; 886 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 887 tsp = &ts; 888 } 889 890 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 891 retval)); 892 } 893 894 int 895 sys_ppoll(struct proc *p, void *v, register_t *retval) 896 { 897 struct sys_ppoll_args /* { 898 syscallarg(struct pollfd *) fds; 899 syscallarg(u_int) nfds; 900 syscallarg(const struct timespec *) ts; 901 syscallarg(const sigset_t *) mask; 902 } */ *uap = v; 903 904 int error; 905 struct timespec ts, *tsp = NULL; 906 sigset_t ss, *ssp = NULL; 907 908 if (SCARG(uap, ts) != NULL) { 909 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 910 return (error); 911 if ((error = timespecfix(&ts)) != 0) 912 return (error); 913 #ifdef KTRACE 914 if (KTRPOINT(p, KTR_STRUCT)) 915 ktrreltimespec(p, &ts); 916 #endif 917 tsp = &ts; 918 } 919 920 if (SCARG(uap, mask) != NULL) { 921 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 922 return (error); 923 ssp = &ss; 924 } 925 926 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 927 retval)); 928 } 929 930 int 931 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 932 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 933 { 934 size_t sz; 935 struct pollfd pfds[4], *pl = pfds; 936 struct timespec ats, rts, tts; 937 int timo, ncoll, i, s, error; 938 extern int nselcoll, selwait; 939 940 /* Standards say no more than MAX_OPEN; this is possibly better. */ 941 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 942 return (EINVAL); 943 944 /* optimize for the default case, of a small nfds value */ 945 if (nfds > nitems(pfds)) { 946 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 947 M_WAITOK | M_CANFAIL); 948 if (pl == NULL) 949 return (EINVAL); 950 } 951 952 sz = nfds * sizeof(*pl); 953 954 if ((error = copyin(fds, pl, sz)) != 0) 955 goto bad; 956 957 for (i = 0; i < nfds; i++) 958 pl[i].revents = 0; 959 960 if (tsp != NULL) { 961 getnanouptime(&rts); 962 timespecadd(tsp, &rts, &ats); 963 } else { 964 ats.tv_sec = 0; 965 ats.tv_nsec = 0; 966 } 967 timo = 0; 968 969 if (sigmask) 970 dosigsuspend(p, *sigmask &~ sigcantmask); 971 972 retry: 973 ncoll = nselcoll; 974 atomic_setbits_int(&p->p_flag, P_SELECT); 975 pollscan(p, pl, nfds, retval); 976 if (*retval) 977 goto done; 978 if (tsp != NULL) { 979 getnanouptime(&rts); 980 if (timespeccmp(&rts, &ats, >=)) 981 goto done; 982 timespecsub(&ats, &rts, &tts); 983 timo = tts.tv_sec > 24 * 60 * 60 ? 984 24 * 60 * 60 * hz : tstohz(&tts); 985 } 986 s = splhigh(); 987 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 988 splx(s); 989 goto retry; 990 } 991 atomic_clearbits_int(&p->p_flag, P_SELECT); 992 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 993 splx(s); 994 if (error == 0) 995 goto retry; 996 997 done: 998 atomic_clearbits_int(&p->p_flag, P_SELECT); 999 /* 1000 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 1001 * ignored (since the whole point is to see what would block). 1002 */ 1003 switch (error) { 1004 case ERESTART: 1005 error = pollout(pl, fds, nfds); 1006 if (error == 0) 1007 error = EINTR; 1008 break; 1009 case EWOULDBLOCK: 1010 case 0: 1011 error = pollout(pl, fds, nfds); 1012 break; 1013 } 1014 bad: 1015 if (pl != pfds) 1016 free(pl, M_TEMP, sz); 1017 return (error); 1018 } 1019 1020 /* 1021 * utrace system call 1022 */ 1023 /* ARGSUSED */ 1024 int 1025 sys_utrace(struct proc *curp, void *v, register_t *retval) 1026 { 1027 #ifdef KTRACE 1028 struct sys_utrace_args /* { 1029 syscallarg(const char *) label; 1030 syscallarg(const void *) addr; 1031 syscallarg(size_t) len; 1032 } */ *uap = v; 1033 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1034 SCARG(uap, len))); 1035 #else 1036 return (0); 1037 #endif 1038 } 1039