1 /* $OpenBSD: kern_descrip.c,v 1.135 2016/09/15 02:00:16 dlg Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/lock.h> 53 #include <sys/malloc.h> 54 #include <sys/syslog.h> 55 #include <sys/ucred.h> 56 #include <sys/unistd.h> 57 #include <sys/resourcevar.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 #include <sys/pledge.h> 64 65 #include <sys/pipe.h> 66 67 /* 68 * Descriptor management. 69 */ 70 struct filelist filehead; /* head of list of open files */ 71 int numfiles; /* actual number of open files */ 72 73 static __inline void fd_used(struct filedesc *, int); 74 static __inline void fd_unused(struct filedesc *, int); 75 static __inline int find_next_zero(u_int *, int, u_int); 76 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 77 int find_last_set(struct filedesc *, int); 78 int dodup3(struct proc *, int, int, int, register_t *); 79 80 struct pool file_pool; 81 struct pool fdesc_pool; 82 83 void 84 filedesc_init(void) 85 { 86 pool_init(&file_pool, sizeof(struct file), 0, IPL_NONE, 87 PR_WAITOK, "filepl", NULL); 88 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, IPL_NONE, 89 PR_WAITOK, "fdescpl", NULL); 90 LIST_INIT(&filehead); 91 } 92 93 static __inline int 94 find_next_zero (u_int *bitmap, int want, u_int bits) 95 { 96 int i, off, maxoff; 97 u_int sub; 98 99 if (want > bits) 100 return -1; 101 102 off = want >> NDENTRYSHIFT; 103 i = want & NDENTRYMASK; 104 if (i) { 105 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 106 if (sub != ~0) 107 goto found; 108 off++; 109 } 110 111 maxoff = NDLOSLOTS(bits); 112 while (off < maxoff) { 113 if ((sub = bitmap[off]) != ~0) 114 goto found; 115 off++; 116 } 117 118 return -1; 119 120 found: 121 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 122 } 123 124 int 125 find_last_set(struct filedesc *fd, int last) 126 { 127 int off, i; 128 struct file **ofiles = fd->fd_ofiles; 129 u_int *bitmap = fd->fd_lomap; 130 131 off = (last - 1) >> NDENTRYSHIFT; 132 133 while (off >= 0 && !bitmap[off]) 134 off--; 135 if (off < 0) 136 return 0; 137 138 i = ((off + 1) << NDENTRYSHIFT) - 1; 139 if (i >= last) 140 i = last - 1; 141 142 while (i > 0 && ofiles[i] == NULL) 143 i--; 144 return i; 145 } 146 147 static __inline void 148 fd_used(struct filedesc *fdp, int fd) 149 { 150 u_int off = fd >> NDENTRYSHIFT; 151 152 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 153 if (fdp->fd_lomap[off] == ~0) 154 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 155 156 if (fd > fdp->fd_lastfile) 157 fdp->fd_lastfile = fd; 158 fdp->fd_openfd++; 159 } 160 161 static __inline void 162 fd_unused(struct filedesc *fdp, int fd) 163 { 164 u_int off = fd >> NDENTRYSHIFT; 165 166 if (fd < fdp->fd_freefile) 167 fdp->fd_freefile = fd; 168 169 if (fdp->fd_lomap[off] == ~0) 170 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 171 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 172 173 #ifdef DIAGNOSTIC 174 if (fd > fdp->fd_lastfile) 175 panic("fd_unused: fd_lastfile inconsistent"); 176 #endif 177 if (fd == fdp->fd_lastfile) 178 fdp->fd_lastfile = find_last_set(fdp, fd); 179 fdp->fd_openfd--; 180 } 181 182 struct file * 183 fd_getfile(struct filedesc *fdp, int fd) 184 { 185 struct file *fp; 186 187 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 188 return (NULL); 189 190 if (!FILE_IS_USABLE(fp)) 191 return (NULL); 192 193 return (fp); 194 } 195 196 struct file * 197 fd_getfile_mode(struct filedesc *fdp, int fd, int mode) 198 { 199 struct file *fp; 200 201 KASSERT(mode != 0); 202 203 fp = fd_getfile(fdp, fd); 204 205 if (fp == NULL || (fp->f_flag & mode) == 0) 206 return (NULL); 207 208 return (fp); 209 } 210 211 /* 212 * System calls on descriptors. 213 */ 214 215 /* 216 * Duplicate a file descriptor. 217 */ 218 int 219 sys_dup(struct proc *p, void *v, register_t *retval) 220 { 221 struct sys_dup_args /* { 222 syscallarg(int) fd; 223 } */ *uap = v; 224 struct filedesc *fdp = p->p_fd; 225 int old = SCARG(uap, fd); 226 struct file *fp; 227 int new; 228 int error; 229 230 restart: 231 if ((fp = fd_getfile(fdp, old)) == NULL) 232 return (EBADF); 233 FREF(fp); 234 fdplock(fdp); 235 if ((error = fdalloc(p, 0, &new)) != 0) { 236 FRELE(fp, p); 237 if (error == ENOSPC) { 238 fdexpand(p); 239 fdpunlock(fdp); 240 goto restart; 241 } 242 goto out; 243 } 244 error = finishdup(p, fp, old, new, retval, 0); 245 246 out: 247 fdpunlock(fdp); 248 return (error); 249 } 250 251 /* 252 * Duplicate a file descriptor to a particular value. 253 */ 254 int 255 sys_dup2(struct proc *p, void *v, register_t *retval) 256 { 257 struct sys_dup2_args /* { 258 syscallarg(int) from; 259 syscallarg(int) to; 260 } */ *uap = v; 261 262 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval)); 263 } 264 265 int 266 sys_dup3(struct proc *p, void *v, register_t *retval) 267 { 268 struct sys_dup3_args /* { 269 syscallarg(int) from; 270 syscallarg(int) to; 271 syscallarg(int) flags; 272 } */ *uap = v; 273 274 if (SCARG(uap, from) == SCARG(uap, to)) 275 return (EINVAL); 276 if (SCARG(uap, flags) & ~O_CLOEXEC) 277 return (EINVAL); 278 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 279 SCARG(uap, flags), retval)); 280 } 281 282 int 283 dodup3(struct proc *p, int old, int new, int flags, register_t *retval) 284 { 285 struct filedesc *fdp = p->p_fd; 286 struct file *fp; 287 int i, error; 288 289 restart: 290 if ((fp = fd_getfile(fdp, old)) == NULL) 291 return (EBADF); 292 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 293 (u_int)new >= maxfiles) 294 return (EBADF); 295 if (old == new) { 296 /* 297 * NOTE! This doesn't clear the close-on-exec flag. This might 298 * or might not be the intended behavior from the start, but 299 * this is what everyone else does. 300 */ 301 *retval = new; 302 return (0); 303 } 304 FREF(fp); 305 fdplock(fdp); 306 if (new >= fdp->fd_nfiles) { 307 if ((error = fdalloc(p, new, &i)) != 0) { 308 FRELE(fp, p); 309 if (error == ENOSPC) { 310 fdexpand(p); 311 fdpunlock(fdp); 312 goto restart; 313 } 314 goto out; 315 } 316 if (new != i) 317 panic("dup2: fdalloc"); 318 fd_unused(fdp, new); 319 } 320 /* finishdup() does FRELE */ 321 error = finishdup(p, fp, old, new, retval, 1); 322 if (!error && flags & O_CLOEXEC) 323 fdp->fd_ofileflags[new] |= UF_EXCLOSE; 324 325 out: 326 fdpunlock(fdp); 327 return (error); 328 } 329 330 /* 331 * The file control system call. 332 */ 333 int 334 sys_fcntl(struct proc *p, void *v, register_t *retval) 335 { 336 struct sys_fcntl_args /* { 337 syscallarg(int) fd; 338 syscallarg(int) cmd; 339 syscallarg(void *) arg; 340 } */ *uap = v; 341 int fd = SCARG(uap, fd); 342 struct filedesc *fdp = p->p_fd; 343 struct file *fp; 344 struct vnode *vp; 345 int i, tmp, newmin, flg = F_POSIX; 346 struct flock fl; 347 int error = 0; 348 349 error = pledge_fcntl(p, SCARG(uap, cmd)); 350 if (error) 351 return (error); 352 353 restart: 354 if ((fp = fd_getfile(fdp, fd)) == NULL) 355 return (EBADF); 356 FREF(fp); 357 switch (SCARG(uap, cmd)) { 358 359 case F_DUPFD: 360 case F_DUPFD_CLOEXEC: 361 newmin = (long)SCARG(uap, arg); 362 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 363 (u_int)newmin >= maxfiles) { 364 error = EINVAL; 365 break; 366 } 367 fdplock(fdp); 368 if ((error = fdalloc(p, newmin, &i)) != 0) { 369 FRELE(fp, p); 370 if (error == ENOSPC) { 371 fdexpand(p); 372 fdpunlock(fdp); 373 goto restart; 374 } 375 } else { 376 /* finishdup will FRELE for us. */ 377 error = finishdup(p, fp, fd, i, retval, 0); 378 379 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 380 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 381 } 382 383 fdpunlock(fdp); 384 return (error); 385 386 case F_GETFD: 387 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 388 break; 389 390 case F_SETFD: 391 fdplock(fdp); 392 if ((long)SCARG(uap, arg) & 1) 393 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 394 else 395 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 396 fdpunlock(fdp); 397 break; 398 399 case F_GETFL: 400 *retval = OFLAGS(fp->f_flag); 401 break; 402 403 case F_ISATTY: 404 vp = fp->f_data; 405 if (fp->f_type == DTYPE_VNODE && (vp->v_flag & VISTTY)) 406 *retval = 1; 407 else { 408 *retval = 0; 409 error = ENOTTY; 410 } 411 break; 412 413 case F_SETFL: 414 fp->f_flag &= ~FCNTLFLAGS; 415 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 416 tmp = fp->f_flag & FNONBLOCK; 417 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 418 if (error) 419 break; 420 tmp = fp->f_flag & FASYNC; 421 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 422 if (!error) 423 break; 424 fp->f_flag &= ~FNONBLOCK; 425 tmp = 0; 426 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 427 break; 428 429 case F_GETOWN: 430 if (fp->f_type == DTYPE_SOCKET) { 431 *retval = ((struct socket *)fp->f_data)->so_pgid; 432 break; 433 } 434 if (fp->f_type == DTYPE_PIPE) { 435 *retval = ((struct pipe *)fp->f_data)->pipe_pgid; 436 break; 437 } 438 error = (*fp->f_ops->fo_ioctl) 439 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 440 *retval = -tmp; 441 break; 442 443 case F_SETOWN: 444 if (fp->f_type == DTYPE_SOCKET) { 445 struct socket *so = fp->f_data; 446 447 so->so_pgid = (long)SCARG(uap, arg); 448 so->so_siguid = p->p_ucred->cr_ruid; 449 so->so_sigeuid = p->p_ucred->cr_uid; 450 break; 451 } 452 if (fp->f_type == DTYPE_PIPE) { 453 struct pipe *mpipe = fp->f_data; 454 455 mpipe->pipe_pgid = (long)SCARG(uap, arg); 456 break; 457 } 458 if ((long)SCARG(uap, arg) <= 0) { 459 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 460 } else { 461 struct process *pr1 = prfind((long)SCARG(uap, arg)); 462 if (pr1 == 0) { 463 error = ESRCH; 464 break; 465 } 466 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 467 } 468 error = ((*fp->f_ops->fo_ioctl) 469 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 470 break; 471 472 case F_SETLKW: 473 flg |= F_WAIT; 474 /* FALLTHROUGH */ 475 476 case F_SETLK: 477 error = pledge_flock(p); 478 if (error != 0) 479 break; 480 481 if (fp->f_type != DTYPE_VNODE) { 482 error = EBADF; 483 break; 484 } 485 vp = fp->f_data; 486 /* Copy in the lock structure */ 487 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 488 sizeof (fl)); 489 if (error) 490 break; 491 if (fl.l_whence == SEEK_CUR) { 492 if (fl.l_start == 0 && fl.l_len < 0) { 493 /* lockf(3) compliance hack */ 494 fl.l_len = -fl.l_len; 495 fl.l_start = fp->f_offset - fl.l_len; 496 } else 497 fl.l_start += fp->f_offset; 498 } 499 switch (fl.l_type) { 500 501 case F_RDLCK: 502 if ((fp->f_flag & FREAD) == 0) { 503 error = EBADF; 504 goto out; 505 } 506 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 507 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 508 break; 509 510 case F_WRLCK: 511 if ((fp->f_flag & FWRITE) == 0) { 512 error = EBADF; 513 goto out; 514 } 515 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 516 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 517 break; 518 519 case F_UNLCK: 520 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 521 goto out; 522 523 default: 524 error = EINVAL; 525 goto out; 526 } 527 528 if (fp != fd_getfile(fdp, fd)) { 529 /* 530 * We have lost the race with close() or dup2(); 531 * unlock, pretend that we've won the race and that 532 * lock had been removed by close() 533 */ 534 fl.l_whence = SEEK_SET; 535 fl.l_start = 0; 536 fl.l_len = 0; 537 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 538 fl.l_type = F_UNLCK; 539 } 540 goto out; 541 542 543 case F_GETLK: 544 error = pledge_flock(p); 545 if (error != 0) 546 break; 547 548 if (fp->f_type != DTYPE_VNODE) { 549 error = EBADF; 550 break; 551 } 552 vp = fp->f_data; 553 /* Copy in the lock structure */ 554 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 555 sizeof (fl)); 556 if (error) 557 break; 558 if (fl.l_whence == SEEK_CUR) { 559 if (fl.l_start == 0 && fl.l_len < 0) { 560 /* lockf(3) compliance hack */ 561 fl.l_len = -fl.l_len; 562 fl.l_start = fp->f_offset - fl.l_len; 563 } else 564 fl.l_start += fp->f_offset; 565 } 566 if (fl.l_type != F_RDLCK && 567 fl.l_type != F_WRLCK && 568 fl.l_type != F_UNLCK && 569 fl.l_type != 0) { 570 error = EINVAL; 571 break; 572 } 573 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 574 if (error) 575 break; 576 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 577 sizeof (fl))); 578 break; 579 580 default: 581 error = EINVAL; 582 break; 583 } 584 out: 585 FRELE(fp, p); 586 return (error); 587 } 588 589 /* 590 * Common code for dup, dup2, and fcntl(F_DUPFD). 591 */ 592 int 593 finishdup(struct proc *p, struct file *fp, int old, int new, 594 register_t *retval, int dup2) 595 { 596 struct file *oldfp; 597 struct filedesc *fdp = p->p_fd; 598 599 fdpassertlocked(fdp); 600 if (fp->f_count == LONG_MAX-2) { 601 FRELE(fp, p); 602 return (EDEADLK); 603 } 604 605 /* 606 * Don't fd_getfile here. We want to closef LARVAL files and 607 * closef can deal with that. 608 */ 609 oldfp = fdp->fd_ofiles[new]; 610 if (oldfp != NULL) 611 FREF(oldfp); 612 613 fdp->fd_ofiles[new] = fp; 614 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 615 fp->f_count++; 616 FRELE(fp, p); 617 if (dup2 && oldfp == NULL) 618 fd_used(fdp, new); 619 *retval = new; 620 621 if (oldfp != NULL) { 622 if (new < fdp->fd_knlistsize) 623 knote_fdclose(p, new); 624 closef(oldfp, p); 625 } 626 627 return (0); 628 } 629 630 void 631 fdremove(struct filedesc *fdp, int fd) 632 { 633 fdpassertlocked(fdp); 634 fdp->fd_ofiles[fd] = NULL; 635 fd_unused(fdp, fd); 636 } 637 638 int 639 fdrelease(struct proc *p, int fd) 640 { 641 struct filedesc *fdp = p->p_fd; 642 struct file **fpp, *fp; 643 644 fdpassertlocked(fdp); 645 646 /* 647 * Don't fd_getfile here. We want to closef LARVAL files and closef 648 * can deal with that. 649 */ 650 fpp = &fdp->fd_ofiles[fd]; 651 fp = *fpp; 652 if (fp == NULL) 653 return (EBADF); 654 FREF(fp); 655 *fpp = NULL; 656 fd_unused(fdp, fd); 657 if (fd < fdp->fd_knlistsize) 658 knote_fdclose(p, fd); 659 return (closef(fp, p)); 660 } 661 662 /* 663 * Close a file descriptor. 664 */ 665 int 666 sys_close(struct proc *p, void *v, register_t *retval) 667 { 668 struct sys_close_args /* { 669 syscallarg(int) fd; 670 } */ *uap = v; 671 int fd = SCARG(uap, fd), error; 672 struct filedesc *fdp = p->p_fd; 673 674 if (fd_getfile(fdp, fd) == NULL) 675 return (EBADF); 676 fdplock(fdp); 677 error = fdrelease(p, fd); 678 fdpunlock(fdp); 679 680 return (error); 681 } 682 683 /* 684 * Return status information about a file descriptor. 685 */ 686 int 687 sys_fstat(struct proc *p, void *v, register_t *retval) 688 { 689 struct sys_fstat_args /* { 690 syscallarg(int) fd; 691 syscallarg(struct stat *) sb; 692 } */ *uap = v; 693 int fd = SCARG(uap, fd); 694 struct filedesc *fdp = p->p_fd; 695 struct file *fp; 696 struct stat ub; 697 int error; 698 699 if ((fp = fd_getfile(fdp, fd)) == NULL) 700 return (EBADF); 701 FREF(fp); 702 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 703 FRELE(fp, p); 704 if (error == 0) { 705 /* 706 * Don't let non-root see generation numbers 707 * (for NFS security) 708 */ 709 if (suser(p, 0)) 710 ub.st_gen = 0; 711 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 712 sizeof (ub)); 713 } 714 #ifdef KTRACE 715 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 716 ktrstat(p, &ub); 717 #endif 718 return (error); 719 } 720 721 /* 722 * Return pathconf information about a file descriptor. 723 */ 724 int 725 sys_fpathconf(struct proc *p, void *v, register_t *retval) 726 { 727 struct sys_fpathconf_args /* { 728 syscallarg(int) fd; 729 syscallarg(int) name; 730 } */ *uap = v; 731 int fd = SCARG(uap, fd); 732 struct filedesc *fdp = p->p_fd; 733 struct file *fp; 734 struct vnode *vp; 735 int error; 736 737 if ((fp = fd_getfile(fdp, fd)) == NULL) 738 return (EBADF); 739 FREF(fp); 740 switch (fp->f_type) { 741 case DTYPE_PIPE: 742 case DTYPE_SOCKET: 743 if (SCARG(uap, name) != _PC_PIPE_BUF) { 744 error = EINVAL; 745 break; 746 } 747 *retval = PIPE_BUF; 748 error = 0; 749 break; 750 751 case DTYPE_VNODE: 752 vp = fp->f_data; 753 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 754 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 755 VOP_UNLOCK(vp, p); 756 break; 757 758 default: 759 error = EOPNOTSUPP; 760 break; 761 } 762 FRELE(fp, p); 763 return (error); 764 } 765 766 /* 767 * Allocate a file descriptor for the process. 768 */ 769 int 770 fdalloc(struct proc *p, int want, int *result) 771 { 772 struct filedesc *fdp = p->p_fd; 773 int lim, last, i; 774 u_int new, off; 775 776 /* 777 * Search for a free descriptor starting at the higher 778 * of want or fd_freefile. If that fails, consider 779 * expanding the ofile array. 780 */ 781 restart: 782 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 783 last = min(fdp->fd_nfiles, lim); 784 if ((i = want) < fdp->fd_freefile) 785 i = fdp->fd_freefile; 786 off = i >> NDENTRYSHIFT; 787 new = find_next_zero(fdp->fd_himap, off, 788 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 789 if (new != -1) { 790 i = find_next_zero(&fdp->fd_lomap[new], 791 new > off ? 0 : i & NDENTRYMASK, 792 NDENTRIES); 793 if (i == -1) { 794 /* 795 * Free file descriptor in this block was 796 * below want, try again with higher want. 797 */ 798 want = (new + 1) << NDENTRYSHIFT; 799 goto restart; 800 } 801 i += (new << NDENTRYSHIFT); 802 if (i < last) { 803 fd_used(fdp, i); 804 if (want <= fdp->fd_freefile) 805 fdp->fd_freefile = i; 806 *result = i; 807 fdp->fd_ofileflags[i] = 0; 808 return (0); 809 } 810 } 811 if (fdp->fd_nfiles >= lim) 812 return (EMFILE); 813 814 return (ENOSPC); 815 } 816 817 void 818 fdexpand(struct proc *p) 819 { 820 struct filedesc *fdp = p->p_fd; 821 int nfiles; 822 size_t copylen; 823 struct file **newofile; 824 char *newofileflags; 825 u_int *newhimap, *newlomap; 826 827 fdpassertlocked(fdp); 828 829 /* 830 * No space in current array. 831 */ 832 if (fdp->fd_nfiles < NDEXTENT) 833 nfiles = NDEXTENT; 834 else 835 nfiles = 2 * fdp->fd_nfiles; 836 837 newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); 838 newofileflags = (char *) &newofile[nfiles]; 839 840 /* 841 * Copy the existing ofile and ofileflags arrays 842 * and zero the new portion of each array. 843 */ 844 copylen = sizeof(struct file *) * fdp->fd_nfiles; 845 memcpy(newofile, fdp->fd_ofiles, copylen); 846 memset((char *)newofile + copylen, 0, 847 nfiles * sizeof(struct file *) - copylen); 848 copylen = sizeof(char) * fdp->fd_nfiles; 849 memcpy(newofileflags, fdp->fd_ofileflags, copylen); 850 memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); 851 852 if (fdp->fd_nfiles > NDFILE) 853 free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE); 854 855 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 856 newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), 857 M_FILEDESC, M_WAITOK); 858 newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), 859 M_FILEDESC, M_WAITOK); 860 861 copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); 862 memcpy(newhimap, fdp->fd_himap, copylen); 863 memset((char *)newhimap + copylen, 0, 864 NDHISLOTS(nfiles) * sizeof(u_int) - copylen); 865 866 copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); 867 memcpy(newlomap, fdp->fd_lomap, copylen); 868 memset((char *)newlomap + copylen, 0, 869 NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); 870 871 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 872 free(fdp->fd_himap, M_FILEDESC, 873 NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); 874 free(fdp->fd_lomap, M_FILEDESC, 875 NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); 876 } 877 fdp->fd_himap = newhimap; 878 fdp->fd_lomap = newlomap; 879 } 880 fdp->fd_ofiles = newofile; 881 fdp->fd_ofileflags = newofileflags; 882 fdp->fd_nfiles = nfiles; 883 } 884 885 /* 886 * Create a new open file structure and allocate 887 * a file descriptor for the process that refers to it. 888 */ 889 int 890 falloc(struct proc *p, struct file **resultfp, int *resultfd) 891 { 892 struct file *fp, *fq; 893 int error, i; 894 895 fdpassertlocked(p->p_fd); 896 restart: 897 if ((error = fdalloc(p, 0, &i)) != 0) { 898 if (error == ENOSPC) { 899 fdexpand(p); 900 goto restart; 901 } 902 return (error); 903 } 904 if (numfiles >= maxfiles) { 905 fd_unused(p->p_fd, i); 906 tablefull("file"); 907 return (ENFILE); 908 } 909 /* 910 * Allocate a new file descriptor. 911 * If the process has file descriptor zero open, add to the list 912 * of open files at that point, otherwise put it at the front of 913 * the list of open files. 914 */ 915 numfiles++; 916 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 917 fp->f_iflags = FIF_LARVAL; 918 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 919 LIST_INSERT_AFTER(fq, fp, f_list); 920 } else { 921 LIST_INSERT_HEAD(&filehead, fp, f_list); 922 } 923 p->p_fd->fd_ofiles[i] = fp; 924 fp->f_count = 1; 925 fp->f_cred = p->p_ucred; 926 crhold(fp->f_cred); 927 if (resultfp) 928 *resultfp = fp; 929 if (resultfd) 930 *resultfd = i; 931 FREF(fp); 932 return (0); 933 } 934 935 /* 936 * Build a new filedesc structure. 937 */ 938 struct filedesc * 939 fdinit(void) 940 { 941 struct filedesc0 *newfdp; 942 943 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 944 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 945 946 /* Create the file descriptor table. */ 947 newfdp->fd_fd.fd_refcnt = 1; 948 newfdp->fd_fd.fd_cmask = S_IWGRP|S_IWOTH; 949 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 950 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 951 newfdp->fd_fd.fd_nfiles = NDFILE; 952 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 953 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 954 newfdp->fd_fd.fd_knlistsize = -1; 955 956 newfdp->fd_fd.fd_freefile = 0; 957 newfdp->fd_fd.fd_lastfile = 0; 958 959 return (&newfdp->fd_fd); 960 } 961 962 /* 963 * Share a filedesc structure. 964 */ 965 struct filedesc * 966 fdshare(struct process *pr) 967 { 968 pr->ps_fd->fd_refcnt++; 969 return (pr->ps_fd); 970 } 971 972 /* 973 * Copy a filedesc structure. 974 */ 975 struct filedesc * 976 fdcopy(struct process *pr) 977 { 978 struct filedesc *newfdp, *fdp = pr->ps_fd; 979 struct file **fpp; 980 int i; 981 982 fdplock(fdp); 983 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 984 memcpy(newfdp, fdp, sizeof(struct filedesc)); 985 if (newfdp->fd_cdir) 986 vref(newfdp->fd_cdir); 987 if (newfdp->fd_rdir) 988 vref(newfdp->fd_rdir); 989 newfdp->fd_refcnt = 1; 990 rw_init(&newfdp->fd_lock, "fdlock"); 991 992 /* 993 * If the number of open files fits in the internal arrays 994 * of the open file structure, use them, otherwise allocate 995 * additional memory for the number of descriptors currently 996 * in use. 997 */ 998 if (newfdp->fd_lastfile < NDFILE) { 999 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1000 newfdp->fd_ofileflags = 1001 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1002 i = NDFILE; 1003 } else { 1004 /* 1005 * Compute the smallest multiple of NDEXTENT needed 1006 * for the file descriptors currently in use, 1007 * allowing the table to shrink. 1008 */ 1009 i = newfdp->fd_nfiles; 1010 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 1011 i /= 2; 1012 newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); 1013 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1014 } 1015 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1016 newfdp->fd_himap = 1017 ((struct filedesc0 *) newfdp)->fd_dhimap; 1018 newfdp->fd_lomap = 1019 ((struct filedesc0 *) newfdp)->fd_dlomap; 1020 } else { 1021 newfdp->fd_himap = mallocarray(NDHISLOTS(i), sizeof(u_int), 1022 M_FILEDESC, M_WAITOK); 1023 newfdp->fd_lomap = mallocarray(NDLOSLOTS(i), sizeof(u_int), 1024 M_FILEDESC, M_WAITOK); 1025 } 1026 newfdp->fd_nfiles = i; 1027 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); 1028 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1029 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 1030 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 1031 fdpunlock(fdp); 1032 1033 fdplock(newfdp); 1034 fpp = newfdp->fd_ofiles; 1035 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 1036 if (*fpp != NULL) { 1037 /* 1038 * XXX Gruesome hack. If count gets too high, fail 1039 * to copy an fd, since fdcopy()'s callers do not 1040 * permit it to indicate failure yet. 1041 * Meanwhile, kqueue files have to be 1042 * tied to the process that opened them to enforce 1043 * their internal consistency, so close them here. 1044 */ 1045 if ((*fpp)->f_count == LONG_MAX-2 || 1046 (*fpp)->f_type == DTYPE_KQUEUE) 1047 fdremove(newfdp, i); 1048 else 1049 (*fpp)->f_count++; 1050 } 1051 1052 /* finish cleaning up kq bits */ 1053 if (newfdp->fd_knlistsize != -1) { 1054 newfdp->fd_knlist = NULL; 1055 newfdp->fd_knlistsize = -1; 1056 newfdp->fd_knhash = NULL; 1057 newfdp->fd_knhashmask = 0; 1058 } 1059 1060 fdpunlock(newfdp); 1061 return (newfdp); 1062 } 1063 1064 /* 1065 * Release a filedesc structure. 1066 */ 1067 void 1068 fdfree(struct proc *p) 1069 { 1070 struct filedesc *fdp = p->p_fd; 1071 struct file **fpp, *fp; 1072 int i; 1073 1074 if (--fdp->fd_refcnt > 0) 1075 return; 1076 fpp = fdp->fd_ofiles; 1077 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1078 fp = *fpp; 1079 if (fp != NULL) { 1080 FREF(fp); 1081 *fpp = NULL; 1082 (void) closef(fp, p); 1083 } 1084 } 1085 p->p_fd = NULL; 1086 if (fdp->fd_nfiles > NDFILE) 1087 free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE); 1088 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1089 free(fdp->fd_himap, M_FILEDESC, 1090 NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); 1091 free(fdp->fd_lomap, M_FILEDESC, 1092 NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); 1093 } 1094 if (fdp->fd_cdir) 1095 vrele(fdp->fd_cdir); 1096 if (fdp->fd_rdir) 1097 vrele(fdp->fd_rdir); 1098 free(fdp->fd_knlist, M_TEMP, fdp->fd_knlistsize * sizeof(struct klist)); 1099 free(fdp->fd_knhash, M_TEMP, 0); 1100 pool_put(&fdesc_pool, fdp); 1101 } 1102 1103 /* 1104 * Internal form of close. 1105 * Decrement reference count on file structure. 1106 * Note: p may be NULL when closing a file 1107 * that was being passed in a message. 1108 * 1109 * The fp must have its usecount bumped and will be FRELEd here. 1110 */ 1111 int 1112 closef(struct file *fp, struct proc *p) 1113 { 1114 struct filedesc *fdp; 1115 1116 if (fp == NULL) 1117 return (0); 1118 1119 #ifdef DIAGNOSTIC 1120 if (fp->f_count < 2) 1121 panic("closef: count (%ld) < 2", fp->f_count); 1122 #endif 1123 fp->f_count--; 1124 1125 /* 1126 * POSIX record locking dictates that any close releases ALL 1127 * locks owned by this process. This is handled by setting 1128 * a flag in the unlock to free ONLY locks obeying POSIX 1129 * semantics, and not to free BSD-style file locks. 1130 * If the descriptor was in a message, POSIX-style locks 1131 * aren't passed with the descriptor. 1132 */ 1133 1134 if (p && ((fdp = p->p_fd) != NULL) && 1135 (fdp->fd_flags & FD_ADVLOCK) && 1136 fp->f_type == DTYPE_VNODE) { 1137 struct vnode *vp = fp->f_data; 1138 struct flock lf; 1139 1140 lf.l_whence = SEEK_SET; 1141 lf.l_start = 0; 1142 lf.l_len = 0; 1143 lf.l_type = F_UNLCK; 1144 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1145 } 1146 1147 return (FRELE(fp, p)); 1148 } 1149 1150 int 1151 fdrop(struct file *fp, struct proc *p) 1152 { 1153 int error; 1154 1155 #ifdef DIAGNOSTIC 1156 if (fp->f_count != 0) 1157 panic("fdrop: count (%ld) != 0", fp->f_count); 1158 #endif 1159 1160 if (fp->f_ops) 1161 error = (*fp->f_ops->fo_close)(fp, p); 1162 else 1163 error = 0; 1164 1165 /* Free fp */ 1166 LIST_REMOVE(fp, f_list); 1167 crfree(fp->f_cred); 1168 numfiles--; 1169 pool_put(&file_pool, fp); 1170 1171 return (error); 1172 } 1173 1174 /* 1175 * Apply an advisory lock on a file descriptor. 1176 * 1177 * Just attempt to get a record lock of the requested type on 1178 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1179 */ 1180 int 1181 sys_flock(struct proc *p, void *v, register_t *retval) 1182 { 1183 struct sys_flock_args /* { 1184 syscallarg(int) fd; 1185 syscallarg(int) how; 1186 } */ *uap = v; 1187 int fd = SCARG(uap, fd); 1188 int how = SCARG(uap, how); 1189 struct filedesc *fdp = p->p_fd; 1190 struct file *fp; 1191 struct vnode *vp; 1192 struct flock lf; 1193 int error; 1194 1195 if ((fp = fd_getfile(fdp, fd)) == NULL) 1196 return (EBADF); 1197 if (fp->f_type != DTYPE_VNODE) 1198 return (EOPNOTSUPP); 1199 FREF(fp); 1200 vp = fp->f_data; 1201 lf.l_whence = SEEK_SET; 1202 lf.l_start = 0; 1203 lf.l_len = 0; 1204 if (how & LOCK_UN) { 1205 lf.l_type = F_UNLCK; 1206 fp->f_iflags &= ~FIF_HASLOCK; 1207 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1208 goto out; 1209 } 1210 if (how & LOCK_EX) 1211 lf.l_type = F_WRLCK; 1212 else if (how & LOCK_SH) 1213 lf.l_type = F_RDLCK; 1214 else { 1215 error = EINVAL; 1216 goto out; 1217 } 1218 fp->f_iflags |= FIF_HASLOCK; 1219 if (how & LOCK_NB) 1220 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1221 else 1222 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1223 out: 1224 FRELE(fp, p); 1225 return (error); 1226 } 1227 1228 /* 1229 * File Descriptor pseudo-device driver (/dev/fd/). 1230 * 1231 * Opening minor device N dup()s the file (if any) connected to file 1232 * descriptor N belonging to the calling process. Note that this driver 1233 * consists of only the ``open()'' routine, because all subsequent 1234 * references to this file will be direct to the other driver. 1235 */ 1236 int 1237 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1238 { 1239 1240 /* 1241 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1242 * the file descriptor being sought for duplication. The error 1243 * return ensures that the vnode for this device will be released 1244 * by vn_open. Open will detect this special error and take the 1245 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1246 * will simply report the error. 1247 */ 1248 p->p_dupfd = minor(dev); 1249 return (ENODEV); 1250 } 1251 1252 /* 1253 * Duplicate the specified descriptor to a free descriptor. 1254 */ 1255 int 1256 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1257 { 1258 struct file *wfp; 1259 1260 fdpassertlocked(fdp); 1261 1262 /* 1263 * Assume that the filename was user-specified; applications do 1264 * not tend to open /dev/fd/# when they can just call dup() 1265 */ 1266 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1267 if (curproc->p_descfd == 255) 1268 return (EPERM); 1269 if (curproc->p_descfd != curproc->p_dupfd) 1270 return (EPERM); 1271 } 1272 1273 /* 1274 * If the to-be-dup'd fd number is greater than the allowed number 1275 * of file descriptors, or the fd to be dup'd has already been 1276 * closed, reject. Note, there is no need to check for new == old 1277 * because fd_getfile will return NULL if the file at indx is 1278 * newly created by falloc (FIF_LARVAL). 1279 */ 1280 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1281 return (EBADF); 1282 1283 /* 1284 * Check that the mode the file is being opened for is a 1285 * subset of the mode of the existing descriptor. 1286 */ 1287 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1288 return (EACCES); 1289 if (wfp->f_count == LONG_MAX-2) 1290 return (EDEADLK); 1291 1292 fdp->fd_ofiles[indx] = wfp; 1293 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1294 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1295 wfp->f_count++; 1296 fd_used(fdp, indx); 1297 return (0); 1298 } 1299 1300 /* 1301 * Close any files on exec? 1302 */ 1303 void 1304 fdcloseexec(struct proc *p) 1305 { 1306 struct filedesc *fdp = p->p_fd; 1307 int fd; 1308 1309 fdplock(fdp); 1310 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1311 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1312 (void) fdrelease(p, fd); 1313 fdpunlock(fdp); 1314 } 1315 1316 int 1317 sys_closefrom(struct proc *p, void *v, register_t *retval) 1318 { 1319 struct sys_closefrom_args *uap = v; 1320 struct filedesc *fdp = p->p_fd; 1321 u_int startfd, i; 1322 1323 startfd = SCARG(uap, fd); 1324 fdplock(fdp); 1325 1326 if (startfd > fdp->fd_lastfile) { 1327 fdpunlock(fdp); 1328 return (EBADF); 1329 } 1330 1331 for (i = startfd; i <= fdp->fd_lastfile; i++) 1332 fdrelease(p, i); 1333 1334 fdpunlock(fdp); 1335 return (0); 1336 } 1337 1338 int 1339 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1340 { 1341 *retval = p->p_fd->fd_openfd; 1342 return (0); 1343 } 1344