1 /* $OpenBSD: kern_descrip.c,v 1.112 2014/07/13 15:29:04 tedu Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/malloc.h> 53 #include <sys/syslog.h> 54 #include <sys/ucred.h> 55 #include <sys/unistd.h> 56 #include <sys/resourcevar.h> 57 #include <sys/conf.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 64 #include <sys/pipe.h> 65 66 /* 67 * Descriptor management. 68 */ 69 struct filelist filehead; /* head of list of open files */ 70 int nfiles; /* actual number of open files */ 71 72 static __inline void fd_used(struct filedesc *, int); 73 static __inline void fd_unused(struct filedesc *, int); 74 static __inline int find_next_zero(u_int *, int, u_int); 75 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 76 int find_last_set(struct filedesc *, int); 77 78 struct pool file_pool; 79 struct pool fdesc_pool; 80 81 void 82 filedesc_init(void) 83 { 84 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl", 85 &pool_allocator_nointr); 86 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 87 &pool_allocator_nointr); 88 LIST_INIT(&filehead); 89 } 90 91 static __inline int 92 find_next_zero (u_int *bitmap, int want, u_int bits) 93 { 94 int i, off, maxoff; 95 u_int sub; 96 97 if (want > bits) 98 return -1; 99 100 off = want >> NDENTRYSHIFT; 101 i = want & NDENTRYMASK; 102 if (i) { 103 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 104 if (sub != ~0) 105 goto found; 106 off++; 107 } 108 109 maxoff = NDLOSLOTS(bits); 110 while (off < maxoff) { 111 if ((sub = bitmap[off]) != ~0) 112 goto found; 113 off++; 114 } 115 116 return -1; 117 118 found: 119 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 120 } 121 122 int 123 find_last_set(struct filedesc *fd, int last) 124 { 125 int off, i; 126 struct file **ofiles = fd->fd_ofiles; 127 u_int *bitmap = fd->fd_lomap; 128 129 off = (last - 1) >> NDENTRYSHIFT; 130 131 while (off >= 0 && !bitmap[off]) 132 off--; 133 if (off < 0) 134 return 0; 135 136 i = ((off + 1) << NDENTRYSHIFT) - 1; 137 if (i >= last) 138 i = last - 1; 139 140 while (i > 0 && ofiles[i] == NULL) 141 i--; 142 return i; 143 } 144 145 static __inline void 146 fd_used(struct filedesc *fdp, int fd) 147 { 148 u_int off = fd >> NDENTRYSHIFT; 149 150 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 151 if (fdp->fd_lomap[off] == ~0) 152 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 153 154 if (fd > fdp->fd_lastfile) 155 fdp->fd_lastfile = fd; 156 fdp->fd_openfd++; 157 } 158 159 static __inline void 160 fd_unused(struct filedesc *fdp, int fd) 161 { 162 u_int off = fd >> NDENTRYSHIFT; 163 164 if (fd < fdp->fd_freefile) 165 fdp->fd_freefile = fd; 166 167 if (fdp->fd_lomap[off] == ~0) 168 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 169 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 170 171 #ifdef DIAGNOSTIC 172 if (fd > fdp->fd_lastfile) 173 panic("fd_unused: fd_lastfile inconsistent"); 174 #endif 175 if (fd == fdp->fd_lastfile) 176 fdp->fd_lastfile = find_last_set(fdp, fd); 177 fdp->fd_openfd--; 178 } 179 180 struct file * 181 fd_getfile(struct filedesc *fdp, int fd) 182 { 183 struct file *fp; 184 185 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 186 return (NULL); 187 188 if (!FILE_IS_USABLE(fp)) 189 return (NULL); 190 191 return (fp); 192 } 193 194 /* 195 * System calls on descriptors. 196 */ 197 198 /* 199 * Duplicate a file descriptor. 200 */ 201 /* ARGSUSED */ 202 int 203 sys_dup(struct proc *p, void *v, register_t *retval) 204 { 205 struct sys_dup_args /* { 206 syscallarg(int) fd; 207 } */ *uap = v; 208 struct filedesc *fdp = p->p_fd; 209 int old = SCARG(uap, fd); 210 struct file *fp; 211 int new; 212 int error; 213 214 restart: 215 if ((fp = fd_getfile(fdp, old)) == NULL) 216 return (EBADF); 217 FREF(fp); 218 fdplock(fdp); 219 if ((error = fdalloc(p, 0, &new)) != 0) { 220 FRELE(fp, p); 221 if (error == ENOSPC) { 222 fdexpand(p); 223 fdpunlock(fdp); 224 goto restart; 225 } 226 goto out; 227 } 228 error = finishdup(p, fp, old, new, retval, 0); 229 230 out: 231 fdpunlock(fdp); 232 return (error); 233 } 234 235 /* 236 * Duplicate a file descriptor to a particular value. 237 */ 238 /* ARGSUSED */ 239 int 240 sys_dup2(struct proc *p, void *v, register_t *retval) 241 { 242 struct sys_dup2_args /* { 243 syscallarg(int) from; 244 syscallarg(int) to; 245 } */ *uap = v; 246 int old = SCARG(uap, from), new = SCARG(uap, to); 247 struct filedesc *fdp = p->p_fd; 248 struct file *fp; 249 int i, error; 250 251 restart: 252 if ((fp = fd_getfile(fdp, old)) == NULL) 253 return (EBADF); 254 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 255 (u_int)new >= maxfiles) 256 return (EBADF); 257 if (old == new) { 258 /* 259 * NOTE! This doesn't clear the close-on-exec flag. This might 260 * or might not be the intended behavior from the start, but 261 * this is what everyone else does. 262 */ 263 *retval = new; 264 return (0); 265 } 266 FREF(fp); 267 fdplock(fdp); 268 if (new >= fdp->fd_nfiles) { 269 if ((error = fdalloc(p, new, &i)) != 0) { 270 FRELE(fp, p); 271 if (error == ENOSPC) { 272 fdexpand(p); 273 fdpunlock(fdp); 274 goto restart; 275 } 276 goto out; 277 } 278 if (new != i) 279 panic("dup2: fdalloc"); 280 fd_unused(fdp, new); 281 } 282 /* finishdup() does FRELE */ 283 error = finishdup(p, fp, old, new, retval, 1); 284 285 out: 286 fdpunlock(fdp); 287 return (error); 288 } 289 290 /* 291 * The file control system call. 292 */ 293 /* ARGSUSED */ 294 int 295 sys_fcntl(struct proc *p, void *v, register_t *retval) 296 { 297 struct sys_fcntl_args /* { 298 syscallarg(int) fd; 299 syscallarg(int) cmd; 300 syscallarg(void *) arg; 301 } */ *uap = v; 302 int fd = SCARG(uap, fd); 303 struct filedesc *fdp = p->p_fd; 304 struct file *fp; 305 struct vnode *vp; 306 int i, tmp, newmin, flg = F_POSIX; 307 struct flock fl; 308 int error = 0; 309 310 restart: 311 if ((fp = fd_getfile(fdp, fd)) == NULL) 312 return (EBADF); 313 FREF(fp); 314 switch (SCARG(uap, cmd)) { 315 316 case F_DUPFD: 317 case F_DUPFD_CLOEXEC: 318 newmin = (long)SCARG(uap, arg); 319 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 320 (u_int)newmin >= maxfiles) { 321 error = EINVAL; 322 break; 323 } 324 fdplock(fdp); 325 if ((error = fdalloc(p, newmin, &i)) != 0) { 326 FRELE(fp, p); 327 if (error == ENOSPC) { 328 fdexpand(p); 329 fdpunlock(fdp); 330 goto restart; 331 } 332 } else { 333 /* finishdup will FRELE for us. */ 334 error = finishdup(p, fp, fd, i, retval, 0); 335 336 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 337 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 338 } 339 340 fdpunlock(fdp); 341 return (error); 342 343 case F_GETFD: 344 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 345 break; 346 347 case F_SETFD: 348 fdplock(fdp); 349 if ((long)SCARG(uap, arg) & 1) 350 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 351 else 352 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 353 fdpunlock(fdp); 354 break; 355 356 case F_GETFL: 357 *retval = OFLAGS(fp->f_flag); 358 break; 359 360 case F_SETFL: 361 fp->f_flag &= ~FCNTLFLAGS; 362 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 363 tmp = fp->f_flag & FNONBLOCK; 364 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 365 if (error) 366 break; 367 tmp = fp->f_flag & FASYNC; 368 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 369 if (!error) 370 break; 371 fp->f_flag &= ~FNONBLOCK; 372 tmp = 0; 373 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 374 break; 375 376 case F_GETOWN: 377 if (fp->f_type == DTYPE_SOCKET) { 378 *retval = ((struct socket *)fp->f_data)->so_pgid; 379 break; 380 } 381 error = (*fp->f_ops->fo_ioctl) 382 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 383 *retval = -tmp; 384 break; 385 386 case F_SETOWN: 387 if (fp->f_type == DTYPE_SOCKET) { 388 struct socket *so = (struct socket *)fp->f_data; 389 390 so->so_pgid = (long)SCARG(uap, arg); 391 so->so_siguid = p->p_ucred->cr_ruid; 392 so->so_sigeuid = p->p_ucred->cr_uid; 393 break; 394 } 395 if ((long)SCARG(uap, arg) <= 0) { 396 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 397 } else { 398 struct process *pr1 = prfind((long)SCARG(uap, arg)); 399 if (pr1 == 0) { 400 error = ESRCH; 401 break; 402 } 403 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 404 } 405 error = ((*fp->f_ops->fo_ioctl) 406 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 407 break; 408 409 case F_SETLKW: 410 flg |= F_WAIT; 411 /* FALLTHROUGH */ 412 413 case F_SETLK: 414 if (fp->f_type != DTYPE_VNODE) { 415 error = EBADF; 416 break; 417 } 418 vp = (struct vnode *)fp->f_data; 419 /* Copy in the lock structure */ 420 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 421 sizeof (fl)); 422 if (error) 423 break; 424 if (fl.l_whence == SEEK_CUR) { 425 if (fl.l_start == 0 && fl.l_len < 0) { 426 /* lockf(3) compliance hack */ 427 fl.l_len = -fl.l_len; 428 fl.l_start = fp->f_offset - fl.l_len; 429 } else 430 fl.l_start += fp->f_offset; 431 } 432 switch (fl.l_type) { 433 434 case F_RDLCK: 435 if ((fp->f_flag & FREAD) == 0) { 436 error = EBADF; 437 goto out; 438 } 439 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 440 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 441 break; 442 443 case F_WRLCK: 444 if ((fp->f_flag & FWRITE) == 0) { 445 error = EBADF; 446 goto out; 447 } 448 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 449 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 450 break; 451 452 case F_UNLCK: 453 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 454 goto out; 455 456 default: 457 error = EINVAL; 458 goto out; 459 } 460 461 if (fp != fd_getfile(fdp, fd)) { 462 /* 463 * We have lost the race with close() or dup2(); 464 * unlock, pretend that we've won the race and that 465 * lock had been removed by close() 466 */ 467 fl.l_whence = SEEK_SET; 468 fl.l_start = 0; 469 fl.l_len = 0; 470 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 471 fl.l_type = F_UNLCK; 472 } 473 goto out; 474 475 476 case F_GETLK: 477 if (fp->f_type != DTYPE_VNODE) { 478 error = EBADF; 479 break; 480 } 481 vp = (struct vnode *)fp->f_data; 482 /* Copy in the lock structure */ 483 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 484 sizeof (fl)); 485 if (error) 486 break; 487 if (fl.l_whence == SEEK_CUR) { 488 if (fl.l_start == 0 && fl.l_len < 0) { 489 /* lockf(3) compliance hack */ 490 fl.l_len = -fl.l_len; 491 fl.l_start = fp->f_offset - fl.l_len; 492 } else 493 fl.l_start += fp->f_offset; 494 } 495 if (fl.l_type != F_RDLCK && 496 fl.l_type != F_WRLCK && 497 fl.l_type != F_UNLCK && 498 fl.l_type != 0) { 499 error = EINVAL; 500 break; 501 } 502 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 503 if (error) 504 break; 505 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 506 sizeof (fl))); 507 break; 508 509 default: 510 error = EINVAL; 511 break; 512 } 513 out: 514 FRELE(fp, p); 515 return (error); 516 } 517 518 /* 519 * Common code for dup, dup2, and fcntl(F_DUPFD). 520 */ 521 int 522 finishdup(struct proc *p, struct file *fp, int old, int new, 523 register_t *retval, int dup2) 524 { 525 struct file *oldfp; 526 struct filedesc *fdp = p->p_fd; 527 528 fdpassertlocked(fdp); 529 if (fp->f_count == LONG_MAX-2) { 530 FRELE(fp, p); 531 return (EDEADLK); 532 } 533 534 /* 535 * Don't fd_getfile here. We want to closef LARVAL files and 536 * closef can deal with that. 537 */ 538 oldfp = fdp->fd_ofiles[new]; 539 if (oldfp != NULL) 540 FREF(oldfp); 541 542 fdp->fd_ofiles[new] = fp; 543 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 544 fp->f_count++; 545 FRELE(fp, p); 546 if (dup2 && oldfp == NULL) 547 fd_used(fdp, new); 548 *retval = new; 549 550 if (oldfp != NULL) { 551 if (new < fdp->fd_knlistsize) 552 knote_fdclose(p, new); 553 closef(oldfp, p); 554 } 555 556 return (0); 557 } 558 559 void 560 fdremove(struct filedesc *fdp, int fd) 561 { 562 fdpassertlocked(fdp); 563 fdp->fd_ofiles[fd] = NULL; 564 fd_unused(fdp, fd); 565 } 566 567 int 568 fdrelease(struct proc *p, int fd) 569 { 570 struct filedesc *fdp = p->p_fd; 571 struct file **fpp, *fp; 572 573 fdpassertlocked(fdp); 574 575 /* 576 * Don't fd_getfile here. We want to closef LARVAL files and closef 577 * can deal with that. 578 */ 579 fpp = &fdp->fd_ofiles[fd]; 580 fp = *fpp; 581 if (fp == NULL) 582 return (EBADF); 583 FREF(fp); 584 *fpp = NULL; 585 fd_unused(fdp, fd); 586 if (fd < fdp->fd_knlistsize) 587 knote_fdclose(p, fd); 588 return (closef(fp, p)); 589 } 590 591 /* 592 * Close a file descriptor. 593 */ 594 /* ARGSUSED */ 595 int 596 sys_close(struct proc *p, void *v, register_t *retval) 597 { 598 struct sys_close_args /* { 599 syscallarg(int) fd; 600 } */ *uap = v; 601 int fd = SCARG(uap, fd), error; 602 struct filedesc *fdp = p->p_fd; 603 604 if (fd_getfile(fdp, fd) == NULL) 605 return (EBADF); 606 fdplock(fdp); 607 error = fdrelease(p, fd); 608 fdpunlock(fdp); 609 610 return (error); 611 } 612 613 /* 614 * Return status information about a file descriptor. 615 */ 616 int 617 sys_fstat(struct proc *p, void *v, register_t *retval) 618 { 619 struct sys_fstat_args /* { 620 syscallarg(int) fd; 621 syscallarg(struct stat *) sb; 622 } */ *uap = v; 623 int fd = SCARG(uap, fd); 624 struct filedesc *fdp = p->p_fd; 625 struct file *fp; 626 struct stat ub; 627 int error; 628 629 if ((fp = fd_getfile(fdp, fd)) == NULL) 630 return (EBADF); 631 FREF(fp); 632 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 633 FRELE(fp, p); 634 if (error == 0) { 635 /* 636 * Don't let non-root see generation numbers 637 * (for NFS security) 638 */ 639 if (suser(p, 0)) 640 ub.st_gen = 0; 641 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 642 sizeof (ub)); 643 } 644 #ifdef KTRACE 645 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 646 ktrstat(p, &ub); 647 #endif 648 return (error); 649 } 650 651 /* 652 * Return pathconf information about a file descriptor. 653 */ 654 /* ARGSUSED */ 655 int 656 sys_fpathconf(struct proc *p, void *v, register_t *retval) 657 { 658 struct sys_fpathconf_args /* { 659 syscallarg(int) fd; 660 syscallarg(int) name; 661 } */ *uap = v; 662 int fd = SCARG(uap, fd); 663 struct filedesc *fdp = p->p_fd; 664 struct file *fp; 665 struct vnode *vp; 666 int error; 667 668 if ((fp = fd_getfile(fdp, fd)) == NULL) 669 return (EBADF); 670 FREF(fp); 671 switch (fp->f_type) { 672 case DTYPE_PIPE: 673 case DTYPE_SOCKET: 674 if (SCARG(uap, name) != _PC_PIPE_BUF) { 675 error = EINVAL; 676 break; 677 } 678 *retval = PIPE_BUF; 679 error = 0; 680 break; 681 682 case DTYPE_VNODE: 683 vp = (struct vnode *)fp->f_data; 684 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 685 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 686 VOP_UNLOCK(vp, 0, p); 687 break; 688 689 default: 690 error = EOPNOTSUPP; 691 break; 692 } 693 FRELE(fp, p); 694 return (error); 695 } 696 697 /* 698 * Allocate a file descriptor for the process. 699 */ 700 int 701 fdalloc(struct proc *p, int want, int *result) 702 { 703 struct filedesc *fdp = p->p_fd; 704 int lim, last, i; 705 u_int new, off; 706 707 /* 708 * Search for a free descriptor starting at the higher 709 * of want or fd_freefile. If that fails, consider 710 * expanding the ofile array. 711 */ 712 restart: 713 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 714 last = min(fdp->fd_nfiles, lim); 715 if ((i = want) < fdp->fd_freefile) 716 i = fdp->fd_freefile; 717 off = i >> NDENTRYSHIFT; 718 new = find_next_zero(fdp->fd_himap, off, 719 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 720 if (new != -1) { 721 i = find_next_zero(&fdp->fd_lomap[new], 722 new > off ? 0 : i & NDENTRYMASK, 723 NDENTRIES); 724 if (i == -1) { 725 /* 726 * Free file descriptor in this block was 727 * below want, try again with higher want. 728 */ 729 want = (new + 1) << NDENTRYSHIFT; 730 goto restart; 731 } 732 i += (new << NDENTRYSHIFT); 733 if (i < last) { 734 fd_used(fdp, i); 735 if (want <= fdp->fd_freefile) 736 fdp->fd_freefile = i; 737 *result = i; 738 fdp->fd_ofileflags[i] = 0; 739 return (0); 740 } 741 } 742 if (fdp->fd_nfiles >= lim) 743 return (EMFILE); 744 745 return (ENOSPC); 746 } 747 748 void 749 fdexpand(struct proc *p) 750 { 751 struct filedesc *fdp = p->p_fd; 752 int nfiles; 753 size_t copylen; 754 struct file **newofile; 755 char *newofileflags; 756 u_int *newhimap, *newlomap; 757 758 fdpassertlocked(fdp); 759 760 /* 761 * No space in current array. 762 */ 763 if (fdp->fd_nfiles < NDEXTENT) 764 nfiles = NDEXTENT; 765 else 766 nfiles = 2 * fdp->fd_nfiles; 767 768 newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); 769 newofileflags = (char *) &newofile[nfiles]; 770 771 /* 772 * Copy the existing ofile and ofileflags arrays 773 * and zero the new portion of each array. 774 */ 775 copylen = sizeof(struct file *) * fdp->fd_nfiles; 776 memcpy(newofile, fdp->fd_ofiles, copylen); 777 memset((char *)newofile + copylen, 0, 778 nfiles * sizeof(struct file *) - copylen); 779 copylen = sizeof(char) * fdp->fd_nfiles; 780 memcpy(newofileflags, fdp->fd_ofileflags, copylen); 781 memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); 782 783 if (fdp->fd_nfiles > NDFILE) 784 free(fdp->fd_ofiles, M_FILEDESC, 0); 785 786 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 787 newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), 788 M_FILEDESC, M_WAITOK); 789 newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), 790 M_FILEDESC, M_WAITOK); 791 792 copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); 793 memcpy(newhimap, fdp->fd_himap, copylen); 794 memset((char *)newhimap + copylen, 0, 795 NDHISLOTS(nfiles) * sizeof(u_int) - copylen); 796 797 copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); 798 memcpy(newlomap, fdp->fd_lomap, copylen); 799 memset((char *)newlomap + copylen, 0, 800 NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); 801 802 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 803 free(fdp->fd_himap, M_FILEDESC, 0); 804 free(fdp->fd_lomap, M_FILEDESC, 0); 805 } 806 fdp->fd_himap = newhimap; 807 fdp->fd_lomap = newlomap; 808 } 809 fdp->fd_ofiles = newofile; 810 fdp->fd_ofileflags = newofileflags; 811 fdp->fd_nfiles = nfiles; 812 } 813 814 /* 815 * Create a new open file structure and allocate 816 * a file descriptor for the process that refers to it. 817 */ 818 int 819 falloc(struct proc *p, struct file **resultfp, int *resultfd) 820 { 821 struct file *fp, *fq; 822 int error, i; 823 824 fdpassertlocked(p->p_fd); 825 restart: 826 if ((error = fdalloc(p, 0, &i)) != 0) { 827 if (error == ENOSPC) { 828 fdexpand(p); 829 goto restart; 830 } 831 return (error); 832 } 833 if (nfiles >= maxfiles) { 834 fd_unused(p->p_fd, i); 835 tablefull("file"); 836 return (ENFILE); 837 } 838 /* 839 * Allocate a new file descriptor. 840 * If the process has file descriptor zero open, add to the list 841 * of open files at that point, otherwise put it at the front of 842 * the list of open files. 843 */ 844 nfiles++; 845 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 846 fp->f_iflags = FIF_LARVAL; 847 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 848 LIST_INSERT_AFTER(fq, fp, f_list); 849 } else { 850 LIST_INSERT_HEAD(&filehead, fp, f_list); 851 } 852 p->p_fd->fd_ofiles[i] = fp; 853 fp->f_count = 1; 854 fp->f_cred = p->p_ucred; 855 crhold(fp->f_cred); 856 if (resultfp) 857 *resultfp = fp; 858 if (resultfd) 859 *resultfd = i; 860 FREF(fp); 861 return (0); 862 } 863 864 /* 865 * Build a new filedesc structure. 866 */ 867 struct filedesc * 868 fdinit(void) 869 { 870 struct filedesc0 *newfdp; 871 extern int cmask; 872 873 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 874 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 875 876 /* Create the file descriptor table. */ 877 newfdp->fd_fd.fd_refcnt = 1; 878 newfdp->fd_fd.fd_cmask = cmask; 879 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 880 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 881 newfdp->fd_fd.fd_nfiles = NDFILE; 882 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 883 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 884 newfdp->fd_fd.fd_knlistsize = -1; 885 886 newfdp->fd_fd.fd_freefile = 0; 887 newfdp->fd_fd.fd_lastfile = 0; 888 889 return (&newfdp->fd_fd); 890 } 891 892 /* 893 * Share a filedesc structure. 894 */ 895 struct filedesc * 896 fdshare(struct process *pr) 897 { 898 pr->ps_fd->fd_refcnt++; 899 return (pr->ps_fd); 900 } 901 902 /* 903 * Copy a filedesc structure. 904 */ 905 struct filedesc * 906 fdcopy(struct process *pr) 907 { 908 struct filedesc *newfdp, *fdp = pr->ps_fd; 909 struct file **fpp; 910 int i; 911 912 fdplock(fdp); 913 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 914 memcpy(newfdp, fdp, sizeof(struct filedesc)); 915 if (newfdp->fd_cdir) 916 vref(newfdp->fd_cdir); 917 if (newfdp->fd_rdir) 918 vref(newfdp->fd_rdir); 919 newfdp->fd_refcnt = 1; 920 rw_init(&newfdp->fd_lock, "fdlock"); 921 922 /* 923 * If the number of open files fits in the internal arrays 924 * of the open file structure, use them, otherwise allocate 925 * additional memory for the number of descriptors currently 926 * in use. 927 */ 928 if (newfdp->fd_lastfile < NDFILE) { 929 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 930 newfdp->fd_ofileflags = 931 ((struct filedesc0 *) newfdp)->fd_dfileflags; 932 i = NDFILE; 933 } else { 934 /* 935 * Compute the smallest multiple of NDEXTENT needed 936 * for the file descriptors currently in use, 937 * allowing the table to shrink. 938 */ 939 i = newfdp->fd_nfiles; 940 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 941 i /= 2; 942 newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); 943 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 944 } 945 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 946 newfdp->fd_himap = 947 ((struct filedesc0 *) newfdp)->fd_dhimap; 948 newfdp->fd_lomap = 949 ((struct filedesc0 *) newfdp)->fd_dlomap; 950 } else { 951 newfdp->fd_himap = mallocarray(NDHISLOTS(i), sizeof(u_int), 952 M_FILEDESC, M_WAITOK); 953 newfdp->fd_lomap = mallocarray(NDLOSLOTS(i), sizeof(u_int), 954 M_FILEDESC, M_WAITOK); 955 } 956 newfdp->fd_nfiles = i; 957 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); 958 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 959 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 960 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 961 fdpunlock(fdp); 962 963 fdplock(newfdp); 964 fpp = newfdp->fd_ofiles; 965 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 966 if (*fpp != NULL) { 967 /* 968 * XXX Gruesome hack. If count gets too high, fail 969 * to copy an fd, since fdcopy()'s callers do not 970 * permit it to indicate failure yet. 971 * Meanwhile, kqueue and systrace files have to be 972 * tied to the process that opened them to enforce 973 * their internal consistency, so close them here. 974 */ 975 if ((*fpp)->f_count == LONG_MAX-2 || 976 (*fpp)->f_type == DTYPE_KQUEUE || 977 (*fpp)->f_type == DTYPE_SYSTRACE) 978 fdremove(newfdp, i); 979 else 980 (*fpp)->f_count++; 981 } 982 983 /* finish cleaning up kq bits */ 984 if (newfdp->fd_knlistsize != -1) { 985 newfdp->fd_knlist = NULL; 986 newfdp->fd_knlistsize = -1; 987 newfdp->fd_knhash = NULL; 988 newfdp->fd_knhashmask = 0; 989 } 990 991 fdpunlock(newfdp); 992 return (newfdp); 993 } 994 995 /* 996 * Release a filedesc structure. 997 */ 998 void 999 fdfree(struct proc *p) 1000 { 1001 struct filedesc *fdp = p->p_fd; 1002 struct file **fpp, *fp; 1003 int i; 1004 1005 if (--fdp->fd_refcnt > 0) 1006 return; 1007 fpp = fdp->fd_ofiles; 1008 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1009 fp = *fpp; 1010 if (fp != NULL) { 1011 FREF(fp); 1012 *fpp = NULL; 1013 (void) closef(fp, p); 1014 } 1015 } 1016 p->p_fd = NULL; 1017 if (fdp->fd_nfiles > NDFILE) 1018 free(fdp->fd_ofiles, M_FILEDESC, 0); 1019 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1020 free(fdp->fd_himap, M_FILEDESC, 0); 1021 free(fdp->fd_lomap, M_FILEDESC, 0); 1022 } 1023 if (fdp->fd_cdir) 1024 vrele(fdp->fd_cdir); 1025 if (fdp->fd_rdir) 1026 vrele(fdp->fd_rdir); 1027 if (fdp->fd_knlist) 1028 free(fdp->fd_knlist, M_TEMP, 0); 1029 if (fdp->fd_knhash) 1030 free(fdp->fd_knhash, M_TEMP, 0); 1031 pool_put(&fdesc_pool, fdp); 1032 } 1033 1034 /* 1035 * Internal form of close. 1036 * Decrement reference count on file structure. 1037 * Note: p may be NULL when closing a file 1038 * that was being passed in a message. 1039 * 1040 * The fp must have its usecount bumped and will be FRELEd here. 1041 */ 1042 int 1043 closef(struct file *fp, struct proc *p) 1044 { 1045 struct filedesc *fdp; 1046 1047 if (fp == NULL) 1048 return (0); 1049 1050 #ifdef DIAGNOSTIC 1051 if (fp->f_count < 2) 1052 panic("closef: count (%ld) < 2", fp->f_count); 1053 #endif 1054 fp->f_count--; 1055 1056 /* 1057 * POSIX record locking dictates that any close releases ALL 1058 * locks owned by this process. This is handled by setting 1059 * a flag in the unlock to free ONLY locks obeying POSIX 1060 * semantics, and not to free BSD-style file locks. 1061 * If the descriptor was in a message, POSIX-style locks 1062 * aren't passed with the descriptor. 1063 */ 1064 1065 if (p && ((fdp = p->p_fd) != NULL) && 1066 (fdp->fd_flags & FD_ADVLOCK) && 1067 fp->f_type == DTYPE_VNODE) { 1068 struct vnode *vp = fp->f_data; 1069 struct flock lf; 1070 1071 lf.l_whence = SEEK_SET; 1072 lf.l_start = 0; 1073 lf.l_len = 0; 1074 lf.l_type = F_UNLCK; 1075 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1076 } 1077 1078 return (FRELE(fp, p)); 1079 } 1080 1081 int 1082 fdrop(struct file *fp, struct proc *p) 1083 { 1084 int error; 1085 1086 #ifdef DIAGNOSTIC 1087 if (fp->f_count != 0) 1088 panic("fdrop: count (%ld) != 0", fp->f_count); 1089 #endif 1090 1091 if (fp->f_ops) 1092 error = (*fp->f_ops->fo_close)(fp, p); 1093 else 1094 error = 0; 1095 1096 /* Free fp */ 1097 LIST_REMOVE(fp, f_list); 1098 crfree(fp->f_cred); 1099 nfiles--; 1100 pool_put(&file_pool, fp); 1101 1102 return (error); 1103 } 1104 1105 /* 1106 * Apply an advisory lock on a file descriptor. 1107 * 1108 * Just attempt to get a record lock of the requested type on 1109 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1110 */ 1111 /* ARGSUSED */ 1112 int 1113 sys_flock(struct proc *p, void *v, register_t *retval) 1114 { 1115 struct sys_flock_args /* { 1116 syscallarg(int) fd; 1117 syscallarg(int) how; 1118 } */ *uap = v; 1119 int fd = SCARG(uap, fd); 1120 int how = SCARG(uap, how); 1121 struct filedesc *fdp = p->p_fd; 1122 struct file *fp; 1123 struct vnode *vp; 1124 struct flock lf; 1125 int error; 1126 1127 if ((fp = fd_getfile(fdp, fd)) == NULL) 1128 return (EBADF); 1129 if (fp->f_type != DTYPE_VNODE) 1130 return (EOPNOTSUPP); 1131 FREF(fp); 1132 vp = (struct vnode *)fp->f_data; 1133 lf.l_whence = SEEK_SET; 1134 lf.l_start = 0; 1135 lf.l_len = 0; 1136 if (how & LOCK_UN) { 1137 lf.l_type = F_UNLCK; 1138 fp->f_iflags &= ~FIF_HASLOCK; 1139 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1140 goto out; 1141 } 1142 if (how & LOCK_EX) 1143 lf.l_type = F_WRLCK; 1144 else if (how & LOCK_SH) 1145 lf.l_type = F_RDLCK; 1146 else { 1147 error = EINVAL; 1148 goto out; 1149 } 1150 fp->f_iflags |= FIF_HASLOCK; 1151 if (how & LOCK_NB) 1152 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1153 else 1154 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1155 out: 1156 FRELE(fp, p); 1157 return (error); 1158 } 1159 1160 /* 1161 * File Descriptor pseudo-device driver (/dev/fd/). 1162 * 1163 * Opening minor device N dup()s the file (if any) connected to file 1164 * descriptor N belonging to the calling process. Note that this driver 1165 * consists of only the ``open()'' routine, because all subsequent 1166 * references to this file will be direct to the other driver. 1167 */ 1168 /* ARGSUSED */ 1169 int 1170 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1171 { 1172 1173 /* 1174 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1175 * the file descriptor being sought for duplication. The error 1176 * return ensures that the vnode for this device will be released 1177 * by vn_open. Open will detect this special error and take the 1178 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1179 * will simply report the error. 1180 */ 1181 p->p_dupfd = minor(dev); 1182 return (ENODEV); 1183 } 1184 1185 /* 1186 * Duplicate the specified descriptor to a free descriptor. 1187 */ 1188 int 1189 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1190 { 1191 struct file *wfp; 1192 1193 fdpassertlocked(fdp); 1194 1195 /* 1196 * Assume that the filename was user-specified; applications do 1197 * not tend to open /dev/fd/# when they can just call dup() 1198 */ 1199 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1200 if (curproc->p_descfd == 255) 1201 return (EPERM); 1202 if (curproc->p_descfd != curproc->p_dupfd) 1203 return (EPERM); 1204 } 1205 1206 /* 1207 * If the to-be-dup'd fd number is greater than the allowed number 1208 * of file descriptors, or the fd to be dup'd has already been 1209 * closed, reject. Note, there is no need to check for new == old 1210 * because fd_getfile will return NULL if the file at indx is 1211 * newly created by falloc (FIF_LARVAL). 1212 */ 1213 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1214 return (EBADF); 1215 1216 /* 1217 * Check that the mode the file is being opened for is a 1218 * subset of the mode of the existing descriptor. 1219 */ 1220 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1221 return (EACCES); 1222 if (wfp->f_count == LONG_MAX-2) 1223 return (EDEADLK); 1224 1225 fdp->fd_ofiles[indx] = wfp; 1226 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1227 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1228 wfp->f_count++; 1229 fd_used(fdp, indx); 1230 return (0); 1231 } 1232 1233 /* 1234 * Close any files on exec? 1235 */ 1236 void 1237 fdcloseexec(struct proc *p) 1238 { 1239 struct filedesc *fdp = p->p_fd; 1240 int fd; 1241 1242 fdplock(fdp); 1243 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1244 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1245 (void) fdrelease(p, fd); 1246 fdpunlock(fdp); 1247 } 1248 1249 int 1250 sys_closefrom(struct proc *p, void *v, register_t *retval) 1251 { 1252 struct sys_closefrom_args *uap = v; 1253 struct filedesc *fdp = p->p_fd; 1254 u_int startfd, i; 1255 1256 startfd = SCARG(uap, fd); 1257 fdplock(fdp); 1258 1259 if (startfd > fdp->fd_lastfile) { 1260 fdpunlock(fdp); 1261 return (EBADF); 1262 } 1263 1264 for (i = startfd; i <= fdp->fd_lastfile; i++) 1265 fdrelease(p, i); 1266 1267 fdpunlock(fdp); 1268 return (0); 1269 } 1270 1271 int 1272 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1273 { 1274 *retval = p->p_fd->fd_openfd; 1275 return (0); 1276 } 1277