1 /* $OpenBSD: kern_descrip.c,v 1.98 2012/07/11 23:07:19 guenther Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/malloc.h> 53 #include <sys/syslog.h> 54 #include <sys/ucred.h> 55 #include <sys/unistd.h> 56 #include <sys/resourcevar.h> 57 #include <sys/conf.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 64 #include <uvm/uvm_extern.h> 65 66 #include <sys/pipe.h> 67 68 /* 69 * Descriptor management. 70 */ 71 struct filelist filehead; /* head of list of open files */ 72 int nfiles; /* actual number of open files */ 73 74 static __inline void fd_used(struct filedesc *, int); 75 static __inline void fd_unused(struct filedesc *, int); 76 static __inline int find_next_zero(u_int *, int, u_int); 77 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 78 int find_last_set(struct filedesc *, int); 79 80 struct pool file_pool; 81 struct pool fdesc_pool; 82 83 void 84 filedesc_init(void) 85 { 86 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl", 87 &pool_allocator_nointr); 88 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 89 &pool_allocator_nointr); 90 LIST_INIT(&filehead); 91 } 92 93 static __inline int 94 find_next_zero (u_int *bitmap, int want, u_int bits) 95 { 96 int i, off, maxoff; 97 u_int sub; 98 99 if (want > bits) 100 return -1; 101 102 off = want >> NDENTRYSHIFT; 103 i = want & NDENTRYMASK; 104 if (i) { 105 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 106 if (sub != ~0) 107 goto found; 108 off++; 109 } 110 111 maxoff = NDLOSLOTS(bits); 112 while (off < maxoff) { 113 if ((sub = bitmap[off]) != ~0) 114 goto found; 115 off++; 116 } 117 118 return -1; 119 120 found: 121 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 122 } 123 124 int 125 find_last_set(struct filedesc *fd, int last) 126 { 127 int off, i; 128 struct file **ofiles = fd->fd_ofiles; 129 u_int *bitmap = fd->fd_lomap; 130 131 off = (last - 1) >> NDENTRYSHIFT; 132 133 while (off >= 0 && !bitmap[off]) 134 off--; 135 if (off < 0) 136 return 0; 137 138 i = ((off + 1) << NDENTRYSHIFT) - 1; 139 if (i >= last) 140 i = last - 1; 141 142 while (i > 0 && ofiles[i] == NULL) 143 i--; 144 return i; 145 } 146 147 static __inline void 148 fd_used(struct filedesc *fdp, int fd) 149 { 150 u_int off = fd >> NDENTRYSHIFT; 151 152 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 153 if (fdp->fd_lomap[off] == ~0) 154 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 155 156 if (fd > fdp->fd_lastfile) 157 fdp->fd_lastfile = fd; 158 fdp->fd_openfd++; 159 } 160 161 static __inline void 162 fd_unused(struct filedesc *fdp, int fd) 163 { 164 u_int off = fd >> NDENTRYSHIFT; 165 166 if (fd < fdp->fd_freefile) 167 fdp->fd_freefile = fd; 168 169 if (fdp->fd_lomap[off] == ~0) 170 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 171 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 172 173 #ifdef DIAGNOSTIC 174 if (fd > fdp->fd_lastfile) 175 panic("fd_unused: fd_lastfile inconsistent"); 176 #endif 177 if (fd == fdp->fd_lastfile) 178 fdp->fd_lastfile = find_last_set(fdp, fd); 179 fdp->fd_openfd--; 180 } 181 182 struct file * 183 fd_getfile(struct filedesc *fdp, int fd) 184 { 185 struct file *fp; 186 187 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 188 return (NULL); 189 190 if (!FILE_IS_USABLE(fp)) 191 return (NULL); 192 193 return (fp); 194 } 195 196 /* 197 * System calls on descriptors. 198 */ 199 200 /* 201 * Duplicate a file descriptor. 202 */ 203 /* ARGSUSED */ 204 int 205 sys_dup(struct proc *p, void *v, register_t *retval) 206 { 207 struct sys_dup_args /* { 208 syscallarg(int) fd; 209 } */ *uap = v; 210 struct filedesc *fdp = p->p_fd; 211 int old = SCARG(uap, fd); 212 struct file *fp; 213 int new; 214 int error; 215 216 restart: 217 if ((fp = fd_getfile(fdp, old)) == NULL) 218 return (EBADF); 219 FREF(fp); 220 fdplock(fdp); 221 if ((error = fdalloc(p, 0, &new)) != 0) { 222 FRELE(fp, p); 223 if (error == ENOSPC) { 224 fdexpand(p); 225 fdpunlock(fdp); 226 goto restart; 227 } 228 goto out; 229 } 230 error = finishdup(p, fp, old, new, retval, 0); 231 232 out: 233 fdpunlock(fdp); 234 return (error); 235 } 236 237 /* 238 * Duplicate a file descriptor to a particular value. 239 */ 240 /* ARGSUSED */ 241 int 242 sys_dup2(struct proc *p, void *v, register_t *retval) 243 { 244 struct sys_dup2_args /* { 245 syscallarg(int) from; 246 syscallarg(int) to; 247 } */ *uap = v; 248 int old = SCARG(uap, from), new = SCARG(uap, to); 249 struct filedesc *fdp = p->p_fd; 250 struct file *fp; 251 int i, error; 252 253 restart: 254 if ((fp = fd_getfile(fdp, old)) == NULL) 255 return (EBADF); 256 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 257 (u_int)new >= maxfiles) 258 return (EBADF); 259 if (old == new) { 260 /* 261 * NOTE! This doesn't clear the close-on-exec flag. This might 262 * or might not be the intended behavior from the start, but 263 * this is what everyone else does. 264 */ 265 *retval = new; 266 return (0); 267 } 268 FREF(fp); 269 fdplock(fdp); 270 if (new >= fdp->fd_nfiles) { 271 if ((error = fdalloc(p, new, &i)) != 0) { 272 FRELE(fp, p); 273 if (error == ENOSPC) { 274 fdexpand(p); 275 fdpunlock(fdp); 276 goto restart; 277 } 278 goto out; 279 } 280 if (new != i) 281 panic("dup2: fdalloc"); 282 fd_unused(fdp, new); 283 } 284 /* finishdup() does FRELE */ 285 error = finishdup(p, fp, old, new, retval, 1); 286 287 out: 288 fdpunlock(fdp); 289 return (error); 290 } 291 292 /* 293 * The file control system call. 294 */ 295 /* ARGSUSED */ 296 int 297 sys_fcntl(struct proc *p, void *v, register_t *retval) 298 { 299 struct sys_fcntl_args /* { 300 syscallarg(int) fd; 301 syscallarg(int) cmd; 302 syscallarg(void *) arg; 303 } */ *uap = v; 304 int fd = SCARG(uap, fd); 305 struct filedesc *fdp = p->p_fd; 306 struct file *fp; 307 struct vnode *vp; 308 int i, tmp, newmin, flg = F_POSIX; 309 struct flock fl; 310 int error = 0; 311 312 restart: 313 if ((fp = fd_getfile(fdp, fd)) == NULL) 314 return (EBADF); 315 FREF(fp); 316 switch (SCARG(uap, cmd)) { 317 318 case F_DUPFD: 319 case F_DUPFD_CLOEXEC: 320 newmin = (long)SCARG(uap, arg); 321 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 322 (u_int)newmin >= maxfiles) { 323 error = EINVAL; 324 break; 325 } 326 fdplock(fdp); 327 if ((error = fdalloc(p, newmin, &i)) != 0) { 328 FRELE(fp, p); 329 if (error == ENOSPC) { 330 fdexpand(p); 331 fdpunlock(fdp); 332 goto restart; 333 } 334 } else { 335 /* finishdup will FRELE for us. */ 336 error = finishdup(p, fp, fd, i, retval, 0); 337 338 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 339 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 340 } 341 342 fdpunlock(fdp); 343 return (error); 344 345 case F_GETFD: 346 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 347 break; 348 349 case F_SETFD: 350 fdplock(fdp); 351 if ((long)SCARG(uap, arg) & 1) 352 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 353 else 354 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 355 fdpunlock(fdp); 356 break; 357 358 case F_GETFL: 359 *retval = OFLAGS(fp->f_flag); 360 break; 361 362 case F_SETFL: 363 fp->f_flag &= ~FCNTLFLAGS; 364 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 365 tmp = fp->f_flag & FNONBLOCK; 366 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 367 if (error) 368 break; 369 tmp = fp->f_flag & FASYNC; 370 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 371 if (!error) 372 break; 373 fp->f_flag &= ~FNONBLOCK; 374 tmp = 0; 375 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 376 break; 377 378 case F_GETOWN: 379 if (fp->f_type == DTYPE_SOCKET) { 380 *retval = ((struct socket *)fp->f_data)->so_pgid; 381 break; 382 } 383 error = (*fp->f_ops->fo_ioctl) 384 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 385 *retval = -tmp; 386 break; 387 388 case F_SETOWN: 389 if (fp->f_type == DTYPE_SOCKET) { 390 struct socket *so = (struct socket *)fp->f_data; 391 392 so->so_pgid = (long)SCARG(uap, arg); 393 so->so_siguid = p->p_cred->p_ruid; 394 so->so_sigeuid = p->p_ucred->cr_uid; 395 break; 396 } 397 if ((long)SCARG(uap, arg) <= 0) { 398 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 399 } else { 400 struct process *pr1 = prfind((long)SCARG(uap, arg)); 401 if (pr1 == 0) { 402 error = ESRCH; 403 break; 404 } 405 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 406 } 407 error = ((*fp->f_ops->fo_ioctl) 408 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 409 break; 410 411 case F_SETLKW: 412 flg |= F_WAIT; 413 /* FALLTHROUGH */ 414 415 case F_SETLK: 416 if (fp->f_type != DTYPE_VNODE) { 417 error = EBADF; 418 break; 419 } 420 vp = (struct vnode *)fp->f_data; 421 /* Copy in the lock structure */ 422 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 423 sizeof (fl)); 424 if (error) 425 break; 426 if (fl.l_whence == SEEK_CUR) { 427 if (fl.l_start == 0 && fl.l_len < 0) { 428 /* lockf(3) compliance hack */ 429 fl.l_len = -fl.l_len; 430 fl.l_start = fp->f_offset - fl.l_len; 431 } else 432 fl.l_start += fp->f_offset; 433 } 434 switch (fl.l_type) { 435 436 case F_RDLCK: 437 if ((fp->f_flag & FREAD) == 0) { 438 error = EBADF; 439 goto out; 440 } 441 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 442 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 443 break; 444 445 case F_WRLCK: 446 if ((fp->f_flag & FWRITE) == 0) { 447 error = EBADF; 448 goto out; 449 } 450 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 451 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 452 break; 453 454 case F_UNLCK: 455 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 456 goto out; 457 458 default: 459 error = EINVAL; 460 goto out; 461 } 462 463 if (fp != fd_getfile(fdp, fd)) { 464 /* 465 * We have lost the race with close() or dup2(); 466 * unlock, pretend that we've won the race and that 467 * lock had been removed by close() 468 */ 469 fl.l_whence = SEEK_SET; 470 fl.l_start = 0; 471 fl.l_len = 0; 472 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 473 fl.l_type = F_UNLCK; 474 } 475 goto out; 476 477 478 case F_GETLK: 479 if (fp->f_type != DTYPE_VNODE) { 480 error = EBADF; 481 break; 482 } 483 vp = (struct vnode *)fp->f_data; 484 /* Copy in the lock structure */ 485 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 486 sizeof (fl)); 487 if (error) 488 break; 489 if (fl.l_whence == SEEK_CUR) { 490 if (fl.l_start == 0 && fl.l_len < 0) { 491 /* lockf(3) compliance hack */ 492 fl.l_len = -fl.l_len; 493 fl.l_start = fp->f_offset - fl.l_len; 494 } else 495 fl.l_start += fp->f_offset; 496 } 497 if (fl.l_type != F_RDLCK && 498 fl.l_type != F_WRLCK && 499 fl.l_type != F_UNLCK && 500 fl.l_type != 0) { 501 error = EINVAL; 502 break; 503 } 504 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 505 if (error) 506 break; 507 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 508 sizeof (fl))); 509 break; 510 511 default: 512 error = EINVAL; 513 break; 514 } 515 out: 516 FRELE(fp, p); 517 return (error); 518 } 519 520 /* 521 * Common code for dup, dup2, and fcntl(F_DUPFD). 522 */ 523 int 524 finishdup(struct proc *p, struct file *fp, int old, int new, 525 register_t *retval, int dup2) 526 { 527 struct file *oldfp; 528 struct filedesc *fdp = p->p_fd; 529 530 fdpassertlocked(fdp); 531 if (fp->f_count == LONG_MAX-2) { 532 FRELE(fp, p); 533 return (EDEADLK); 534 } 535 536 /* 537 * Don't fd_getfile here. We want to closef LARVAL files and 538 * closef can deal with that. 539 */ 540 oldfp = fdp->fd_ofiles[new]; 541 if (oldfp != NULL) 542 FREF(oldfp); 543 544 fdp->fd_ofiles[new] = fp; 545 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 546 fp->f_count++; 547 FRELE(fp, p); 548 if (dup2 && oldfp == NULL) 549 fd_used(fdp, new); 550 *retval = new; 551 552 if (oldfp != NULL) { 553 if (new < fdp->fd_knlistsize) 554 knote_fdclose(p, new); 555 closef(oldfp, p); 556 } 557 558 return (0); 559 } 560 561 void 562 fdremove(struct filedesc *fdp, int fd) 563 { 564 fdpassertlocked(fdp); 565 fdp->fd_ofiles[fd] = NULL; 566 fd_unused(fdp, fd); 567 } 568 569 int 570 fdrelease(struct proc *p, int fd) 571 { 572 struct filedesc *fdp = p->p_fd; 573 struct file **fpp, *fp; 574 575 fdpassertlocked(fdp); 576 577 /* 578 * Don't fd_getfile here. We want to closef LARVAL files and closef 579 * can deal with that. 580 */ 581 fpp = &fdp->fd_ofiles[fd]; 582 fp = *fpp; 583 if (fp == NULL) 584 return (EBADF); 585 FREF(fp); 586 *fpp = NULL; 587 fd_unused(fdp, fd); 588 if (fd < fdp->fd_knlistsize) 589 knote_fdclose(p, fd); 590 return (closef(fp, p)); 591 } 592 593 /* 594 * Close a file descriptor. 595 */ 596 /* ARGSUSED */ 597 int 598 sys_close(struct proc *p, void *v, register_t *retval) 599 { 600 struct sys_close_args /* { 601 syscallarg(int) fd; 602 } */ *uap = v; 603 int fd = SCARG(uap, fd), error; 604 struct filedesc *fdp = p->p_fd; 605 606 if (fd_getfile(fdp, fd) == NULL) 607 return (EBADF); 608 fdplock(fdp); 609 error = fdrelease(p, fd); 610 fdpunlock(fdp); 611 612 return (error); 613 } 614 615 /* 616 * Return status information about a file descriptor. 617 */ 618 /* ARGSUSED */ 619 int 620 sys_fstat(struct proc *p, void *v, register_t *retval) 621 { 622 struct sys_fstat_args /* { 623 syscallarg(int) fd; 624 syscallarg(struct stat *) sb; 625 } */ *uap = v; 626 int fd = SCARG(uap, fd); 627 struct filedesc *fdp = p->p_fd; 628 struct file *fp; 629 struct stat ub; 630 int error; 631 632 if ((fp = fd_getfile(fdp, fd)) == NULL) 633 return (EBADF); 634 FREF(fp); 635 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 636 FRELE(fp, p); 637 if (error == 0) { 638 /* 639 * Don't let non-root see generation numbers 640 * (for NFS security) 641 */ 642 if (suser(p, 0)) 643 ub.st_gen = 0; 644 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 645 sizeof (ub)); 646 } 647 #ifdef KTRACE 648 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 649 ktrstat(p, &ub); 650 #endif 651 return (error); 652 } 653 654 /* 655 * Return pathconf information about a file descriptor. 656 */ 657 /* ARGSUSED */ 658 int 659 sys_fpathconf(struct proc *p, void *v, register_t *retval) 660 { 661 struct sys_fpathconf_args /* { 662 syscallarg(int) fd; 663 syscallarg(int) name; 664 } */ *uap = v; 665 int fd = SCARG(uap, fd); 666 struct filedesc *fdp = p->p_fd; 667 struct file *fp; 668 struct vnode *vp; 669 int error; 670 671 if ((fp = fd_getfile(fdp, fd)) == NULL) 672 return (EBADF); 673 FREF(fp); 674 switch (fp->f_type) { 675 case DTYPE_PIPE: 676 case DTYPE_SOCKET: 677 if (SCARG(uap, name) != _PC_PIPE_BUF) { 678 error = EINVAL; 679 break; 680 } 681 *retval = PIPE_BUF; 682 error = 0; 683 break; 684 685 case DTYPE_VNODE: 686 vp = (struct vnode *)fp->f_data; 687 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 688 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 689 VOP_UNLOCK(vp, 0, p); 690 break; 691 692 default: 693 error = EOPNOTSUPP; 694 break; 695 } 696 FRELE(fp, p); 697 return (error); 698 } 699 700 /* 701 * Allocate a file descriptor for the process. 702 */ 703 int 704 fdalloc(struct proc *p, int want, int *result) 705 { 706 struct filedesc *fdp = p->p_fd; 707 int lim, last, i; 708 u_int new, off; 709 710 /* 711 * Search for a free descriptor starting at the higher 712 * of want or fd_freefile. If that fails, consider 713 * expanding the ofile array. 714 */ 715 restart: 716 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 717 last = min(fdp->fd_nfiles, lim); 718 if ((i = want) < fdp->fd_freefile) 719 i = fdp->fd_freefile; 720 off = i >> NDENTRYSHIFT; 721 new = find_next_zero(fdp->fd_himap, off, 722 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 723 if (new != -1) { 724 i = find_next_zero(&fdp->fd_lomap[new], 725 new > off ? 0 : i & NDENTRYMASK, 726 NDENTRIES); 727 if (i == -1) { 728 /* 729 * Free file descriptor in this block was 730 * below want, try again with higher want. 731 */ 732 want = (new + 1) << NDENTRYSHIFT; 733 goto restart; 734 } 735 i += (new << NDENTRYSHIFT); 736 if (i < last) { 737 fd_used(fdp, i); 738 if (want <= fdp->fd_freefile) 739 fdp->fd_freefile = i; 740 *result = i; 741 fdp->fd_ofileflags[i] = 0; 742 return (0); 743 } 744 } 745 if (fdp->fd_nfiles >= lim) 746 return (EMFILE); 747 748 return (ENOSPC); 749 } 750 751 void 752 fdexpand(struct proc *p) 753 { 754 struct filedesc *fdp = p->p_fd; 755 int nfiles, i; 756 struct file **newofile; 757 char *newofileflags; 758 u_int *newhimap, *newlomap; 759 760 fdpassertlocked(fdp); 761 762 /* 763 * No space in current array. 764 */ 765 if (fdp->fd_nfiles < NDEXTENT) 766 nfiles = NDEXTENT; 767 else 768 nfiles = 2 * fdp->fd_nfiles; 769 770 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 771 newofileflags = (char *) &newofile[nfiles]; 772 773 /* 774 * Copy the existing ofile and ofileflags arrays 775 * and zero the new portion of each array. 776 */ 777 bcopy(fdp->fd_ofiles, newofile, 778 (i = sizeof(struct file *) * fdp->fd_nfiles)); 779 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); 780 bcopy(fdp->fd_ofileflags, newofileflags, 781 (i = sizeof(char) * fdp->fd_nfiles)); 782 bzero(newofileflags + i, nfiles * sizeof(char) - i); 783 784 if (fdp->fd_nfiles > NDFILE) 785 free(fdp->fd_ofiles, M_FILEDESC); 786 787 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 788 newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int), 789 M_FILEDESC, M_WAITOK); 790 newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(u_int), 791 M_FILEDESC, M_WAITOK); 792 793 bcopy(fdp->fd_himap, newhimap, 794 (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int))); 795 bzero((char *)newhimap + i, 796 NDHISLOTS(nfiles) * sizeof(u_int) - i); 797 798 bcopy(fdp->fd_lomap, newlomap, 799 (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int))); 800 bzero((char *)newlomap + i, 801 NDLOSLOTS(nfiles) * sizeof(u_int) - i); 802 803 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 804 free(fdp->fd_himap, M_FILEDESC); 805 free(fdp->fd_lomap, M_FILEDESC); 806 } 807 fdp->fd_himap = newhimap; 808 fdp->fd_lomap = newlomap; 809 } 810 fdp->fd_ofiles = newofile; 811 fdp->fd_ofileflags = newofileflags; 812 fdp->fd_nfiles = nfiles; 813 } 814 815 /* 816 * Create a new open file structure and allocate 817 * a file descriptor for the process that refers to it. 818 */ 819 int 820 falloc(struct proc *p, struct file **resultfp, int *resultfd) 821 { 822 struct file *fp, *fq; 823 int error, i; 824 825 fdpassertlocked(p->p_fd); 826 restart: 827 if ((error = fdalloc(p, 0, &i)) != 0) { 828 if (error == ENOSPC) { 829 fdexpand(p); 830 goto restart; 831 } 832 return (error); 833 } 834 if (nfiles >= maxfiles) { 835 fd_unused(p->p_fd, i); 836 tablefull("file"); 837 return (ENFILE); 838 } 839 /* 840 * Allocate a new file descriptor. 841 * If the process has file descriptor zero open, add to the list 842 * of open files at that point, otherwise put it at the front of 843 * the list of open files. 844 */ 845 nfiles++; 846 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 847 fp->f_iflags = FIF_LARVAL; 848 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 849 LIST_INSERT_AFTER(fq, fp, f_list); 850 } else { 851 LIST_INSERT_HEAD(&filehead, fp, f_list); 852 } 853 p->p_fd->fd_ofiles[i] = fp; 854 fp->f_count = 1; 855 fp->f_cred = p->p_ucred; 856 crhold(fp->f_cred); 857 if (resultfp) 858 *resultfp = fp; 859 if (resultfd) 860 *resultfd = i; 861 FREF(fp); 862 return (0); 863 } 864 865 /* 866 * Build a new filedesc structure. 867 */ 868 struct filedesc * 869 fdinit(struct proc *p) 870 { 871 struct filedesc0 *newfdp; 872 extern int cmask; 873 874 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 875 if (p != NULL) { 876 struct filedesc *fdp = p->p_fd; 877 878 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 879 vref(newfdp->fd_fd.fd_cdir); 880 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 881 if (newfdp->fd_fd.fd_rdir) 882 vref(newfdp->fd_fd.fd_rdir); 883 } 884 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 885 886 /* Create the file descriptor table. */ 887 newfdp->fd_fd.fd_refcnt = 1; 888 newfdp->fd_fd.fd_cmask = cmask; 889 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 890 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 891 newfdp->fd_fd.fd_nfiles = NDFILE; 892 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 893 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 894 newfdp->fd_fd.fd_knlistsize = -1; 895 896 newfdp->fd_fd.fd_freefile = 0; 897 newfdp->fd_fd.fd_lastfile = 0; 898 899 return (&newfdp->fd_fd); 900 } 901 902 /* 903 * Share a filedesc structure. 904 */ 905 struct filedesc * 906 fdshare(struct proc *p) 907 { 908 p->p_fd->fd_refcnt++; 909 return (p->p_fd); 910 } 911 912 /* 913 * Copy a filedesc structure. 914 */ 915 struct filedesc * 916 fdcopy(struct proc *p) 917 { 918 struct filedesc *newfdp, *fdp = p->p_fd; 919 struct file **fpp; 920 int i; 921 922 fdplock(fdp); 923 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 924 bcopy(fdp, newfdp, sizeof(struct filedesc)); 925 if (newfdp->fd_cdir) 926 vref(newfdp->fd_cdir); 927 if (newfdp->fd_rdir) 928 vref(newfdp->fd_rdir); 929 newfdp->fd_refcnt = 1; 930 rw_init(&newfdp->fd_lock, "fdlock"); 931 932 /* 933 * If the number of open files fits in the internal arrays 934 * of the open file structure, use them, otherwise allocate 935 * additional memory for the number of descriptors currently 936 * in use. 937 */ 938 if (newfdp->fd_lastfile < NDFILE) { 939 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 940 newfdp->fd_ofileflags = 941 ((struct filedesc0 *) newfdp)->fd_dfileflags; 942 i = NDFILE; 943 } else { 944 /* 945 * Compute the smallest multiple of NDEXTENT needed 946 * for the file descriptors currently in use, 947 * allowing the table to shrink. 948 */ 949 i = newfdp->fd_nfiles; 950 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 951 i /= 2; 952 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 953 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 954 } 955 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 956 newfdp->fd_himap = 957 ((struct filedesc0 *) newfdp)->fd_dhimap; 958 newfdp->fd_lomap = 959 ((struct filedesc0 *) newfdp)->fd_dlomap; 960 } else { 961 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int), 962 M_FILEDESC, M_WAITOK); 963 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int), 964 M_FILEDESC, M_WAITOK); 965 } 966 newfdp->fd_nfiles = i; 967 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); 968 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); 969 bcopy(fdp->fd_himap, newfdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 970 bcopy(fdp->fd_lomap, newfdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 971 fdpunlock(fdp); 972 973 /* 974 * kq descriptors cannot be copied. 975 */ 976 fdplock(newfdp); 977 if (newfdp->fd_knlistsize != -1) { 978 fpp = newfdp->fd_ofiles; 979 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 980 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) 981 fdremove(newfdp, i); 982 newfdp->fd_knlist = NULL; 983 newfdp->fd_knlistsize = -1; 984 newfdp->fd_knhash = NULL; 985 newfdp->fd_knhashmask = 0; 986 } 987 988 fpp = newfdp->fd_ofiles; 989 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 990 if (*fpp != NULL) { 991 /* 992 * XXX Gruesome hack. If count gets too high, fail 993 * to copy an fd, since fdcopy()'s callers do not 994 * permit it to indicate failure yet. 995 */ 996 if ((*fpp)->f_count == LONG_MAX-2) 997 fdremove(newfdp, i); 998 else 999 (*fpp)->f_count++; 1000 } 1001 fdpunlock(newfdp); 1002 return (newfdp); 1003 } 1004 1005 /* 1006 * Release a filedesc structure. 1007 */ 1008 void 1009 fdfree(struct proc *p) 1010 { 1011 struct filedesc *fdp = p->p_fd; 1012 struct file **fpp, *fp; 1013 int i; 1014 1015 if (--fdp->fd_refcnt > 0) 1016 return; 1017 fpp = fdp->fd_ofiles; 1018 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1019 fp = *fpp; 1020 if (fp != NULL) { 1021 FREF(fp); 1022 *fpp = NULL; 1023 (void) closef(fp, p); 1024 } 1025 } 1026 p->p_fd = NULL; 1027 if (fdp->fd_nfiles > NDFILE) 1028 free(fdp->fd_ofiles, M_FILEDESC); 1029 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1030 free(fdp->fd_himap, M_FILEDESC); 1031 free(fdp->fd_lomap, M_FILEDESC); 1032 } 1033 if (fdp->fd_cdir) 1034 vrele(fdp->fd_cdir); 1035 if (fdp->fd_rdir) 1036 vrele(fdp->fd_rdir); 1037 if (fdp->fd_knlist) 1038 free(fdp->fd_knlist, M_TEMP); 1039 if (fdp->fd_knhash) 1040 free(fdp->fd_knhash, M_TEMP); 1041 pool_put(&fdesc_pool, fdp); 1042 } 1043 1044 /* 1045 * Internal form of close. 1046 * Decrement reference count on file structure. 1047 * Note: p may be NULL when closing a file 1048 * that was being passed in a message. 1049 * 1050 * The fp must have its usecount bumped and will be FRELEd here. 1051 */ 1052 int 1053 closef(struct file *fp, struct proc *p) 1054 { 1055 struct filedesc *fdp; 1056 1057 if (fp == NULL) 1058 return (0); 1059 1060 #ifdef DIAGNOSTIC 1061 if (fp->f_count < 2) 1062 panic("closef: count (%d) < 2", fp->f_count); 1063 #endif 1064 fp->f_count--; 1065 1066 /* 1067 * POSIX record locking dictates that any close releases ALL 1068 * locks owned by this process. This is handled by setting 1069 * a flag in the unlock to free ONLY locks obeying POSIX 1070 * semantics, and not to free BSD-style file locks. 1071 * If the descriptor was in a message, POSIX-style locks 1072 * aren't passed with the descriptor. 1073 */ 1074 1075 if (p && ((fdp = p->p_fd) != NULL) && 1076 (fdp->fd_flags & FD_ADVLOCK) && 1077 fp->f_type == DTYPE_VNODE) { 1078 struct vnode *vp = fp->f_data; 1079 struct flock lf; 1080 1081 lf.l_whence = SEEK_SET; 1082 lf.l_start = 0; 1083 lf.l_len = 0; 1084 lf.l_type = F_UNLCK; 1085 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1086 } 1087 1088 return (FRELE(fp, p)); 1089 } 1090 1091 int 1092 fdrop(struct file *fp, struct proc *p) 1093 { 1094 int error; 1095 1096 #ifdef DIAGNOSTIC 1097 if (fp->f_count != 0) 1098 panic("fdrop: count (%d) != 0", fp->f_count); 1099 #endif 1100 1101 if (fp->f_ops) 1102 error = (*fp->f_ops->fo_close)(fp, p); 1103 else 1104 error = 0; 1105 1106 /* Free fp */ 1107 LIST_REMOVE(fp, f_list); 1108 crfree(fp->f_cred); 1109 nfiles--; 1110 pool_put(&file_pool, fp); 1111 1112 return (error); 1113 } 1114 1115 /* 1116 * Apply an advisory lock on a file descriptor. 1117 * 1118 * Just attempt to get a record lock of the requested type on 1119 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1120 */ 1121 /* ARGSUSED */ 1122 int 1123 sys_flock(struct proc *p, void *v, register_t *retval) 1124 { 1125 struct sys_flock_args /* { 1126 syscallarg(int) fd; 1127 syscallarg(int) how; 1128 } */ *uap = v; 1129 int fd = SCARG(uap, fd); 1130 int how = SCARG(uap, how); 1131 struct filedesc *fdp = p->p_fd; 1132 struct file *fp; 1133 struct vnode *vp; 1134 struct flock lf; 1135 int error; 1136 1137 if ((fp = fd_getfile(fdp, fd)) == NULL) 1138 return (EBADF); 1139 if (fp->f_type != DTYPE_VNODE) 1140 return (EOPNOTSUPP); 1141 FREF(fp); 1142 vp = (struct vnode *)fp->f_data; 1143 lf.l_whence = SEEK_SET; 1144 lf.l_start = 0; 1145 lf.l_len = 0; 1146 if (how & LOCK_UN) { 1147 lf.l_type = F_UNLCK; 1148 fp->f_flag &= ~FHASLOCK; 1149 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1150 goto out; 1151 } 1152 if (how & LOCK_EX) 1153 lf.l_type = F_WRLCK; 1154 else if (how & LOCK_SH) 1155 lf.l_type = F_RDLCK; 1156 else { 1157 error = EINVAL; 1158 goto out; 1159 } 1160 fp->f_flag |= FHASLOCK; 1161 if (how & LOCK_NB) 1162 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1163 else 1164 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1165 out: 1166 FRELE(fp, p); 1167 return (error); 1168 } 1169 1170 /* 1171 * File Descriptor pseudo-device driver (/dev/fd/). 1172 * 1173 * Opening minor device N dup()s the file (if any) connected to file 1174 * descriptor N belonging to the calling process. Note that this driver 1175 * consists of only the ``open()'' routine, because all subsequent 1176 * references to this file will be direct to the other driver. 1177 */ 1178 /* ARGSUSED */ 1179 int 1180 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1181 { 1182 1183 /* 1184 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1185 * the file descriptor being sought for duplication. The error 1186 * return ensures that the vnode for this device will be released 1187 * by vn_open. Open will detect this special error and take the 1188 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1189 * will simply report the error. 1190 */ 1191 p->p_dupfd = minor(dev); 1192 return (ENODEV); 1193 } 1194 1195 /* 1196 * Duplicate the specified descriptor to a free descriptor. 1197 */ 1198 int 1199 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1200 { 1201 struct file *wfp; 1202 1203 fdpassertlocked(fdp); 1204 1205 /* 1206 * Assume that the filename was user-specified; applications do 1207 * not tend to open /dev/fd/# when they can just call dup() 1208 */ 1209 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1210 if (curproc->p_descfd == 255) 1211 return (EPERM); 1212 if (curproc->p_descfd != curproc->p_dupfd) 1213 return (EPERM); 1214 } 1215 1216 /* 1217 * If the to-be-dup'd fd number is greater than the allowed number 1218 * of file descriptors, or the fd to be dup'd has already been 1219 * closed, reject. Note, there is no need to check for new == old 1220 * because fd_getfile will return NULL if the file at indx is 1221 * newly created by falloc (FIF_LARVAL). 1222 */ 1223 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1224 return (EBADF); 1225 1226 /* 1227 * Check that the mode the file is being opened for is a 1228 * subset of the mode of the existing descriptor. 1229 */ 1230 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1231 return (EACCES); 1232 if (wfp->f_count == LONG_MAX-2) 1233 return (EDEADLK); 1234 1235 fdp->fd_ofiles[indx] = wfp; 1236 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1237 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1238 wfp->f_count++; 1239 fd_used(fdp, indx); 1240 return (0); 1241 } 1242 1243 /* 1244 * Close any files on exec? 1245 */ 1246 void 1247 fdcloseexec(struct proc *p) 1248 { 1249 struct filedesc *fdp = p->p_fd; 1250 int fd; 1251 1252 fdplock(fdp); 1253 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1254 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1255 (void) fdrelease(p, fd); 1256 fdpunlock(fdp); 1257 } 1258 1259 int 1260 sys_closefrom(struct proc *p, void *v, register_t *retval) 1261 { 1262 struct sys_closefrom_args *uap = v; 1263 struct filedesc *fdp = p->p_fd; 1264 u_int startfd, i; 1265 1266 startfd = SCARG(uap, fd); 1267 fdplock(fdp); 1268 1269 if (startfd > fdp->fd_lastfile) { 1270 fdpunlock(fdp); 1271 return (EBADF); 1272 } 1273 1274 for (i = startfd; i <= fdp->fd_lastfile; i++) 1275 fdrelease(p, i); 1276 1277 fdpunlock(fdp); 1278 return (0); 1279 } 1280 1281 int 1282 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1283 { 1284 *retval = p->p_fd->fd_openfd; 1285 return (0); 1286 } 1287