1 /* $NetBSD: kern_descrip.c,v 1.95 2002/09/23 04:19:16 simonb Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.95 2002/09/23 04:19:16 simonb Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/filedesc.h> 49 #include <sys/kernel.h> 50 #include <sys/vnode.h> 51 #include <sys/proc.h> 52 #include <sys/file.h> 53 #include <sys/namei.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/stat.h> 57 #include <sys/ioctl.h> 58 #include <sys/fcntl.h> 59 #include <sys/malloc.h> 60 #include <sys/pool.h> 61 #include <sys/syslog.h> 62 #include <sys/unistd.h> 63 #include <sys/resourcevar.h> 64 #include <sys/conf.h> 65 66 #include <sys/mount.h> 67 #include <sys/syscallargs.h> 68 69 /* 70 * Descriptor management. 71 */ 72 struct filelist filehead; /* head of list of open files */ 73 int nfiles; /* actual number of open files */ 74 struct pool file_pool; /* memory pool for file structures */ 75 struct pool cwdi_pool; /* memory pool for cwdinfo structures */ 76 struct pool filedesc0_pool; /* memory pool for filedesc0 structures */ 77 78 static __inline void fd_used(struct filedesc *, int); 79 static __inline void fd_unused(struct filedesc *, int); 80 int finishdup(struct proc *, int, int, register_t *); 81 int fcntl_forfs(int, struct proc *, int, void *); 82 83 dev_type_open(filedescopen); 84 85 const struct cdevsw filedesc_cdevsw = { 86 filedescopen, noclose, noread, nowrite, noioctl, 87 nostop, notty, nopoll, nommap, 88 }; 89 90 static __inline void 91 fd_used(struct filedesc *fdp, int fd) 92 { 93 94 if (fd > fdp->fd_lastfile) 95 fdp->fd_lastfile = fd; 96 } 97 98 static __inline void 99 fd_unused(struct filedesc *fdp, int fd) 100 { 101 102 if (fd < fdp->fd_freefile) 103 fdp->fd_freefile = fd; 104 #ifdef DIAGNOSTIC 105 if (fd > fdp->fd_lastfile) 106 panic("fd_unused: fd_lastfile inconsistent"); 107 #endif 108 if (fd == fdp->fd_lastfile) { 109 do { 110 fd--; 111 } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL); 112 fdp->fd_lastfile = fd; 113 } 114 } 115 116 struct file * 117 fd_getfile(struct filedesc *fdp, int fd) 118 { 119 struct file *fp; 120 121 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 122 return (NULL); 123 124 if (FILE_IS_USABLE(fp) == 0) 125 return (NULL); 126 127 return (fp); 128 } 129 130 /* 131 * System calls on descriptors. 132 */ 133 134 /* 135 * Duplicate a file descriptor. 136 */ 137 /* ARGSUSED */ 138 int 139 sys_dup(struct proc *p, void *v, register_t *retval) 140 { 141 struct sys_dup_args /* { 142 syscallarg(int) fd; 143 } */ *uap = v; 144 struct file *fp; 145 struct filedesc *fdp; 146 int old, new, error; 147 148 fdp = p->p_fd; 149 old = SCARG(uap, fd); 150 151 restart: 152 if ((fp = fd_getfile(fdp, old)) == NULL) 153 return (EBADF); 154 155 FILE_USE(fp); 156 157 if ((error = fdalloc(p, 0, &new)) != 0) { 158 if (error == ENOSPC) { 159 fdexpand(p); 160 FILE_UNUSE(fp, p); 161 goto restart; 162 } 163 FILE_UNUSE(fp, p); 164 return (error); 165 } 166 167 /* finishdup() will unuse the descriptors for us */ 168 return (finishdup(p, old, new, retval)); 169 } 170 171 /* 172 * Duplicate a file descriptor to a particular value. 173 */ 174 /* ARGSUSED */ 175 int 176 sys_dup2(struct proc *p, void *v, register_t *retval) 177 { 178 struct sys_dup2_args /* { 179 syscallarg(int) from; 180 syscallarg(int) to; 181 } */ *uap = v; 182 struct file *fp; 183 struct filedesc *fdp; 184 int old, new, i, error; 185 186 fdp = p->p_fd; 187 old = SCARG(uap, from); 188 new = SCARG(uap, to); 189 190 restart: 191 if ((fp = fd_getfile(fdp, old)) == NULL) 192 return (EBADF); 193 194 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 195 (u_int)new >= maxfiles) 196 return (EBADF); 197 198 if (old == new) { 199 *retval = new; 200 return (0); 201 } 202 203 FILE_USE(fp); 204 205 if (new >= fdp->fd_nfiles) { 206 if ((error = fdalloc(p, new, &i)) != 0) { 207 if (error == ENOSPC) { 208 fdexpand(p); 209 FILE_UNUSE(fp, p); 210 goto restart; 211 } 212 FILE_UNUSE(fp, p); 213 return (error); 214 } 215 if (new != i) 216 panic("dup2: fdalloc"); 217 } 218 219 /* 220 * finishdup() will close the file that's in the `new' 221 * slot, if there's one there. 222 */ 223 224 /* finishdup() will unuse the descriptors for us */ 225 return (finishdup(p, old, new, retval)); 226 } 227 228 /* 229 * The file control system call. 230 */ 231 /* ARGSUSED */ 232 int 233 sys_fcntl(struct proc *p, void *v, register_t *retval) 234 { 235 struct sys_fcntl_args /* { 236 syscallarg(int) fd; 237 syscallarg(int) cmd; 238 syscallarg(void *) arg; 239 } */ *uap = v; 240 struct filedesc *fdp; 241 struct file *fp; 242 struct vnode *vp; 243 int fd, i, tmp, error, flg, cmd, newmin; 244 struct flock fl; 245 246 fd = SCARG(uap, fd); 247 fdp = p->p_fd; 248 error = 0; 249 flg = F_POSIX; 250 251 restart: 252 if ((fp = fd_getfile(fdp, fd)) == NULL) 253 return (EBADF); 254 255 FILE_USE(fp); 256 257 cmd = SCARG(uap, cmd); 258 if ((cmd & F_FSCTL)) { 259 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg)); 260 goto out; 261 } 262 263 switch (cmd) { 264 265 case F_DUPFD: 266 newmin = (long)SCARG(uap, arg); 267 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 268 (u_int)newmin >= maxfiles) { 269 error = EINVAL; 270 goto out; 271 } 272 if ((error = fdalloc(p, newmin, &i)) != 0) { 273 if (error == ENOSPC) { 274 fdexpand(p); 275 FILE_UNUSE(fp, p); 276 goto restart; 277 } 278 goto out; 279 } 280 281 /* finishdup() will unuse the descriptors for us */ 282 return (finishdup(p, fd, i, retval)); 283 284 case F_GETFD: 285 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 286 break; 287 288 case F_SETFD: 289 if ((long)SCARG(uap, arg) & 1) 290 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 291 else 292 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 293 break; 294 295 case F_GETFL: 296 *retval = OFLAGS(fp->f_flag); 297 break; 298 299 case F_SETFL: 300 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 301 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, (caddr_t)&tmp, p); 302 if (error) 303 goto out; 304 fp->f_flag &= ~FCNTLFLAGS; 305 fp->f_flag |= tmp; 306 tmp = fp->f_flag & FNONBLOCK; 307 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 308 if (error) 309 goto out; 310 tmp = fp->f_flag & FASYNC; 311 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 312 if (error == 0) 313 goto out; 314 fp->f_flag &= ~FNONBLOCK; 315 tmp = 0; 316 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 317 break; 318 319 case F_GETOWN: 320 if (fp->f_type == DTYPE_SOCKET) { 321 *retval = ((struct socket *)fp->f_data)->so_pgid; 322 goto out; 323 } 324 error = (*fp->f_ops->fo_ioctl) 325 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 326 *retval = -tmp; 327 break; 328 329 case F_SETOWN: 330 if (fp->f_type == DTYPE_SOCKET) { 331 ((struct socket *)fp->f_data)->so_pgid = 332 (long)SCARG(uap, arg); 333 goto out; 334 } 335 if ((long)SCARG(uap, arg) <= 0) { 336 tmp = (-(long)SCARG(uap, arg)); 337 } else { 338 struct proc *p1 = pfind((long)SCARG(uap, arg)); 339 if (p1 == 0) { 340 error = ESRCH; 341 goto out; 342 } 343 tmp = (long)p1->p_pgrp->pg_id; 344 } 345 error = (*fp->f_ops->fo_ioctl) 346 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 347 break; 348 349 case F_SETLKW: 350 flg |= F_WAIT; 351 /* Fall into F_SETLK */ 352 353 case F_SETLK: 354 if (fp->f_type != DTYPE_VNODE) { 355 error = EINVAL; 356 goto out; 357 } 358 vp = (struct vnode *)fp->f_data; 359 /* Copy in the lock structure */ 360 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 361 sizeof(fl)); 362 if (error) 363 goto out; 364 if (fl.l_whence == SEEK_CUR) 365 fl.l_start += fp->f_offset; 366 switch (fl.l_type) { 367 case F_RDLCK: 368 if ((fp->f_flag & FREAD) == 0) { 369 error = EBADF; 370 goto out; 371 } 372 p->p_flag |= P_ADVLOCK; 373 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg); 374 goto out; 375 376 case F_WRLCK: 377 if ((fp->f_flag & FWRITE) == 0) { 378 error = EBADF; 379 goto out; 380 } 381 p->p_flag |= P_ADVLOCK; 382 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg); 383 goto out; 384 385 case F_UNLCK: 386 error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, 387 F_POSIX); 388 goto out; 389 390 default: 391 error = EINVAL; 392 goto out; 393 } 394 395 case F_GETLK: 396 if (fp->f_type != DTYPE_VNODE) { 397 error = EINVAL; 398 goto out; 399 } 400 vp = (struct vnode *)fp->f_data; 401 /* Copy in the lock structure */ 402 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 403 sizeof(fl)); 404 if (error) 405 goto out; 406 if (fl.l_whence == SEEK_CUR) 407 fl.l_start += fp->f_offset; 408 if (fl.l_type != F_RDLCK && 409 fl.l_type != F_WRLCK && 410 fl.l_type != F_UNLCK) { 411 error = EINVAL; 412 goto out; 413 } 414 error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX); 415 if (error) 416 goto out; 417 error = copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 418 sizeof(fl)); 419 break; 420 421 default: 422 error = EINVAL; 423 } 424 425 out: 426 FILE_UNUSE(fp, p); 427 return (error); 428 } 429 430 /* 431 * Common code for dup, dup2, and fcntl(F_DUPFD). 432 */ 433 int 434 finishdup(struct proc *p, int old, int new, register_t *retval) 435 { 436 struct filedesc *fdp; 437 struct file *fp, *delfp; 438 439 fdp = p->p_fd; 440 441 /* 442 * If there is a file in the new slot, remember it so we 443 * can close it after we've finished the dup. We need 444 * to do it after the dup is finished, since closing 445 * the file may block. 446 * 447 * Note: `old' is already used for us. 448 */ 449 delfp = fdp->fd_ofiles[new]; 450 451 fp = fdp->fd_ofiles[old]; 452 fdp->fd_ofiles[new] = fp; 453 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 454 fp->f_count++; 455 /* 456 * Note, don't have to mark it "used" in the table if there 457 * was already a file in the `new' slot. 458 */ 459 if (delfp == NULL) 460 fd_used(fdp, new); 461 *retval = new; 462 FILE_UNUSE(fp, p); 463 464 if (delfp != NULL) { 465 FILE_USE(delfp); 466 (void) closef(delfp, p); 467 } 468 return (0); 469 } 470 471 void 472 fdremove(struct filedesc *fdp, int fd) 473 { 474 475 fdp->fd_ofiles[fd] = NULL; 476 fd_unused(fdp, fd); 477 } 478 479 int 480 fdrelease(struct proc *p, int fd) 481 { 482 struct filedesc *fdp; 483 struct file **fpp, *fp; 484 485 fdp = p->p_fd; 486 fpp = &fdp->fd_ofiles[fd]; 487 fp = *fpp; 488 if (fp == NULL) 489 return (EBADF); 490 491 FILE_USE(fp); 492 493 *fpp = NULL; 494 fdp->fd_ofileflags[fd] = 0; 495 fd_unused(fdp, fd); 496 return (closef(fp, p)); 497 } 498 499 /* 500 * Close a file descriptor. 501 */ 502 /* ARGSUSED */ 503 int 504 sys_close(struct proc *p, void *v, register_t *retval) 505 { 506 struct sys_close_args /* { 507 syscallarg(int) fd; 508 } */ *uap = v; 509 int fd; 510 struct filedesc *fdp; 511 struct file *fp; 512 513 fd = SCARG(uap, fd); 514 fdp = p->p_fd; 515 516 if ((fp = fd_getfile(fdp, fd)) == NULL) 517 return (EBADF); 518 519 return (fdrelease(p, fd)); 520 } 521 522 /* 523 * Return status information about a file descriptor. 524 */ 525 /* ARGSUSED */ 526 int 527 sys___fstat13(struct proc *p, void *v, register_t *retval) 528 { 529 struct sys___fstat13_args /* { 530 syscallarg(int) fd; 531 syscallarg(struct stat *) sb; 532 } */ *uap = v; 533 int fd; 534 struct filedesc *fdp; 535 struct file *fp; 536 struct stat ub; 537 int error; 538 539 fd = SCARG(uap, fd); 540 fdp = p->p_fd; 541 542 if ((fp = fd_getfile(fdp, fd)) == NULL) 543 return (EBADF); 544 545 FILE_USE(fp); 546 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 547 FILE_UNUSE(fp, p); 548 549 if (error == 0) 550 error = copyout(&ub, SCARG(uap, sb), sizeof(ub)); 551 552 return (error); 553 } 554 555 /* 556 * Return pathconf information about a file descriptor. 557 */ 558 /* ARGSUSED */ 559 int 560 sys_fpathconf(struct proc *p, void *v, register_t *retval) 561 { 562 struct sys_fpathconf_args /* { 563 syscallarg(int) fd; 564 syscallarg(int) name; 565 } */ *uap = v; 566 int fd; 567 struct filedesc *fdp; 568 struct file *fp; 569 struct vnode *vp; 570 int error; 571 572 fd = SCARG(uap, fd); 573 fdp = p->p_fd; 574 error = 0; 575 576 if ((fp = fd_getfile(fdp, fd)) == NULL) 577 return (EBADF); 578 579 FILE_USE(fp); 580 581 switch (fp->f_type) { 582 583 case DTYPE_SOCKET: 584 case DTYPE_PIPE: 585 if (SCARG(uap, name) != _PC_PIPE_BUF) 586 error = EINVAL; 587 else 588 *retval = PIPE_BUF; 589 break; 590 591 case DTYPE_VNODE: 592 vp = (struct vnode *)fp->f_data; 593 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 594 break; 595 596 default: 597 error = EOPNOTSUPP; 598 break; 599 } 600 601 FILE_UNUSE(fp, p); 602 return (error); 603 } 604 605 /* 606 * Allocate a file descriptor for the process. 607 */ 608 int fdexpanded; /* XXX: what else uses this? */ 609 610 int 611 fdalloc(struct proc *p, int want, int *result) 612 { 613 struct filedesc *fdp; 614 int i, lim, last; 615 616 fdp = p->p_fd; 617 618 /* 619 * Search for a free descriptor starting at the higher 620 * of want or fd_freefile. If that fails, consider 621 * expanding the ofile array. 622 */ 623 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 624 last = min(fdp->fd_nfiles, lim); 625 if ((i = want) < fdp->fd_freefile) 626 i = fdp->fd_freefile; 627 for (; i < last; i++) { 628 if (fdp->fd_ofiles[i] == NULL) { 629 fd_used(fdp, i); 630 if (want <= fdp->fd_freefile) 631 fdp->fd_freefile = i; 632 *result = i; 633 return (0); 634 } 635 } 636 637 /* No space in current array. Expand? */ 638 if (fdp->fd_nfiles >= lim) 639 return (EMFILE); 640 641 /* Let the caller do it. */ 642 return (ENOSPC); 643 } 644 645 void 646 fdexpand(struct proc *p) 647 { 648 struct filedesc *fdp; 649 int i, nfiles; 650 struct file **newofile; 651 char *newofileflags; 652 653 fdp = p->p_fd; 654 655 if (fdp->fd_nfiles < NDEXTENT) 656 nfiles = NDEXTENT; 657 else 658 nfiles = 2 * fdp->fd_nfiles; 659 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 660 newofileflags = (char *) &newofile[nfiles]; 661 /* 662 * Copy the existing ofile and ofileflags arrays 663 * and zero the new portion of each array. 664 */ 665 memcpy(newofile, fdp->fd_ofiles, 666 (i = sizeof(struct file *) * fdp->fd_nfiles)); 667 memset((char *)newofile + i, 0, 668 nfiles * sizeof(struct file *) - i); 669 memcpy(newofileflags, fdp->fd_ofileflags, 670 (i = sizeof(char) * fdp->fd_nfiles)); 671 memset(newofileflags + i, 0, nfiles * sizeof(char) - i); 672 if (fdp->fd_nfiles > NDFILE) 673 free(fdp->fd_ofiles, M_FILEDESC); 674 fdp->fd_ofiles = newofile; 675 fdp->fd_ofileflags = newofileflags; 676 fdp->fd_nfiles = nfiles; 677 fdexpanded++; 678 } 679 680 /* 681 * Check to see whether n user file descriptors 682 * are available to the process p. 683 */ 684 int 685 fdavail(struct proc *p, int n) 686 { 687 struct filedesc *fdp; 688 struct file **fpp; 689 int i, lim; 690 691 fdp = p->p_fd; 692 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 693 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 694 return (1); 695 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 696 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++) 697 if (*fpp == NULL && --n <= 0) 698 return (1); 699 return (0); 700 } 701 702 /* 703 * Initialize the data structures necessary for managing files. 704 */ 705 void 706 finit(void) 707 { 708 709 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl", 710 &pool_allocator_nointr); 711 pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl", 712 &pool_allocator_nointr); 713 pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 714 &pool_allocator_nointr); 715 } 716 717 /* 718 * Create a new open file structure and allocate 719 * a file decriptor for the process that refers to it. 720 */ 721 int 722 falloc(struct proc *p, struct file **resultfp, int *resultfd) 723 { 724 struct file *fp, *fq; 725 int error, i; 726 727 restart: 728 if ((error = fdalloc(p, 0, &i)) != 0) { 729 if (error == ENOSPC) { 730 fdexpand(p); 731 goto restart; 732 } 733 return (error); 734 } 735 if (nfiles >= maxfiles) { 736 tablefull("file", "increase kern.maxfiles or MAXFILES"); 737 return (ENFILE); 738 } 739 /* 740 * Allocate a new file descriptor. 741 * If the process has file descriptor zero open, add to the list 742 * of open files at that point, otherwise put it at the front of 743 * the list of open files. 744 */ 745 nfiles++; 746 fp = pool_get(&file_pool, PR_WAITOK); 747 memset(fp, 0, sizeof(struct file)); 748 fp->f_iflags = FIF_LARVAL; 749 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 750 LIST_INSERT_AFTER(fq, fp, f_list); 751 } else { 752 LIST_INSERT_HEAD(&filehead, fp, f_list); 753 } 754 p->p_fd->fd_ofiles[i] = fp; 755 fp->f_count = 1; 756 fp->f_cred = p->p_ucred; 757 crhold(fp->f_cred); 758 if (resultfp) { 759 FILE_USE(fp); 760 *resultfp = fp; 761 } 762 if (resultfd) 763 *resultfd = i; 764 return (0); 765 } 766 767 /* 768 * Free a file descriptor. 769 */ 770 void 771 ffree(struct file *fp) 772 { 773 774 #ifdef DIAGNOSTIC 775 if (fp->f_usecount) 776 panic("ffree"); 777 #endif 778 779 LIST_REMOVE(fp, f_list); 780 crfree(fp->f_cred); 781 #ifdef DIAGNOSTIC 782 fp->f_count = 0; 783 #endif 784 nfiles--; 785 pool_put(&file_pool, fp); 786 } 787 788 /* 789 * Create an initial cwdinfo structure, using the same current and root 790 * directories as p. 791 */ 792 struct cwdinfo * 793 cwdinit(struct proc *p) 794 { 795 struct cwdinfo *cwdi; 796 797 cwdi = pool_get(&cwdi_pool, PR_WAITOK); 798 799 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir; 800 if (cwdi->cwdi_cdir) 801 VREF(cwdi->cwdi_cdir); 802 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir; 803 if (cwdi->cwdi_rdir) 804 VREF(cwdi->cwdi_rdir); 805 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask; 806 cwdi->cwdi_refcnt = 1; 807 808 return (cwdi); 809 } 810 811 /* 812 * Make p2 share p1's cwdinfo. 813 */ 814 void 815 cwdshare(struct proc *p1, struct proc *p2) 816 { 817 818 p2->p_cwdi = p1->p_cwdi; 819 p1->p_cwdi->cwdi_refcnt++; 820 } 821 822 /* 823 * Make this process not share its cwdinfo structure, maintaining 824 * all cwdinfo state. 825 */ 826 void 827 cwdunshare(struct proc *p) 828 { 829 struct cwdinfo *newcwdi; 830 831 if (p->p_cwdi->cwdi_refcnt == 1) 832 return; 833 834 newcwdi = cwdinit(p); 835 cwdfree(p); 836 p->p_cwdi = newcwdi; 837 } 838 839 /* 840 * Release a cwdinfo structure. 841 */ 842 void 843 cwdfree(struct proc *p) 844 { 845 struct cwdinfo *cwdi; 846 847 cwdi = p->p_cwdi; 848 if (--cwdi->cwdi_refcnt > 0) 849 return; 850 851 p->p_cwdi = NULL; 852 853 vrele(cwdi->cwdi_cdir); 854 if (cwdi->cwdi_rdir) 855 vrele(cwdi->cwdi_rdir); 856 pool_put(&cwdi_pool, cwdi); 857 } 858 859 /* 860 * Create an initial filedesc structure, using the same current and root 861 * directories as p. 862 */ 863 struct filedesc * 864 fdinit(struct proc *p) 865 { 866 struct filedesc0 *newfdp; 867 868 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 869 memset(newfdp, 0, sizeof(struct filedesc0)); 870 871 fdinit1(newfdp); 872 873 return (&newfdp->fd_fd); 874 } 875 876 /* 877 * Initialize a file descriptor table. 878 */ 879 void 880 fdinit1(struct filedesc0 *newfdp) 881 { 882 883 newfdp->fd_fd.fd_refcnt = 1; 884 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 885 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 886 newfdp->fd_fd.fd_nfiles = NDFILE; 887 } 888 889 /* 890 * Make p2 share p1's filedesc structure. 891 */ 892 void 893 fdshare(struct proc *p1, struct proc *p2) 894 { 895 896 p2->p_fd = p1->p_fd; 897 p1->p_fd->fd_refcnt++; 898 } 899 900 /* 901 * Make this process not share its filedesc structure, maintaining 902 * all file descriptor state. 903 */ 904 void 905 fdunshare(struct proc *p) 906 { 907 struct filedesc *newfd; 908 909 if (p->p_fd->fd_refcnt == 1) 910 return; 911 912 newfd = fdcopy(p); 913 fdfree(p); 914 p->p_fd = newfd; 915 } 916 917 /* 918 * Clear a process's fd table. 919 */ 920 void 921 fdclear(struct proc *p) 922 { 923 struct filedesc *newfd; 924 925 newfd = fdinit(p); 926 fdfree(p); 927 p->p_fd = newfd; 928 } 929 930 /* 931 * Copy a filedesc structure. 932 */ 933 struct filedesc * 934 fdcopy(struct proc *p) 935 { 936 struct filedesc *newfdp, *fdp; 937 struct file **fpp; 938 int i; 939 940 fdp = p->p_fd; 941 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 942 memcpy(newfdp, fdp, sizeof(struct filedesc)); 943 newfdp->fd_refcnt = 1; 944 945 /* 946 * If the number of open files fits in the internal arrays 947 * of the open file structure, use them, otherwise allocate 948 * additional memory for the number of descriptors currently 949 * in use. 950 */ 951 if (newfdp->fd_lastfile < NDFILE) { 952 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 953 newfdp->fd_ofileflags = 954 ((struct filedesc0 *) newfdp)->fd_dfileflags; 955 i = NDFILE; 956 } else { 957 /* 958 * Compute the smallest multiple of NDEXTENT needed 959 * for the file descriptors currently in use, 960 * allowing the table to shrink. 961 */ 962 i = newfdp->fd_nfiles; 963 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 964 i /= 2; 965 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 966 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 967 } 968 newfdp->fd_nfiles = i; 969 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **)); 970 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 971 fpp = newfdp->fd_ofiles; 972 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++) 973 if (*fpp != NULL) 974 (*fpp)->f_count++; 975 return (newfdp); 976 } 977 978 /* 979 * Release a filedesc structure. 980 */ 981 void 982 fdfree(struct proc *p) 983 { 984 struct filedesc *fdp; 985 struct file **fpp, *fp; 986 int i; 987 988 fdp = p->p_fd; 989 if (--fdp->fd_refcnt > 0) 990 return; 991 fpp = fdp->fd_ofiles; 992 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 993 fp = *fpp; 994 if (fp != NULL) { 995 *fpp = NULL; 996 FILE_USE(fp); 997 (void) closef(fp, p); 998 } 999 } 1000 p->p_fd = NULL; 1001 if (fdp->fd_nfiles > NDFILE) 1002 free(fdp->fd_ofiles, M_FILEDESC); 1003 pool_put(&filedesc0_pool, fdp); 1004 } 1005 1006 /* 1007 * Internal form of close. 1008 * Decrement reference count on file structure. 1009 * Note: p may be NULL when closing a file 1010 * that was being passed in a message. 1011 * 1012 * Note: we expect the caller is holding a usecount, and expects us 1013 * to drop it (the caller thinks the file is going away forever). 1014 */ 1015 int 1016 closef(struct file *fp, struct proc *p) 1017 { 1018 struct vnode *vp; 1019 struct flock lf; 1020 int error; 1021 1022 if (fp == NULL) 1023 return (0); 1024 1025 /* 1026 * POSIX record locking dictates that any close releases ALL 1027 * locks owned by this process. This is handled by setting 1028 * a flag in the unlock to free ONLY locks obeying POSIX 1029 * semantics, and not to free BSD-style file locks. 1030 * If the descriptor was in a message, POSIX-style locks 1031 * aren't passed with the descriptor. 1032 */ 1033 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { 1034 lf.l_whence = SEEK_SET; 1035 lf.l_start = 0; 1036 lf.l_len = 0; 1037 lf.l_type = F_UNLCK; 1038 vp = (struct vnode *)fp->f_data; 1039 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX); 1040 } 1041 1042 /* 1043 * If WANTCLOSE is set, then the reference count on the file 1044 * is 0, but there were multiple users of the file. This can 1045 * happen if a filedesc structure is shared by multiple 1046 * processes. 1047 */ 1048 if (fp->f_iflags & FIF_WANTCLOSE) { 1049 /* 1050 * Another user of the file is already closing, and is 1051 * simply waiting for other users of the file to drain. 1052 * Release our usecount, and wake up the closer if it 1053 * is the only remaining use. 1054 */ 1055 #ifdef DIAGNOSTIC 1056 if (fp->f_count != 0) 1057 panic("closef: wantclose and count != 0"); 1058 if (fp->f_usecount < 2) 1059 panic("closef: wantclose and usecount < 2"); 1060 #endif 1061 if (--fp->f_usecount == 1) 1062 wakeup(&fp->f_usecount); 1063 return (0); 1064 } else { 1065 /* 1066 * Decrement the reference count. If we were not the 1067 * last reference, then release our use and just 1068 * return. 1069 */ 1070 if (--fp->f_count > 0) { 1071 #ifdef DIAGNOSTIC 1072 if (fp->f_usecount < 1) 1073 panic("closef: no wantclose and usecount < 1"); 1074 #endif 1075 fp->f_usecount--; 1076 return (0); 1077 } 1078 } 1079 1080 /* 1081 * The reference count is now 0. However, there may be 1082 * multiple potential users of this file. This can happen 1083 * if multiple processes shared a single filedesc structure. 1084 * 1085 * Notify these potential users that the file is closing. 1086 * This will prevent them from adding additional uses to 1087 * the file. 1088 */ 1089 fp->f_iflags |= FIF_WANTCLOSE; 1090 1091 /* 1092 * We expect the caller to add a use to the file. So, if we 1093 * are the last user, usecount will be 1. If it is not, we 1094 * must wait for the usecount to drain. When it drains back 1095 * to 1, we will be awakened so that we may proceed with the 1096 * close. 1097 */ 1098 #ifdef DIAGNOSTIC 1099 if (fp->f_usecount < 1) 1100 panic("closef: usecount < 1"); 1101 #endif 1102 while (fp->f_usecount > 1) 1103 (void) tsleep(&fp->f_usecount, PRIBIO, "closef", 0); 1104 #ifdef DIAGNOSTIC 1105 if (fp->f_usecount != 1) 1106 panic("closef: usecount != 1"); 1107 #endif 1108 1109 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1110 lf.l_whence = SEEK_SET; 1111 lf.l_start = 0; 1112 lf.l_len = 0; 1113 lf.l_type = F_UNLCK; 1114 vp = (struct vnode *)fp->f_data; 1115 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1116 } 1117 if (fp->f_ops) 1118 error = (*fp->f_ops->fo_close)(fp, p); 1119 else 1120 error = 0; 1121 1122 /* Nothing references the file now, drop the final use (us). */ 1123 fp->f_usecount--; 1124 1125 ffree(fp); 1126 return (error); 1127 } 1128 1129 /* 1130 * Apply an advisory lock on a file descriptor. 1131 * 1132 * Just attempt to get a record lock of the requested type on 1133 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1134 */ 1135 /* ARGSUSED */ 1136 int 1137 sys_flock(struct proc *p, void *v, register_t *retval) 1138 { 1139 struct sys_flock_args /* { 1140 syscallarg(int) fd; 1141 syscallarg(int) how; 1142 } */ *uap = v; 1143 int fd, how, error; 1144 struct filedesc *fdp; 1145 struct file *fp; 1146 struct vnode *vp; 1147 struct flock lf; 1148 1149 fd = SCARG(uap, fd); 1150 how = SCARG(uap, how); 1151 fdp = p->p_fd; 1152 error = 0; 1153 1154 if ((fp = fd_getfile(fdp, fd)) == NULL) 1155 return (EBADF); 1156 1157 FILE_USE(fp); 1158 1159 if (fp->f_type != DTYPE_VNODE) { 1160 error = EOPNOTSUPP; 1161 goto out; 1162 } 1163 1164 vp = (struct vnode *)fp->f_data; 1165 lf.l_whence = SEEK_SET; 1166 lf.l_start = 0; 1167 lf.l_len = 0; 1168 if (how & LOCK_UN) { 1169 lf.l_type = F_UNLCK; 1170 fp->f_flag &= ~FHASLOCK; 1171 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1172 goto out; 1173 } 1174 if (how & LOCK_EX) 1175 lf.l_type = F_WRLCK; 1176 else if (how & LOCK_SH) 1177 lf.l_type = F_RDLCK; 1178 else { 1179 error = EINVAL; 1180 goto out; 1181 } 1182 fp->f_flag |= FHASLOCK; 1183 if (how & LOCK_NB) 1184 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1185 else 1186 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 1187 F_FLOCK|F_WAIT); 1188 out: 1189 FILE_UNUSE(fp, p); 1190 return (error); 1191 } 1192 1193 /* 1194 * File Descriptor pseudo-device driver (/dev/fd/). 1195 * 1196 * Opening minor device N dup()s the file (if any) connected to file 1197 * descriptor N belonging to the calling process. Note that this driver 1198 * consists of only the ``open()'' routine, because all subsequent 1199 * references to this file will be direct to the other driver. 1200 */ 1201 /* ARGSUSED */ 1202 int 1203 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1204 { 1205 1206 /* 1207 * XXX Kludge: set p->p_dupfd to contain the value of the 1208 * the file descriptor being sought for duplication. The error 1209 * return ensures that the vnode for this device will be released 1210 * by vn_open. Open will detect this special error and take the 1211 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1212 * will simply report the error. 1213 */ 1214 p->p_dupfd = minor(dev); 1215 return (ENODEV); 1216 } 1217 1218 /* 1219 * Duplicate the specified descriptor to a free descriptor. 1220 */ 1221 int 1222 dupfdopen(struct proc *p, int indx, int dfd, int mode, int error) 1223 { 1224 struct filedesc *fdp; 1225 struct file *wfp, *fp; 1226 1227 fdp = p->p_fd; 1228 /* 1229 * If the to-be-dup'd fd number is greater than the allowed number 1230 * of file descriptors, or the fd to be dup'd has already been 1231 * closed, reject. Note, check for new == old is necessary as 1232 * falloc could allocate an already closed to-be-dup'd descriptor 1233 * as the new descriptor. 1234 */ 1235 fp = fdp->fd_ofiles[indx]; 1236 1237 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1238 return (EBADF); 1239 1240 if (fp == wfp) 1241 return (EBADF); 1242 1243 FILE_USE(wfp); 1244 1245 /* 1246 * There are two cases of interest here. 1247 * 1248 * For ENODEV simply dup (dfd) to file descriptor 1249 * (indx) and return. 1250 * 1251 * For ENXIO steal away the file structure from (dfd) and 1252 * store it in (indx). (dfd) is effectively closed by 1253 * this operation. 1254 * 1255 * Any other error code is just returned. 1256 */ 1257 switch (error) { 1258 case ENODEV: 1259 /* 1260 * Check that the mode the file is being opened for is a 1261 * subset of the mode of the existing descriptor. 1262 */ 1263 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 1264 FILE_UNUSE(wfp, p); 1265 return (EACCES); 1266 } 1267 fdp->fd_ofiles[indx] = wfp; 1268 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1269 wfp->f_count++; 1270 fd_used(fdp, indx); 1271 FILE_UNUSE(wfp, p); 1272 return (0); 1273 1274 case ENXIO: 1275 /* 1276 * Steal away the file pointer from dfd, and stuff it into indx. 1277 */ 1278 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1279 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1280 fdp->fd_ofiles[dfd] = NULL; 1281 fdp->fd_ofileflags[dfd] = 0; 1282 /* 1283 * Complete the clean up of the filedesc structure by 1284 * recomputing the various hints. 1285 */ 1286 fd_used(fdp, indx); 1287 fd_unused(fdp, dfd); 1288 FILE_UNUSE(wfp, p); 1289 return (0); 1290 1291 default: 1292 FILE_UNUSE(wfp, p); 1293 return (error); 1294 } 1295 /* NOTREACHED */ 1296 } 1297 1298 /* 1299 * fcntl call which is being passed to the file's fs. 1300 */ 1301 int 1302 fcntl_forfs(int fd, struct proc *p, int cmd, void *arg) 1303 { 1304 struct file *fp; 1305 struct filedesc *fdp; 1306 int error; 1307 u_int size; 1308 caddr_t data, memp; 1309 #define STK_PARAMS 128 1310 char stkbuf[STK_PARAMS]; 1311 1312 /* fd's value was validated in sys_fcntl before calling this routine */ 1313 fdp = p->p_fd; 1314 fp = fdp->fd_ofiles[fd]; 1315 1316 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 1317 return (EBADF); 1318 1319 /* 1320 * Interpret high order word to find amount of data to be 1321 * copied to/from the user's address space. 1322 */ 1323 size = (size_t)F_PARAM_LEN(cmd); 1324 if (size > F_PARAM_MAX) 1325 return (EINVAL); 1326 memp = NULL; 1327 if (size > sizeof(stkbuf)) { 1328 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 1329 data = memp; 1330 } else 1331 data = stkbuf; 1332 if (cmd & F_FSIN) { 1333 if (size) { 1334 error = copyin(arg, data, size); 1335 if (error) { 1336 if (memp) 1337 free(memp, M_IOCTLOPS); 1338 return (error); 1339 } 1340 } else 1341 *(caddr_t *)data = arg; 1342 } else if ((cmd & F_FSOUT) && size) 1343 /* 1344 * Zero the buffer so the user always 1345 * gets back something deterministic. 1346 */ 1347 memset(data, 0, size); 1348 else if (cmd & F_FSVOID) 1349 *(caddr_t *)data = arg; 1350 1351 1352 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p); 1353 1354 /* 1355 * Copy any data to user, size was 1356 * already set and checked above. 1357 */ 1358 if (error == 0 && (cmd & F_FSOUT) && size) 1359 error = copyout(data, arg, size); 1360 if (memp) 1361 free(memp, M_IOCTLOPS); 1362 return (error); 1363 } 1364 1365 /* 1366 * Close any files on exec? 1367 */ 1368 void 1369 fdcloseexec(struct proc *p) 1370 { 1371 struct filedesc *fdp; 1372 int fd; 1373 1374 fdunshare(p); 1375 cwdunshare(p); 1376 1377 fdp = p->p_fd; 1378 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1379 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1380 (void) fdrelease(p, fd); 1381 } 1382 1383 /* 1384 * It is unsafe for set[ug]id processes to be started with file 1385 * descriptors 0..2 closed, as these descriptors are given implicit 1386 * significance in the Standard C library. fdcheckstd() will create a 1387 * descriptor referencing /dev/null for each of stdin, stdout, and 1388 * stderr that is not already open. 1389 */ 1390 #define CHECK_UPTO 3 1391 int 1392 fdcheckstd(p) 1393 struct proc *p; 1394 { 1395 struct nameidata nd; 1396 struct filedesc *fdp; 1397 struct file *fp; 1398 struct file *devnullfp; 1399 struct proc *pp; 1400 register_t retval; 1401 int fd, i, error, flags = FREAD|FWRITE, devnull = -1; 1402 char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; 1403 1404 closed[0] = '\0'; 1405 if ((fdp = p->p_fd) == NULL) 1406 return (0); 1407 for (i = 0; i < CHECK_UPTO; i++) { 1408 if (fdp->fd_ofiles[i] != NULL) 1409 continue; 1410 snprintf(which, sizeof(which), ",%d", i); 1411 strcat(closed, which); 1412 if (devnull < 0) { 1413 if ((error = falloc(p, &fp, &fd)) != 0) 1414 return (error); 1415 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1416 p); 1417 if ((error = vn_open(&nd, flags, 0)) != 0) { 1418 FILE_UNUSE(fp, p); 1419 ffree(fp); 1420 fdremove(p->p_fd, fd); 1421 return (error); 1422 } 1423 fp->f_data = (caddr_t)nd.ni_vp; 1424 fp->f_flag = flags; 1425 fp->f_ops = &vnops; 1426 fp->f_type = DTYPE_VNODE; 1427 VOP_UNLOCK(nd.ni_vp, 0); 1428 devnull = fd; 1429 devnullfp = fp; 1430 FILE_SET_MATURE(fp); 1431 FILE_UNUSE(fp, p); 1432 } else { 1433 restart: 1434 if ((error = fdalloc(p, 0, &fd)) != 0) { 1435 if (error == ENOSPC) { 1436 fdexpand(p); 1437 goto restart; 1438 } 1439 return (error); 1440 } 1441 1442 FILE_USE(devnullfp); 1443 /* finishdup() will unuse the descriptors for us */ 1444 if ((error = finishdup(p, devnull, fd, &retval)) != 0) 1445 return (error); 1446 } 1447 } 1448 if (closed[0] != '\0') { 1449 pp = p->p_pptr; 1450 log(LOG_WARNING, "set{u,g}id pid %d (%s) " 1451 "was invoked by uid %d ppid %d (%s) " 1452 "with fd %s closed\n", 1453 p->p_pid, p->p_comm, pp->p_ucred->cr_uid, 1454 pp->p_pid, pp->p_comm, &closed[1]); 1455 } 1456 return (0); 1457 } 1458 #undef CHECK_UPTO 1459