1 /* $NetBSD: kern_descrip.c,v 1.165 2007/12/08 19:29:47 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.165 2007/12/08 19:29:47 pooka Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/filedesc.h> 45 #include <sys/kernel.h> 46 #include <sys/vnode.h> 47 #include <sys/proc.h> 48 #include <sys/file.h> 49 #include <sys/namei.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/stat.h> 53 #include <sys/ioctl.h> 54 #include <sys/fcntl.h> 55 #include <sys/malloc.h> 56 #include <sys/pool.h> 57 #include <sys/syslog.h> 58 #include <sys/unistd.h> 59 #include <sys/resourcevar.h> 60 #include <sys/conf.h> 61 #include <sys/event.h> 62 #include <sys/kauth.h> 63 #include <sys/atomic.h> 64 65 #include <sys/mount.h> 66 #include <sys/syscallargs.h> 67 68 static int cwdi_ctor(void *, void *, int); 69 static void cwdi_dtor(void *, void *); 70 71 /* 72 * Descriptor management. 73 */ 74 struct filelist filehead; /* head of list of open files */ 75 int nfiles; /* actual number of open files */ 76 77 static pool_cache_t cwdi_cache; 78 static pool_cache_t filedesc0_cache; 79 static pool_cache_t file_cache; 80 81 /* Global file list lock */ 82 kmutex_t filelist_lock; 83 84 MALLOC_DEFINE(M_FILE, "file", "Open file structure"); 85 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 86 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 87 88 static inline int 89 find_next_zero(uint32_t *bitmap, int want, u_int bits) 90 { 91 int i, off, maxoff; 92 uint32_t sub; 93 94 if (want > bits) 95 return -1; 96 97 off = want >> NDENTRYSHIFT; 98 i = want & NDENTRYMASK; 99 if (i) { 100 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 101 if (sub != ~0) 102 goto found; 103 off++; 104 } 105 106 maxoff = NDLOSLOTS(bits); 107 while (off < maxoff) { 108 if ((sub = bitmap[off]) != ~0) 109 goto found; 110 off++; 111 } 112 113 return (-1); 114 115 found: 116 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 117 } 118 119 static int 120 find_last_set(struct filedesc *fd, int last) 121 { 122 int off, i; 123 struct file **ofiles = fd->fd_ofiles; 124 uint32_t *bitmap = fd->fd_lomap; 125 126 off = (last - 1) >> NDENTRYSHIFT; 127 128 while (off >= 0 && !bitmap[off]) 129 off--; 130 131 if (off < 0) 132 return (-1); 133 134 i = ((off + 1) << NDENTRYSHIFT) - 1; 135 if (i >= last) 136 i = last - 1; 137 138 while (i > 0 && ofiles[i] == NULL) 139 i--; 140 141 return (i); 142 } 143 144 static inline void 145 fd_used(struct filedesc *fdp, int fd) 146 { 147 u_int off = fd >> NDENTRYSHIFT; 148 149 KASSERT(rw_write_held(&fdp->fd_lock)); 150 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 151 152 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 153 if (fdp->fd_lomap[off] == ~0) { 154 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 155 (1 << (off & NDENTRYMASK))) == 0); 156 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 157 } 158 159 if (fd > fdp->fd_lastfile) 160 fdp->fd_lastfile = fd; 161 } 162 163 static inline void 164 fd_unused(struct filedesc *fdp, int fd) 165 { 166 u_int off = fd >> NDENTRYSHIFT; 167 168 KASSERT(rw_write_held(&fdp->fd_lock)); 169 if (fd < fdp->fd_freefile) 170 fdp->fd_freefile = fd; 171 172 if (fdp->fd_lomap[off] == ~0) { 173 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 174 (1 << (off & NDENTRYMASK))) != 0); 175 fdp->fd_himap[off >> NDENTRYSHIFT] &= 176 ~(1 << (off & NDENTRYMASK)); 177 } 178 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 179 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 180 181 #ifdef DIAGNOSTIC 182 if (fd > fdp->fd_lastfile) 183 panic("fd_unused: fd_lastfile inconsistent"); 184 #endif 185 if (fd == fdp->fd_lastfile) 186 fdp->fd_lastfile = find_last_set(fdp, fd); 187 } 188 189 /* 190 * Lookup the file structure corresponding to a file descriptor 191 * and return it locked. 192 * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);' 193 * The locking strategy has been optimised for this case, i.e. 194 * fd_getfile() returns the file locked while FILE_USE() will increment 195 * the file's use count and unlock. 196 */ 197 struct file * 198 fd_getfile(struct filedesc *fdp, int fd) 199 { 200 struct file *fp; 201 202 rw_enter(&fdp->fd_lock, RW_READER); 203 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { 204 rw_exit(&fdp->fd_lock); 205 return (NULL); 206 } 207 208 mutex_enter(&fp->f_lock); 209 if (FILE_IS_USABLE(fp) == 0) { 210 mutex_exit(&fp->f_lock); 211 rw_exit(&fdp->fd_lock); 212 return (NULL); 213 } 214 rw_exit(&fdp->fd_lock); 215 216 return (fp); 217 } 218 219 /* 220 * Common code for dup, dup2, and fcntl(F_DUPFD). 221 */ 222 static int 223 finishdup(struct lwp *l, int old, int new, register_t *retval) 224 { 225 struct filedesc *fdp; 226 struct file *fp, *delfp; 227 228 fdp = l->l_proc->p_fd; 229 230 /* 231 * If there is a file in the new slot, remember it so we 232 * can close it after we've finished the dup. We need 233 * to do it after the dup is finished, since closing 234 * the file may block. 235 * 236 * Note: `old' is already used for us. 237 * Note: Caller already marked `new' slot "used". 238 */ 239 rw_enter(&fdp->fd_lock, RW_WRITER); 240 delfp = fdp->fd_ofiles[new]; 241 242 fp = fdp->fd_ofiles[old]; 243 KDASSERT(fp != NULL); 244 fdp->fd_ofiles[new] = fp; 245 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 246 rw_exit(&fdp->fd_lock); 247 248 *retval = new; 249 mutex_enter(&fp->f_lock); 250 fp->f_count++; 251 FILE_UNUSE_HAVELOCK(fp, l); 252 253 if (delfp != NULL) { 254 mutex_enter(&delfp->f_lock); 255 FILE_USE(delfp); 256 if (new < fdp->fd_knlistsize) 257 knote_fdclose(l, new); 258 (void) closef(delfp, l); 259 } 260 return (0); 261 } 262 263 /* 264 * Initialize the descriptor system. 265 */ 266 void 267 filedesc_init(void) 268 { 269 270 mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); 271 272 file_cache = pool_cache_init(sizeof(struct file), 0, 0, 0, 273 "filepl", NULL, IPL_NONE, NULL, NULL, NULL); 274 KASSERT(file_cache != NULL); 275 276 cwdi_cache = pool_cache_init(sizeof(struct cwdinfo), 0, 0, 0, 277 "cwdipl", NULL, IPL_NONE, cwdi_ctor, cwdi_dtor, NULL); 278 KASSERT(cwdi_cache != NULL); 279 280 filedesc0_cache = pool_cache_init(sizeof(struct filedesc0), 0, 0, 0, 281 "fdescpl", NULL, IPL_NONE, NULL, NULL, NULL); 282 KASSERT(filedesc0_cache != NULL); 283 } 284 285 /* 286 * System calls on descriptors. 287 */ 288 289 /* 290 * Duplicate a file descriptor. 291 */ 292 /* ARGSUSED */ 293 int 294 sys_dup(struct lwp *l, void *v, register_t *retval) 295 { 296 struct sys_dup_args /* { 297 syscallarg(int) fd; 298 } */ *uap = v; 299 struct file *fp; 300 struct filedesc *fdp; 301 struct proc *p; 302 int old, new, error; 303 304 p = l->l_proc; 305 fdp = p->p_fd; 306 old = SCARG(uap, fd); 307 308 restart: 309 if ((fp = fd_getfile(fdp, old)) == NULL) 310 return (EBADF); 311 312 FILE_USE(fp); 313 314 if ((error = fdalloc(p, 0, &new)) != 0) { 315 if (error == ENOSPC) { 316 fdexpand(p); 317 FILE_UNUSE(fp, l); 318 goto restart; 319 } 320 FILE_UNUSE(fp, l); 321 return (error); 322 } 323 324 /* finishdup() will unuse the descriptors for us */ 325 return (finishdup(l, old, new, retval)); 326 } 327 328 /* 329 * Duplicate a file descriptor to a particular value. 330 */ 331 /* ARGSUSED */ 332 int 333 sys_dup2(struct lwp *l, void *v, register_t *retval) 334 { 335 struct sys_dup2_args /* { 336 syscallarg(int) from; 337 syscallarg(int) to; 338 } */ *uap = v; 339 struct file *fp; 340 struct filedesc *fdp; 341 struct proc *p; 342 int old, new, i, error; 343 344 p = l->l_proc; 345 fdp = p->p_fd; 346 old = SCARG(uap, from); 347 new = SCARG(uap, to); 348 349 restart: 350 if ((fp = fd_getfile(fdp, old)) == NULL) 351 return (EBADF); 352 353 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 354 (u_int)new >= maxfiles) { 355 mutex_exit(&fp->f_lock); 356 return (EBADF); 357 } 358 359 if (old == new) { 360 mutex_exit(&fp->f_lock); 361 *retval = new; 362 return (0); 363 } 364 365 FILE_USE(fp); 366 367 if (new >= fdp->fd_nfiles) { 368 if ((error = fdalloc(p, new, &i)) != 0) { 369 if (error == ENOSPC) { 370 fdexpand(p); 371 FILE_UNUSE(fp, l); 372 goto restart; 373 } 374 FILE_UNUSE(fp, l); 375 return (error); 376 } 377 if (new != i) 378 panic("dup2: fdalloc"); 379 } else { 380 rw_enter(&fdp->fd_lock, RW_WRITER); 381 /* 382 * Mark `new' slot "used" only if it was empty. 383 */ 384 if (fdp->fd_ofiles[new] == NULL) 385 fd_used(fdp, new); 386 rw_exit(&fdp->fd_lock); 387 } 388 389 /* 390 * finishdup() will close the file that's in the `new' 391 * slot, if there's one there. 392 */ 393 394 /* finishdup() will unuse the descriptors for us */ 395 return (finishdup(l, old, new, retval)); 396 } 397 398 /* 399 * fcntl call which is being passed to the file's fs. 400 */ 401 static int 402 fcntl_forfs(int fd, struct lwp *l, int cmd, void *arg) 403 { 404 struct file *fp; 405 struct filedesc *fdp; 406 int error; 407 u_int size; 408 void *data, *memp; 409 #define STK_PARAMS 128 410 char stkbuf[STK_PARAMS]; 411 412 /* fd's value was validated in sys_fcntl before calling this routine */ 413 fdp = l->l_proc->p_fd; 414 fp = fdp->fd_ofiles[fd]; 415 416 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 417 return (EBADF); 418 419 /* 420 * Interpret high order word to find amount of data to be 421 * copied to/from the user's address space. 422 */ 423 size = (size_t)F_PARAM_LEN(cmd); 424 if (size > F_PARAM_MAX) 425 return (EINVAL); 426 memp = NULL; 427 if (size > sizeof(stkbuf)) { 428 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 429 data = memp; 430 } else 431 data = stkbuf; 432 if (cmd & F_FSIN) { 433 if (size) { 434 error = copyin(arg, data, size); 435 if (error) { 436 if (memp) 437 free(memp, M_IOCTLOPS); 438 return (error); 439 } 440 } else 441 *(void **)data = arg; 442 } else if ((cmd & F_FSOUT) && size) 443 /* 444 * Zero the buffer so the user always 445 * gets back something deterministic. 446 */ 447 memset(data, 0, size); 448 else if (cmd & F_FSVOID) 449 *(void **)data = arg; 450 451 452 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, l); 453 454 /* 455 * Copy any data to user, size was 456 * already set and checked above. 457 */ 458 if (error == 0 && (cmd & F_FSOUT) && size) 459 error = copyout(data, arg, size); 460 if (memp) 461 free(memp, M_IOCTLOPS); 462 return (error); 463 } 464 465 int 466 do_fcntl_lock(struct lwp *l, int fd, int cmd, struct flock *fl) 467 { 468 struct file *fp; 469 struct vnode *vp; 470 struct proc *p = l->l_proc; 471 int error, flg; 472 473 if ((fp = fd_getfile(p->p_fd, fd)) == NULL) 474 return (EBADF); 475 476 FILE_USE(fp); 477 478 if (fp->f_type != DTYPE_VNODE) { 479 error = EINVAL; 480 goto out; 481 } 482 vp = (struct vnode *)fp->f_data; 483 if (fl->l_whence == SEEK_CUR) 484 fl->l_start += fp->f_offset; 485 486 flg = F_POSIX; 487 488 switch (cmd) { 489 490 case F_SETLKW: 491 flg |= F_WAIT; 492 /* Fall into F_SETLK */ 493 494 case F_SETLK: 495 switch (fl->l_type) { 496 case F_RDLCK: 497 if ((fp->f_flag & FREAD) == 0) { 498 error = EBADF; 499 goto out; 500 } 501 p->p_flag |= PK_ADVLOCK; 502 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 503 goto out; 504 505 case F_WRLCK: 506 if ((fp->f_flag & FWRITE) == 0) { 507 error = EBADF; 508 goto out; 509 } 510 p->p_flag |= PK_ADVLOCK; 511 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 512 goto out; 513 514 case F_UNLCK: 515 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 516 goto out; 517 518 default: 519 error = EINVAL; 520 goto out; 521 } 522 523 case F_GETLK: 524 if (fl->l_type != F_RDLCK && 525 fl->l_type != F_WRLCK && 526 fl->l_type != F_UNLCK) { 527 error = EINVAL; 528 goto out; 529 } 530 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 531 break; 532 533 default: 534 error = EINVAL; 535 break; 536 } 537 538 out: 539 FILE_UNUSE(fp, l); 540 return error; 541 } 542 543 /* 544 * The file control system call. 545 */ 546 /* ARGSUSED */ 547 int 548 sys_fcntl(struct lwp *l, void *v, register_t *retval) 549 { 550 struct sys_fcntl_args /* { 551 syscallarg(int) fd; 552 syscallarg(int) cmd; 553 syscallarg(void *) arg; 554 } */ *uap = v; 555 struct filedesc *fdp; 556 struct file *fp; 557 struct proc *p; 558 int fd, i, tmp, error, cmd, newmin; 559 struct flock fl; 560 561 p = l->l_proc; 562 fd = SCARG(uap, fd); 563 cmd = SCARG(uap, cmd); 564 fdp = p->p_fd; 565 error = 0; 566 567 switch (cmd) { 568 case F_CLOSEM: 569 if (fd < 0) 570 return EBADF; 571 while (fdp->fd_lastfile >= fd) 572 fdrelease(l, fdp->fd_lastfile); 573 return 0; 574 575 case F_MAXFD: 576 *retval = fdp->fd_lastfile; 577 return 0; 578 579 case F_SETLKW: 580 case F_SETLK: 581 case F_GETLK: 582 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 583 if (error) 584 return error; 585 error = do_fcntl_lock(l, fd, cmd, &fl); 586 if (cmd == F_GETLK && error == 0) 587 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 588 return error; 589 590 default: 591 /* Handled below */ 592 break; 593 } 594 595 restart: 596 if ((fp = fd_getfile(fdp, fd)) == NULL) 597 return (EBADF); 598 599 FILE_USE(fp); 600 601 if ((cmd & F_FSCTL)) { 602 error = fcntl_forfs(fd, l, cmd, SCARG(uap, arg)); 603 goto out; 604 } 605 606 switch (cmd) { 607 608 case F_DUPFD: 609 newmin = (long)SCARG(uap, arg); 610 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 611 (u_int)newmin >= maxfiles) { 612 error = EINVAL; 613 goto out; 614 } 615 if ((error = fdalloc(p, newmin, &i)) != 0) { 616 if (error == ENOSPC) { 617 fdexpand(p); 618 FILE_UNUSE(fp, l); 619 goto restart; 620 } 621 goto out; 622 } 623 624 /* finishdup() will unuse the descriptors for us */ 625 return (finishdup(l, fd, i, retval)); 626 627 case F_GETFD: 628 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 629 break; 630 631 case F_SETFD: 632 if ((long)SCARG(uap, arg) & 1) 633 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 634 else 635 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 636 break; 637 638 case F_GETFL: 639 *retval = OFLAGS(fp->f_flag); 640 break; 641 642 case F_SETFL: 643 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 644 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, l); 645 if (error) 646 break; 647 i = tmp ^ fp->f_flag; 648 if (i & FNONBLOCK) { 649 int flgs = tmp & FNONBLOCK; 650 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs, l); 651 if (error) 652 goto reset_fcntl; 653 } 654 if (i & FASYNC) { 655 int flgs = tmp & FASYNC; 656 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs, l); 657 if (error) { 658 if (i & FNONBLOCK) { 659 tmp = fp->f_flag & FNONBLOCK; 660 (void)(*fp->f_ops->fo_ioctl)(fp, 661 FIONBIO, &tmp, l); 662 } 663 goto reset_fcntl; 664 } 665 } 666 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 667 break; 668 reset_fcntl: 669 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, l); 670 break; 671 672 case F_GETOWN: 673 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp, l); 674 *retval = tmp; 675 break; 676 677 case F_SETOWN: 678 tmp = (int)(intptr_t) SCARG(uap, arg); 679 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, l); 680 break; 681 682 default: 683 error = EINVAL; 684 } 685 686 out: 687 FILE_UNUSE(fp, l); 688 return (error); 689 } 690 691 void 692 fdremove(struct filedesc *fdp, int fd) 693 { 694 695 rw_enter(&fdp->fd_lock, RW_WRITER); 696 fdp->fd_ofiles[fd] = NULL; 697 fd_unused(fdp, fd); 698 rw_exit(&fdp->fd_lock); 699 } 700 701 int 702 fdrelease(struct lwp *l, int fd) 703 { 704 struct proc *p = l->l_proc; 705 struct filedesc *fdp; 706 struct file **fpp, *fp; 707 708 fdp = p->p_fd; 709 rw_enter(&fdp->fd_lock, RW_WRITER); 710 if (fd < 0 || fd > fdp->fd_lastfile) 711 goto badf; 712 fpp = &fdp->fd_ofiles[fd]; 713 fp = *fpp; 714 if (fp == NULL) 715 goto badf; 716 717 mutex_enter(&fp->f_lock); 718 if (!FILE_IS_USABLE(fp)) { 719 mutex_exit(&fp->f_lock); 720 goto badf; 721 } 722 723 FILE_USE(fp); 724 725 *fpp = NULL; 726 fdp->fd_ofileflags[fd] = 0; 727 fd_unused(fdp, fd); 728 rw_exit(&fdp->fd_lock); 729 if (fd < fdp->fd_knlistsize) 730 knote_fdclose(l, fd); 731 return (closef(fp, l)); 732 733 badf: 734 rw_exit(&fdp->fd_lock); 735 return (EBADF); 736 } 737 738 /* 739 * Close a file descriptor. 740 */ 741 /* ARGSUSED */ 742 int 743 sys_close(struct lwp *l, void *v, register_t *retval) 744 { 745 struct sys_close_args /* { 746 syscallarg(int) fd; 747 } */ *uap = v; 748 int fd; 749 struct filedesc *fdp; 750 struct proc *p; 751 752 p = l->l_proc; 753 fd = SCARG(uap, fd); 754 fdp = p->p_fd; 755 756 #if 0 757 if (fd_getfile(fdp, fd) == NULL) 758 return (EBADF); 759 #endif 760 761 return (fdrelease(l, fd)); 762 } 763 764 /* 765 * Return status information about a file descriptor. 766 * Common function for compat code. 767 */ 768 int 769 do_sys_fstat(struct lwp *l, int fd, struct stat *sb) 770 { 771 struct file *fp; 772 int error; 773 774 fp = fd_getfile(l->l_proc->p_fd, fd); 775 if (fp == NULL) 776 return EBADF; 777 778 FILE_USE(fp); 779 error = (*fp->f_ops->fo_stat)(fp, sb, l); 780 FILE_UNUSE(fp, l); 781 782 return error; 783 } 784 785 /* 786 * Return status information about a file descriptor. 787 */ 788 /* ARGSUSED */ 789 int 790 sys___fstat30(struct lwp *l, void *v, register_t *retval) 791 { 792 struct sys___fstat30_args /* { 793 syscallarg(int) fd; 794 syscallarg(struct stat *) sb; 795 } */ *uap = v; 796 struct stat sb; 797 int error; 798 799 error = do_sys_fstat(l, SCARG(uap, fd), &sb); 800 801 if (error == 0) 802 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 803 804 return (error); 805 } 806 807 /* 808 * Return pathconf information about a file descriptor. 809 */ 810 /* ARGSUSED */ 811 int 812 sys_fpathconf(struct lwp *l, void *v, register_t *retval) 813 { 814 struct sys_fpathconf_args /* { 815 syscallarg(int) fd; 816 syscallarg(int) name; 817 } */ *uap = v; 818 int fd; 819 struct filedesc *fdp; 820 struct file *fp; 821 struct proc *p; 822 struct vnode *vp; 823 int error; 824 825 p = l->l_proc; 826 fd = SCARG(uap, fd); 827 fdp = p->p_fd; 828 error = 0; 829 830 if ((fp = fd_getfile(fdp, fd)) == NULL) 831 return (EBADF); 832 833 FILE_USE(fp); 834 835 switch (fp->f_type) { 836 837 case DTYPE_SOCKET: 838 case DTYPE_PIPE: 839 if (SCARG(uap, name) != _PC_PIPE_BUF) 840 error = EINVAL; 841 else 842 *retval = PIPE_BUF; 843 break; 844 845 case DTYPE_VNODE: 846 vp = (struct vnode *)fp->f_data; 847 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 848 break; 849 850 case DTYPE_KQUEUE: 851 error = EINVAL; 852 break; 853 854 default: 855 error = EOPNOTSUPP; 856 break; 857 } 858 859 FILE_UNUSE(fp, l); 860 return (error); 861 } 862 863 /* 864 * Allocate a file descriptor for the process. 865 */ 866 int fdexpanded; /* XXX: what else uses this? */ 867 868 int 869 fdalloc(struct proc *p, int want, int *result) 870 { 871 struct filedesc *fdp; 872 int i, lim, last, error; 873 u_int off, new; 874 875 fdp = p->p_fd; 876 rw_enter(&fdp->fd_lock, RW_WRITER); 877 878 /* 879 * Search for a free descriptor starting at the higher 880 * of want or fd_freefile. If that fails, consider 881 * expanding the ofile array. 882 */ 883 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 884 last = min(fdp->fd_nfiles, lim); 885 again: 886 if ((i = want) < fdp->fd_freefile) 887 i = fdp->fd_freefile; 888 off = i >> NDENTRYSHIFT; 889 new = find_next_zero(fdp->fd_himap, off, 890 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 891 if (new != -1) { 892 i = find_next_zero(&fdp->fd_lomap[new], 893 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 894 if (i == -1) { 895 /* 896 * free file descriptor in this block was 897 * below want, try again with higher want. 898 */ 899 want = (new + 1) << NDENTRYSHIFT; 900 goto again; 901 } 902 i += (new << NDENTRYSHIFT); 903 if (i < last) { 904 if (fdp->fd_ofiles[i] == NULL) { 905 fd_used(fdp, i); 906 if (want <= fdp->fd_freefile) 907 fdp->fd_freefile = i; 908 *result = i; 909 error = 0; 910 goto out; 911 } 912 } 913 } 914 915 /* No space in current array. Expand or let the caller do it. */ 916 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 917 918 out: 919 rw_exit(&fdp->fd_lock); 920 return (error); 921 } 922 923 void 924 fdexpand(struct proc *p) 925 { 926 struct filedesc *fdp; 927 int i, numfiles, oldnfiles; 928 struct file **newofile; 929 char *newofileflags; 930 uint32_t *newhimap = NULL, *newlomap = NULL; 931 932 fdp = p->p_fd; 933 934 restart: 935 oldnfiles = fdp->fd_nfiles; 936 937 if (oldnfiles < NDEXTENT) 938 numfiles = NDEXTENT; 939 else 940 numfiles = 2 * oldnfiles; 941 942 newofile = malloc(numfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 943 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 944 newhimap = malloc(NDHISLOTS(numfiles) * sizeof(uint32_t), 945 M_FILEDESC, M_WAITOK); 946 newlomap = malloc(NDLOSLOTS(numfiles) * sizeof(uint32_t), 947 M_FILEDESC, M_WAITOK); 948 } 949 950 rw_enter(&fdp->fd_lock, RW_WRITER); 951 /* lock fdp */ 952 if (fdp->fd_nfiles != oldnfiles) { 953 /* fdp changed; retry */ 954 rw_exit(&fdp->fd_lock); 955 free(newofile, M_FILEDESC); 956 if (newhimap != NULL) free(newhimap, M_FILEDESC); 957 if (newlomap != NULL) free(newlomap, M_FILEDESC); 958 goto restart; 959 } 960 961 newofileflags = (char *) &newofile[numfiles]; 962 /* 963 * Copy the existing ofile and ofileflags arrays 964 * and zero the new portion of each array. 965 */ 966 memcpy(newofile, fdp->fd_ofiles, 967 (i = sizeof(struct file *) * fdp->fd_nfiles)); 968 memset((char *)newofile + i, 0, 969 numfiles * sizeof(struct file *) - i); 970 memcpy(newofileflags, fdp->fd_ofileflags, 971 (i = sizeof(char) * fdp->fd_nfiles)); 972 memset(newofileflags + i, 0, numfiles * sizeof(char) - i); 973 if (oldnfiles > NDFILE) 974 free(fdp->fd_ofiles, M_FILEDESC); 975 976 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 977 memcpy(newhimap, fdp->fd_himap, 978 (i = NDHISLOTS(oldnfiles) * sizeof(uint32_t))); 979 memset((char *)newhimap + i, 0, 980 NDHISLOTS(numfiles) * sizeof(uint32_t) - i); 981 982 memcpy(newlomap, fdp->fd_lomap, 983 (i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t))); 984 memset((char *)newlomap + i, 0, 985 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); 986 987 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { 988 free(fdp->fd_himap, M_FILEDESC); 989 free(fdp->fd_lomap, M_FILEDESC); 990 } 991 fdp->fd_himap = newhimap; 992 fdp->fd_lomap = newlomap; 993 } 994 995 fdp->fd_ofiles = newofile; 996 fdp->fd_ofileflags = newofileflags; 997 fdp->fd_nfiles = numfiles; 998 999 rw_exit(&fdp->fd_lock); 1000 1001 fdexpanded++; 1002 } 1003 1004 /* 1005 * Create a new open file structure and allocate 1006 * a file descriptor for the process that refers to it. 1007 */ 1008 int 1009 falloc(struct lwp *l, struct file **resultfp, int *resultfd) 1010 { 1011 struct filedesc *fdp; 1012 struct file *fp, *fq; 1013 struct proc *p; 1014 int error, i; 1015 1016 p = l->l_proc; 1017 fdp = p->p_fd; 1018 1019 restart: 1020 if ((error = fdalloc(p, 0, &i)) != 0) { 1021 if (error == ENOSPC) { 1022 fdexpand(p); 1023 goto restart; 1024 } 1025 return (error); 1026 } 1027 1028 fp = pool_cache_get(file_cache, PR_WAITOK); 1029 memset(fp, 0, sizeof(struct file)); 1030 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1031 mutex_enter(&filelist_lock); 1032 if (nfiles >= maxfiles) { 1033 tablefull("file", "increase kern.maxfiles or MAXFILES"); 1034 mutex_exit(&filelist_lock); 1035 rw_enter(&fdp->fd_lock, RW_WRITER); 1036 fd_unused(fdp, i); 1037 rw_exit(&fdp->fd_lock); 1038 mutex_destroy(&fp->f_lock); 1039 pool_cache_put(file_cache, fp); 1040 return (ENFILE); 1041 } 1042 /* 1043 * Allocate a new file descriptor. 1044 * If the process has file descriptor zero open, add to the list 1045 * of open files at that point, otherwise put it at the front of 1046 * the list of open files. 1047 */ 1048 nfiles++; 1049 fp->f_iflags = FIF_LARVAL; 1050 cv_init(&fp->f_cv, "closef"); 1051 rw_enter(&fdp->fd_lock, RW_WRITER); /* XXXAD check order */ 1052 if ((fq = fdp->fd_ofiles[0]) != NULL) { 1053 LIST_INSERT_AFTER(fq, fp, f_list); 1054 } else { 1055 LIST_INSERT_HEAD(&filehead, fp, f_list); 1056 } 1057 KDASSERT(fdp->fd_ofiles[i] == NULL); 1058 fdp->fd_ofiles[i] = fp; 1059 fp->f_count = 1; 1060 fp->f_cred = l->l_cred; 1061 kauth_cred_hold(fp->f_cred); 1062 if (resultfp) { 1063 fp->f_usecount = 1; 1064 *resultfp = fp; 1065 } 1066 mutex_exit(&filelist_lock); 1067 rw_exit(&fdp->fd_lock); 1068 if (resultfd) 1069 *resultfd = i; 1070 1071 return (0); 1072 } 1073 1074 /* 1075 * Free a file descriptor. 1076 */ 1077 void 1078 ffree(struct file *fp) 1079 { 1080 kauth_cred_t cred; 1081 1082 #ifdef DIAGNOSTIC 1083 if (fp->f_usecount) 1084 panic("ffree"); 1085 #endif 1086 1087 mutex_enter(&filelist_lock); 1088 LIST_REMOVE(fp, f_list); 1089 cred = fp->f_cred; 1090 #ifdef DIAGNOSTIC 1091 fp->f_cred = NULL; 1092 fp->f_count = 0; /* What's the point? */ 1093 #endif 1094 nfiles--; 1095 mutex_exit(&filelist_lock); 1096 mutex_destroy(&fp->f_lock); 1097 cv_destroy(&fp->f_cv); 1098 pool_cache_put(file_cache, fp); 1099 kauth_cred_free(cred); 1100 } 1101 1102 /* 1103 * Create an initial cwdinfo structure, using the same current and root 1104 * directories as p. 1105 */ 1106 struct cwdinfo * 1107 cwdinit(struct proc *p) 1108 { 1109 struct cwdinfo *cwdi; 1110 struct cwdinfo *copy; 1111 1112 cwdi = pool_cache_get(cwdi_cache, PR_WAITOK); 1113 copy = p->p_cwdi; 1114 1115 rw_enter(©->cwdi_lock, RW_READER); 1116 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir; 1117 if (cwdi->cwdi_cdir) 1118 VREF(cwdi->cwdi_cdir); 1119 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir; 1120 if (cwdi->cwdi_rdir) 1121 VREF(cwdi->cwdi_rdir); 1122 cwdi->cwdi_edir = p->p_cwdi->cwdi_edir; 1123 if (cwdi->cwdi_edir) 1124 VREF(cwdi->cwdi_edir); 1125 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask; 1126 cwdi->cwdi_refcnt = 1; 1127 rw_exit(©->cwdi_lock); 1128 1129 return (cwdi); 1130 } 1131 1132 static int 1133 cwdi_ctor(void *arg, void *obj, int flags) 1134 { 1135 struct cwdinfo *cwdi; 1136 1137 cwdi = obj; 1138 rw_init(&cwdi->cwdi_lock); 1139 1140 return 0; 1141 } 1142 1143 static void 1144 cwdi_dtor(void *arg, void *obj) 1145 { 1146 struct cwdinfo *cwdi; 1147 1148 cwdi = obj; 1149 rw_destroy(&cwdi->cwdi_lock); 1150 } 1151 1152 /* 1153 * Make p2 share p1's cwdinfo. 1154 */ 1155 void 1156 cwdshare(struct proc *p1, struct proc *p2) 1157 { 1158 struct cwdinfo *cwdi = p1->p_cwdi; 1159 1160 atomic_inc_uint(&cwdi->cwdi_refcnt); 1161 p2->p_cwdi = cwdi; 1162 } 1163 1164 /* 1165 * Make this process not share its cwdinfo structure, maintaining 1166 * all cwdinfo state. 1167 */ 1168 void 1169 cwdunshare(struct proc *p) 1170 { 1171 struct cwdinfo *oldcwdi, *newcwdi; 1172 1173 if (p->p_cwdi->cwdi_refcnt == 1) 1174 return; 1175 1176 newcwdi = cwdinit(p); 1177 oldcwdi = p->p_cwdi; 1178 p->p_cwdi = newcwdi; 1179 cwdfree(oldcwdi); 1180 } 1181 1182 /* 1183 * Release a cwdinfo structure. 1184 */ 1185 void 1186 cwdfree(struct cwdinfo *cwdi) 1187 { 1188 1189 if (atomic_dec_uint_nv(&cwdi->cwdi_refcnt) > 0) 1190 return; 1191 1192 vrele(cwdi->cwdi_cdir); 1193 if (cwdi->cwdi_rdir) 1194 vrele(cwdi->cwdi_rdir); 1195 if (cwdi->cwdi_edir) 1196 vrele(cwdi->cwdi_edir); 1197 pool_cache_put(cwdi_cache, cwdi); 1198 } 1199 1200 /* 1201 * Create an initial filedesc structure, using the same current and root 1202 * directories as p. 1203 */ 1204 struct filedesc * 1205 fdinit(struct proc *p) 1206 { 1207 struct filedesc0 *newfdp; 1208 1209 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK); 1210 memset(newfdp, 0, sizeof(struct filedesc0)); 1211 1212 fdinit1(newfdp); 1213 1214 return (&newfdp->fd_fd); 1215 } 1216 1217 /* 1218 * Initialize a file descriptor table. 1219 */ 1220 void 1221 fdinit1(struct filedesc0 *newfdp) 1222 { 1223 1224 newfdp->fd_fd.fd_refcnt = 1; 1225 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1226 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1227 newfdp->fd_fd.fd_nfiles = NDFILE; 1228 newfdp->fd_fd.fd_knlistsize = -1; 1229 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 1230 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 1231 newfdp->fd_fd.fd_lastfile = -1; 1232 rw_init(&newfdp->fd_fd.fd_lock); 1233 } 1234 1235 /* 1236 * Make p2 share p1's filedesc structure. 1237 */ 1238 void 1239 fdshare(struct proc *p1, struct proc *p2) 1240 { 1241 struct filedesc *fdp = p1->p_fd; 1242 1243 p2->p_fd = fdp; 1244 atomic_inc_uint(&fdp->fd_refcnt); 1245 } 1246 1247 /* 1248 * Make this process not share its filedesc structure, maintaining 1249 * all file descriptor state. 1250 */ 1251 void 1252 fdunshare(struct lwp *l) 1253 { 1254 struct proc *p = l->l_proc; 1255 struct filedesc *newfd; 1256 1257 if (p->p_fd->fd_refcnt == 1) 1258 return; 1259 1260 newfd = fdcopy(p); 1261 fdfree(l); 1262 p->p_fd = newfd; 1263 } 1264 1265 /* 1266 * Clear a process's fd table. 1267 */ 1268 void 1269 fdclear(struct lwp *l) 1270 { 1271 struct proc *p = l->l_proc; 1272 struct filedesc *newfd; 1273 1274 newfd = fdinit(p); 1275 fdfree(l); 1276 p->p_fd = newfd; 1277 } 1278 1279 /* 1280 * Copy a filedesc structure. 1281 */ 1282 struct filedesc * 1283 fdcopy(struct proc *p) 1284 { 1285 struct filedesc *newfdp, *fdp; 1286 struct file **fpp, **nfpp; 1287 int i, numfiles, lastfile; 1288 1289 fdp = p->p_fd; 1290 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK); 1291 newfdp->fd_refcnt = 1; 1292 rw_init(&newfdp->fd_lock); 1293 1294 restart: 1295 numfiles = fdp->fd_nfiles; 1296 lastfile = fdp->fd_lastfile; 1297 1298 /* 1299 * If the number of open files fits in the internal arrays 1300 * of the open file structure, use them, otherwise allocate 1301 * additional memory for the number of descriptors currently 1302 * in use. 1303 */ 1304 if (lastfile < NDFILE) { 1305 i = NDFILE; 1306 } else { 1307 /* 1308 * Compute the smallest multiple of NDEXTENT needed 1309 * for the file descriptors currently in use, 1310 * allowing the table to shrink. 1311 */ 1312 i = numfiles; 1313 while (i >= 2 * NDEXTENT && i > lastfile * 2) 1314 i /= 2; 1315 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 1316 } 1317 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1318 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t), 1319 M_FILEDESC, M_WAITOK); 1320 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t), 1321 M_FILEDESC, M_WAITOK); 1322 } 1323 1324 rw_enter(&fdp->fd_lock, RW_READER); 1325 if (numfiles != fdp->fd_nfiles || lastfile != fdp->fd_lastfile) { 1326 rw_exit(&fdp->fd_lock); 1327 if (i > NDFILE) 1328 free(newfdp->fd_ofiles, M_FILEDESC); 1329 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1330 free(newfdp->fd_himap, M_FILEDESC); 1331 free(newfdp->fd_lomap, M_FILEDESC); 1332 } 1333 goto restart; 1334 } 1335 1336 if (lastfile < NDFILE) { 1337 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1338 newfdp->fd_ofileflags = 1339 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1340 } else { 1341 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1342 } 1343 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1344 newfdp->fd_himap = 1345 ((struct filedesc0 *) newfdp)->fd_dhimap; 1346 newfdp->fd_lomap = 1347 ((struct filedesc0 *) newfdp)->fd_dlomap; 1348 } 1349 1350 newfdp->fd_nfiles = i; 1351 newfdp->fd_lastfile = lastfile; 1352 newfdp->fd_freefile = fdp->fd_freefile; 1353 1354 /* Clear the entries that will not be copied over. 1355 * Avoid calling memset with 0 size (i.e. when 1356 * lastfile == i-1 */ 1357 if (lastfile < (i-1)) 1358 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1359 (i - lastfile - 1) * sizeof(struct file **)); 1360 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1361 if (i < NDENTRIES * NDENTRIES) 1362 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1363 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1364 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1365 1366 fpp = fdp->fd_ofiles; 1367 nfpp = newfdp->fd_ofiles; 1368 for (i = 0; i <= lastfile; i++, fpp++, nfpp++) { 1369 if ((*nfpp = *fpp) == NULL) 1370 continue; 1371 1372 if ((*fpp)->f_type == DTYPE_KQUEUE) 1373 /* kq descriptors cannot be copied. */ 1374 fdremove(newfdp, i); 1375 else { 1376 mutex_enter(&(*fpp)->f_lock); 1377 (*fpp)->f_count++; 1378 mutex_exit(&(*fpp)->f_lock); 1379 } 1380 } 1381 1382 rw_exit(&fdp->fd_lock); 1383 1384 newfdp->fd_knlist = NULL; 1385 newfdp->fd_knlistsize = -1; 1386 newfdp->fd_knhash = NULL; 1387 newfdp->fd_knhashmask = 0; 1388 1389 return (newfdp); 1390 } 1391 1392 /* 1393 * Release a filedesc structure. 1394 */ 1395 void 1396 fdfree(struct lwp *l) 1397 { 1398 struct proc *p = l->l_proc; 1399 struct filedesc *fdp; 1400 struct file **fpp, *fp; 1401 int i; 1402 1403 fdp = p->p_fd; 1404 if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) 1405 return; 1406 1407 rw_destroy(&fdp->fd_lock); 1408 fpp = fdp->fd_ofiles; 1409 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1410 fp = *fpp; 1411 if (fp != NULL) { 1412 *fpp = NULL; 1413 mutex_enter(&fp->f_lock); 1414 FILE_USE(fp); 1415 if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize) 1416 knote_fdclose(l, fdp->fd_lastfile - i); 1417 (void) closef(fp, l); 1418 } 1419 } 1420 p->p_fd = NULL; 1421 if (fdp->fd_nfiles > NDFILE) 1422 free(fdp->fd_ofiles, M_FILEDESC); 1423 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1424 free(fdp->fd_himap, M_FILEDESC); 1425 free(fdp->fd_lomap, M_FILEDESC); 1426 } 1427 if (fdp->fd_knlist) 1428 free(fdp->fd_knlist, M_KEVENT); 1429 if (fdp->fd_knhash) 1430 hashdone(fdp->fd_knhash, M_KEVENT); 1431 pool_cache_put(filedesc0_cache, fdp); 1432 } 1433 1434 /* 1435 * Internal form of close. 1436 * Decrement reference count on file structure. 1437 * Note: p may be NULL when closing a file 1438 * that was being passed in a message. 1439 * 1440 * Note: we expect the caller is holding a usecount, and expects us 1441 * to drop it (the caller thinks the file is going away forever). 1442 */ 1443 int 1444 closef(struct file *fp, struct lwp *l) 1445 { 1446 struct proc *p = l ? l->l_proc : NULL; 1447 struct vnode *vp; 1448 struct flock lf; 1449 int error; 1450 1451 if (fp == NULL) 1452 return (0); 1453 1454 /* 1455 * POSIX record locking dictates that any close releases ALL 1456 * locks owned by this process. This is handled by setting 1457 * a flag in the unlock to free ONLY locks obeying POSIX 1458 * semantics, and not to free BSD-style file locks. 1459 * If the descriptor was in a message, POSIX-style locks 1460 * aren't passed with the descriptor. 1461 */ 1462 if (p && (p->p_flag & PK_ADVLOCK) && fp->f_type == DTYPE_VNODE) { 1463 lf.l_whence = SEEK_SET; 1464 lf.l_start = 0; 1465 lf.l_len = 0; 1466 lf.l_type = F_UNLCK; 1467 vp = (struct vnode *)fp->f_data; 1468 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX); 1469 } 1470 1471 /* 1472 * If WANTCLOSE is set, then the reference count on the file 1473 * is 0, but there were multiple users of the file. This can 1474 * happen if a filedesc structure is shared by multiple 1475 * processes. 1476 */ 1477 mutex_enter(&fp->f_lock); 1478 if (fp->f_iflags & FIF_WANTCLOSE) { 1479 /* 1480 * Another user of the file is already closing, and is 1481 * simply waiting for other users of the file to drain. 1482 * Release our usecount, and wake up the closer if it 1483 * is the only remaining use. 1484 */ 1485 #ifdef DIAGNOSTIC 1486 if (fp->f_count != 0) 1487 panic("closef: wantclose and count != 0"); 1488 if (fp->f_usecount < 2) 1489 panic("closef: wantclose and usecount < 2"); 1490 #endif 1491 if (--fp->f_usecount == 1) 1492 cv_broadcast(&fp->f_cv); 1493 mutex_exit(&fp->f_lock); 1494 return (0); 1495 } else { 1496 /* 1497 * Decrement the reference count. If we were not the 1498 * last reference, then release our use and just 1499 * return. 1500 */ 1501 if (--fp->f_count > 0) { 1502 #ifdef DIAGNOSTIC 1503 if (fp->f_usecount < 1) 1504 panic("closef: no wantclose and usecount < 1"); 1505 #endif 1506 fp->f_usecount--; 1507 mutex_exit(&fp->f_lock); 1508 return (0); 1509 } 1510 } 1511 1512 /* 1513 * The reference count is now 0. However, there may be 1514 * multiple potential users of this file. This can happen 1515 * if multiple processes shared a single filedesc structure. 1516 * 1517 * Notify these potential users that the file is closing. 1518 * This will prevent them from adding additional uses to 1519 * the file. 1520 */ 1521 fp->f_iflags |= FIF_WANTCLOSE; 1522 1523 /* 1524 * We expect the caller to add a use to the file. So, if we 1525 * are the last user, usecount will be 1. If it is not, we 1526 * must wait for the usecount to drain. When it drains back 1527 * to 1, we will be awakened so that we may proceed with the 1528 * close. 1529 */ 1530 #ifdef DIAGNOSTIC 1531 if (fp->f_usecount < 1) 1532 panic("closef: usecount < 1"); 1533 #endif 1534 while (fp->f_usecount > 1) 1535 cv_wait(&fp->f_cv, &fp->f_lock); 1536 #ifdef DIAGNOSTIC 1537 if (fp->f_usecount != 1) 1538 panic("closef: usecount != 1"); 1539 #endif 1540 1541 mutex_exit(&fp->f_lock); 1542 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1543 lf.l_whence = SEEK_SET; 1544 lf.l_start = 0; 1545 lf.l_len = 0; 1546 lf.l_type = F_UNLCK; 1547 vp = (struct vnode *)fp->f_data; 1548 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1549 } 1550 if (fp->f_ops) 1551 error = (*fp->f_ops->fo_close)(fp, l); 1552 else 1553 error = 0; 1554 1555 /* Nothing references the file now, drop the final use (us). */ 1556 fp->f_usecount--; 1557 1558 ffree(fp); 1559 return (error); 1560 } 1561 1562 /* 1563 * Apply an advisory lock on a file descriptor. 1564 * 1565 * Just attempt to get a record lock of the requested type on 1566 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1567 */ 1568 /* ARGSUSED */ 1569 int 1570 sys_flock(struct lwp *l, void *v, register_t *retval) 1571 { 1572 struct sys_flock_args /* { 1573 syscallarg(int) fd; 1574 syscallarg(int) how; 1575 } */ *uap = v; 1576 int fd, how, error; 1577 struct proc *p; 1578 struct filedesc *fdp; 1579 struct file *fp; 1580 struct vnode *vp; 1581 struct flock lf; 1582 1583 p = l->l_proc; 1584 fd = SCARG(uap, fd); 1585 how = SCARG(uap, how); 1586 fdp = p->p_fd; 1587 error = 0; 1588 1589 if ((fp = fd_getfile(fdp, fd)) == NULL) 1590 return (EBADF); 1591 1592 FILE_USE(fp); 1593 1594 if (fp->f_type != DTYPE_VNODE) { 1595 error = EOPNOTSUPP; 1596 goto out; 1597 } 1598 1599 vp = (struct vnode *)fp->f_data; 1600 lf.l_whence = SEEK_SET; 1601 lf.l_start = 0; 1602 lf.l_len = 0; 1603 if (how & LOCK_UN) { 1604 lf.l_type = F_UNLCK; 1605 fp->f_flag &= ~FHASLOCK; 1606 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1607 goto out; 1608 } 1609 if (how & LOCK_EX) 1610 lf.l_type = F_WRLCK; 1611 else if (how & LOCK_SH) 1612 lf.l_type = F_RDLCK; 1613 else { 1614 error = EINVAL; 1615 goto out; 1616 } 1617 fp->f_flag |= FHASLOCK; 1618 if (how & LOCK_NB) 1619 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 1620 else 1621 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, 1622 F_FLOCK|F_WAIT); 1623 out: 1624 FILE_UNUSE(fp, l); 1625 return (error); 1626 } 1627 1628 /* ARGSUSED */ 1629 int 1630 sys_posix_fadvise(struct lwp *l, void *v, register_t *retval) 1631 { 1632 const struct sys_posix_fadvise_args /* { 1633 syscallarg(int) fd; 1634 syscallarg(off_t) offset; 1635 syscallarg(off_t) len; 1636 syscallarg(int) advice; 1637 } */ *uap = v; 1638 const int fd = SCARG(uap, fd); 1639 const int advice = SCARG(uap, advice); 1640 struct proc *p = l->l_proc; 1641 struct file *fp; 1642 int error = 0; 1643 1644 fp = fd_getfile(p->p_fd, fd); 1645 if (fp == NULL) { 1646 error = EBADF; 1647 goto out; 1648 } 1649 FILE_USE(fp); 1650 1651 if (fp->f_type != DTYPE_VNODE) { 1652 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1653 error = ESPIPE; 1654 } else { 1655 error = EOPNOTSUPP; 1656 } 1657 goto out; 1658 } 1659 1660 switch (advice) { 1661 case POSIX_FADV_NORMAL: 1662 case POSIX_FADV_RANDOM: 1663 case POSIX_FADV_SEQUENTIAL: 1664 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 1665 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 1666 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 1667 1668 /* 1669 * we ignore offset and size. 1670 */ 1671 1672 fp->f_advice = advice; 1673 break; 1674 1675 case POSIX_FADV_WILLNEED: 1676 case POSIX_FADV_DONTNEED: 1677 case POSIX_FADV_NOREUSE: 1678 1679 /* 1680 * not implemented yet. 1681 */ 1682 1683 break; 1684 default: 1685 error = EINVAL; 1686 break; 1687 } 1688 out: 1689 if (fp != NULL) { 1690 FILE_UNUSE(fp, l); 1691 } 1692 *retval = error; 1693 return 0; 1694 } 1695 1696 /* 1697 * File Descriptor pseudo-device driver (/dev/fd/). 1698 * 1699 * Opening minor device N dup()s the file (if any) connected to file 1700 * descriptor N belonging to the calling process. Note that this driver 1701 * consists of only the ``open()'' routine, because all subsequent 1702 * references to this file will be direct to the other driver. 1703 */ 1704 /* ARGSUSED */ 1705 static int 1706 filedescopen(dev_t dev, int mode, int type, struct lwp *l) 1707 { 1708 1709 /* 1710 * XXX Kludge: set dupfd to contain the value of the 1711 * the file descriptor being sought for duplication. The error 1712 * return ensures that the vnode for this device will be released 1713 * by vn_open. Open will detect this special error and take the 1714 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1715 * will simply report the error. 1716 */ 1717 l->l_dupfd = minor(dev); /* XXX */ 1718 return EDUPFD; 1719 } 1720 1721 const struct cdevsw filedesc_cdevsw = { 1722 filedescopen, noclose, noread, nowrite, noioctl, 1723 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER, 1724 }; 1725 1726 /* 1727 * Duplicate the specified descriptor to a free descriptor. 1728 * 1729 * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller. 1730 */ 1731 int 1732 dupfdopen(struct lwp *l, int indx, int dfd, int mode, int error) 1733 { 1734 struct proc *p = l->l_proc; 1735 struct filedesc *fdp; 1736 struct file *wfp; 1737 1738 fdp = p->p_fd; 1739 1740 /* should be cleared by the caller */ 1741 KASSERT(fdp->fd_ofiles[indx] == NULL); 1742 1743 /* 1744 * If the to-be-dup'd fd number is greater than the allowed number 1745 * of file descriptors, or the fd to be dup'd has already been 1746 * closed, reject. 1747 */ 1748 1749 /* 1750 * Note, in the case of indx == dfd, fd_getfile below returns NULL. 1751 */ 1752 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1753 return (EBADF); 1754 1755 FILE_USE(wfp); 1756 1757 /* 1758 * There are two cases of interest here. 1759 * 1760 * For EDUPFD simply dup (dfd) to file descriptor 1761 * (indx) and return. 1762 * 1763 * For EMOVEFD steal away the file structure from (dfd) and 1764 * store it in (indx). (dfd) is effectively closed by 1765 * this operation. 1766 * 1767 * Any other error code is just returned. 1768 */ 1769 switch (error) { 1770 case EDUPFD: 1771 /* 1772 * Check that the mode the file is being opened for is a 1773 * subset of the mode of the existing descriptor. 1774 */ 1775 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 1776 FILE_UNUSE(wfp, l); 1777 return (EACCES); 1778 } 1779 rw_enter(&fdp->fd_lock, RW_WRITER); 1780 fdp->fd_ofiles[indx] = wfp; 1781 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1782 rw_exit(&fdp->fd_lock); 1783 mutex_enter(&wfp->f_lock); 1784 wfp->f_count++; 1785 /* 'indx' has been fd_used'ed by caller */ 1786 FILE_UNUSE_HAVELOCK(wfp, l); 1787 return (0); 1788 1789 case EMOVEFD: 1790 /* 1791 * Steal away the file pointer from dfd, and stuff it into indx. 1792 */ 1793 rw_enter(&fdp->fd_lock, RW_WRITER); 1794 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1795 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1796 fdp->fd_ofiles[dfd] = NULL; 1797 fdp->fd_ofileflags[dfd] = 0; 1798 /* 1799 * Complete the clean up of the filedesc structure by 1800 * recomputing the various hints. 1801 */ 1802 /* 'indx' has been fd_used'ed by caller */ 1803 fd_unused(fdp, dfd); 1804 rw_exit(&fdp->fd_lock); 1805 FILE_UNUSE(wfp, l); 1806 return (0); 1807 1808 default: 1809 FILE_UNUSE(wfp, l); 1810 return (error); 1811 } 1812 /* NOTREACHED */ 1813 } 1814 1815 /* 1816 * Close any files on exec? 1817 */ 1818 void 1819 fdcloseexec(struct lwp *l) 1820 { 1821 struct proc *p = l->l_proc; 1822 struct filedesc *fdp; 1823 int fd; 1824 1825 fdunshare(l); 1826 cwdunshare(p); 1827 1828 if (p->p_cwdi->cwdi_edir) 1829 vrele(p->p_cwdi->cwdi_edir); 1830 1831 fdp = p->p_fd; 1832 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1833 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1834 (void) fdrelease(l, fd); 1835 } 1836 1837 /* 1838 * It is unsafe for set[ug]id processes to be started with file 1839 * descriptors 0..2 closed, as these descriptors are given implicit 1840 * significance in the Standard C library. fdcheckstd() will create a 1841 * descriptor referencing /dev/null for each of stdin, stdout, and 1842 * stderr that is not already open. 1843 */ 1844 #define CHECK_UPTO 3 1845 int 1846 fdcheckstd(struct lwp *l) 1847 { 1848 struct proc *p; 1849 struct nameidata nd; 1850 struct filedesc *fdp; 1851 struct file *fp; 1852 struct file *devnullfp = NULL; /* Quell compiler warning */ 1853 struct proc *pp; 1854 register_t retval; 1855 int fd, i, error, flags = FREAD|FWRITE, devnull = -1; 1856 char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; 1857 1858 p = l->l_proc; 1859 closed[0] = '\0'; 1860 if ((fdp = p->p_fd) == NULL) 1861 return (0); 1862 for (i = 0; i < CHECK_UPTO; i++) { 1863 if (fdp->fd_ofiles[i] != NULL) 1864 continue; 1865 snprintf(which, sizeof(which), ",%d", i); 1866 strlcat(closed, which, sizeof(closed)); 1867 if (devnullfp == NULL) { 1868 if ((error = falloc(l, &fp, &fd)) != 0) 1869 return (error); 1870 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null"); 1871 if ((error = vn_open(&nd, flags, 0)) != 0) { 1872 FILE_UNUSE(fp, l); 1873 ffree(fp); 1874 fdremove(p->p_fd, fd); 1875 return (error); 1876 } 1877 fp->f_data = nd.ni_vp; 1878 fp->f_flag = flags; 1879 fp->f_ops = &vnops; 1880 fp->f_type = DTYPE_VNODE; 1881 VOP_UNLOCK(nd.ni_vp, 0); 1882 devnull = fd; 1883 devnullfp = fp; 1884 FILE_SET_MATURE(fp); 1885 } else { 1886 restart: 1887 if ((error = fdalloc(p, 0, &fd)) != 0) { 1888 if (error == ENOSPC) { 1889 fdexpand(p); 1890 goto restart; 1891 } 1892 return (error); 1893 } 1894 1895 mutex_enter(&devnullfp->f_lock); 1896 FILE_USE(devnullfp); 1897 /* finishdup() will unuse the descriptors for us */ 1898 if ((error = finishdup(l, devnull, fd, &retval)) != 0) 1899 return (error); 1900 } 1901 } 1902 if (devnullfp) 1903 FILE_UNUSE(devnullfp, l); 1904 if (closed[0] != '\0') { 1905 mutex_enter(&proclist_lock); 1906 pp = p->p_pptr; 1907 mutex_enter(&pp->p_mutex); 1908 log(LOG_WARNING, "set{u,g}id pid %d (%s) " 1909 "was invoked by uid %d ppid %d (%s) " 1910 "with fd %s closed\n", 1911 p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred), 1912 pp->p_pid, pp->p_comm, &closed[1]); 1913 mutex_exit(&pp->p_mutex); 1914 mutex_exit(&proclist_lock); 1915 } 1916 return (0); 1917 } 1918 #undef CHECK_UPTO 1919 1920 /* 1921 * Sets descriptor owner. If the owner is a process, 'pgid' 1922 * is set to positive value, process ID. If the owner is process group, 1923 * 'pgid' is set to -pg_id. 1924 */ 1925 int 1926 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data) 1927 { 1928 int id = *(const int *)data; 1929 int error; 1930 1931 switch (cmd) { 1932 case TIOCSPGRP: 1933 if (id < 0) 1934 return (EINVAL); 1935 id = -id; 1936 break; 1937 default: 1938 break; 1939 } 1940 1941 if (id > 0 && !pfind(id)) 1942 return (ESRCH); 1943 else if (id < 0 && (error = pgid_in_session(p, -id))) 1944 return (error); 1945 1946 *pgid = id; 1947 return (0); 1948 } 1949 1950 /* 1951 * Return descriptor owner information. If the value is positive, 1952 * it's process ID. If it's negative, it's process group ID and 1953 * needs the sign removed before use. 1954 */ 1955 int 1956 fgetown(struct proc *p, pid_t pgid, int cmd, void *data) 1957 { 1958 switch (cmd) { 1959 case TIOCGPGRP: 1960 *(int *)data = -pgid; 1961 break; 1962 default: 1963 *(int *)data = pgid; 1964 break; 1965 } 1966 return (0); 1967 } 1968 1969 /* 1970 * Send signal to descriptor owner, either process or process group. 1971 */ 1972 void 1973 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1974 { 1975 struct proc *p1; 1976 struct pgrp *pgrp; 1977 ksiginfo_t ksi; 1978 1979 KSI_INIT(&ksi); 1980 ksi.ksi_signo = signo; 1981 ksi.ksi_code = code; 1982 ksi.ksi_band = band; 1983 1984 /* 1985 * Since we may be called from an interrupt context, we must use 1986 * the proclist_mutex. 1987 */ 1988 mutex_enter(&proclist_mutex); 1989 if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED))) 1990 kpsignal(p1, &ksi, fdescdata); 1991 else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED))) 1992 kpgsignal(pgrp, &ksi, fdescdata, 0); 1993 mutex_exit(&proclist_mutex); 1994 } 1995 1996 int 1997 fdclone(struct lwp *l, struct file *fp, int fd, int flag, 1998 const struct fileops *fops, void *data) 1999 { 2000 fp->f_flag = flag; 2001 fp->f_type = DTYPE_MISC; 2002 fp->f_ops = fops; 2003 fp->f_data = data; 2004 2005 l->l_dupfd = fd; 2006 2007 FILE_SET_MATURE(fp); 2008 FILE_UNUSE(fp, l); 2009 return EMOVEFD; 2010 } 2011 2012 /* ARGSUSED */ 2013 int 2014 fnullop_fcntl(struct file *fp, u_int cmd, void *data, struct lwp *l) 2015 { 2016 2017 if (cmd == F_SETFL) 2018 return 0; 2019 2020 return EOPNOTSUPP; 2021 } 2022 2023 /* ARGSUSED */ 2024 int 2025 fnullop_poll(struct file *fp, int which, struct lwp *l) 2026 { 2027 2028 return 0; 2029 } 2030 2031 2032 /* ARGSUSED */ 2033 int 2034 fnullop_kqfilter(struct file *fp, struct knote *kn) 2035 { 2036 2037 return 0; 2038 } 2039 2040 /* ARGSUSED */ 2041 int 2042 fbadop_read(struct file *fp, off_t *offset, struct uio *uio, 2043 kauth_cred_t cred, int flags) 2044 { 2045 2046 return EOPNOTSUPP; 2047 } 2048 2049 /* ARGSUSED */ 2050 int 2051 fbadop_write(struct file *fp, off_t *offset, struct uio *uio, 2052 kauth_cred_t cred, int flags) 2053 { 2054 2055 return EOPNOTSUPP; 2056 } 2057 2058 /* ARGSUSED */ 2059 int 2060 fbadop_ioctl(struct file *fp, u_long com, void *data, struct lwp *l) 2061 { 2062 2063 return EOPNOTSUPP; 2064 } 2065 2066 /* ARGSUSED */ 2067 int 2068 fbadop_stat(struct file *fp, struct stat *sb, struct lwp *l) 2069 { 2070 2071 return EOPNOTSUPP; 2072 } 2073 2074 /* ARGSUSED */ 2075 int 2076 fbadop_close(struct file *fp, struct lwp *l) 2077 { 2078 2079 return EOPNOTSUPP; 2080 } 2081