1 /* $NetBSD: kern_descrip.c,v 1.162 2007/11/07 00:23:20 ad Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.162 2007/11/07 00:23:20 ad Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/filedesc.h> 45 #include <sys/kernel.h> 46 #include <sys/vnode.h> 47 #include <sys/proc.h> 48 #include <sys/file.h> 49 #include <sys/namei.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/stat.h> 53 #include <sys/ioctl.h> 54 #include <sys/fcntl.h> 55 #include <sys/malloc.h> 56 #include <sys/pool.h> 57 #include <sys/syslog.h> 58 #include <sys/unistd.h> 59 #include <sys/resourcevar.h> 60 #include <sys/conf.h> 61 #include <sys/event.h> 62 #include <sys/kauth.h> 63 64 #include <sys/mount.h> 65 #include <sys/syscallargs.h> 66 67 static int cwdi_ctor(void *, void *, int); 68 static void cwdi_dtor(void *, void *); 69 70 /* 71 * Descriptor management. 72 */ 73 struct filelist filehead; /* head of list of open files */ 74 int nfiles; /* actual number of open files */ 75 76 static pool_cache_t cwdi_cache; 77 static pool_cache_t filedesc0_cache; 78 static pool_cache_t file_cache; 79 80 /* Global file list lock */ 81 kmutex_t filelist_lock; 82 83 MALLOC_DEFINE(M_FILE, "file", "Open file structure"); 84 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 85 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 86 87 static inline int 88 find_next_zero(uint32_t *bitmap, int want, u_int bits) 89 { 90 int i, off, maxoff; 91 uint32_t sub; 92 93 if (want > bits) 94 return -1; 95 96 off = want >> NDENTRYSHIFT; 97 i = want & NDENTRYMASK; 98 if (i) { 99 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 100 if (sub != ~0) 101 goto found; 102 off++; 103 } 104 105 maxoff = NDLOSLOTS(bits); 106 while (off < maxoff) { 107 if ((sub = bitmap[off]) != ~0) 108 goto found; 109 off++; 110 } 111 112 return (-1); 113 114 found: 115 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 116 } 117 118 static int 119 find_last_set(struct filedesc *fd, int last) 120 { 121 int off, i; 122 struct file **ofiles = fd->fd_ofiles; 123 uint32_t *bitmap = fd->fd_lomap; 124 125 off = (last - 1) >> NDENTRYSHIFT; 126 127 while (off >= 0 && !bitmap[off]) 128 off--; 129 130 if (off < 0) 131 return (-1); 132 133 i = ((off + 1) << NDENTRYSHIFT) - 1; 134 if (i >= last) 135 i = last - 1; 136 137 while (i > 0 && ofiles[i] == NULL) 138 i--; 139 140 return (i); 141 } 142 143 static inline void 144 fd_used(struct filedesc *fdp, int fd) 145 { 146 u_int off = fd >> NDENTRYSHIFT; 147 148 KASSERT(rw_write_held(&fdp->fd_lock)); 149 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 150 151 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 152 if (fdp->fd_lomap[off] == ~0) { 153 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 154 (1 << (off & NDENTRYMASK))) == 0); 155 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 156 } 157 158 if (fd > fdp->fd_lastfile) 159 fdp->fd_lastfile = fd; 160 } 161 162 static inline void 163 fd_unused(struct filedesc *fdp, int fd) 164 { 165 u_int off = fd >> NDENTRYSHIFT; 166 167 KASSERT(rw_write_held(&fdp->fd_lock)); 168 if (fd < fdp->fd_freefile) 169 fdp->fd_freefile = fd; 170 171 if (fdp->fd_lomap[off] == ~0) { 172 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 173 (1 << (off & NDENTRYMASK))) != 0); 174 fdp->fd_himap[off >> NDENTRYSHIFT] &= 175 ~(1 << (off & NDENTRYMASK)); 176 } 177 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 178 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 179 180 #ifdef DIAGNOSTIC 181 if (fd > fdp->fd_lastfile) 182 panic("fd_unused: fd_lastfile inconsistent"); 183 #endif 184 if (fd == fdp->fd_lastfile) 185 fdp->fd_lastfile = find_last_set(fdp, fd); 186 } 187 188 /* 189 * Lookup the file structure corresponding to a file descriptor 190 * and return it locked. 191 * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);' 192 * The locking strategy has been optimised for this case, i.e. 193 * fd_getfile() returns the file locked while FILE_USE() will increment 194 * the file's use count and unlock. 195 */ 196 struct file * 197 fd_getfile(struct filedesc *fdp, int fd) 198 { 199 struct file *fp; 200 201 rw_enter(&fdp->fd_lock, RW_READER); 202 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { 203 rw_exit(&fdp->fd_lock); 204 return (NULL); 205 } 206 207 mutex_enter(&fp->f_lock); 208 if (FILE_IS_USABLE(fp) == 0) { 209 mutex_exit(&fp->f_lock); 210 rw_exit(&fdp->fd_lock); 211 return (NULL); 212 } 213 rw_exit(&fdp->fd_lock); 214 215 return (fp); 216 } 217 218 /* 219 * Common code for dup, dup2, and fcntl(F_DUPFD). 220 */ 221 static int 222 finishdup(struct lwp *l, int old, int new, register_t *retval) 223 { 224 struct filedesc *fdp; 225 struct file *fp, *delfp; 226 227 fdp = l->l_proc->p_fd; 228 229 /* 230 * If there is a file in the new slot, remember it so we 231 * can close it after we've finished the dup. We need 232 * to do it after the dup is finished, since closing 233 * the file may block. 234 * 235 * Note: `old' is already used for us. 236 * Note: Caller already marked `new' slot "used". 237 */ 238 rw_enter(&fdp->fd_lock, RW_WRITER); 239 delfp = fdp->fd_ofiles[new]; 240 241 fp = fdp->fd_ofiles[old]; 242 KDASSERT(fp != NULL); 243 fdp->fd_ofiles[new] = fp; 244 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 245 rw_exit(&fdp->fd_lock); 246 247 *retval = new; 248 mutex_enter(&fp->f_lock); 249 fp->f_count++; 250 FILE_UNUSE_HAVELOCK(fp, l); 251 252 if (delfp != NULL) { 253 mutex_enter(&delfp->f_lock); 254 FILE_USE(delfp); 255 if (new < fdp->fd_knlistsize) 256 knote_fdclose(l, new); 257 (void) closef(delfp, l); 258 } 259 return (0); 260 } 261 262 /* 263 * Initialize the descriptor system. 264 */ 265 void 266 filedesc_init(void) 267 { 268 269 mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); 270 271 file_cache = pool_cache_init(sizeof(struct file), 0, 0, 0, 272 "filepl", NULL, IPL_NONE, NULL, NULL, NULL); 273 KASSERT(file_cache != NULL); 274 275 cwdi_cache = pool_cache_init(sizeof(struct cwdinfo), 0, 0, 0, 276 "cwdipl", NULL, IPL_NONE, cwdi_ctor, cwdi_dtor, NULL); 277 KASSERT(cwdi_cache != NULL); 278 279 filedesc0_cache = pool_cache_init(sizeof(struct filedesc0), 0, 0, 0, 280 "fdescpl", NULL, IPL_NONE, NULL, NULL, NULL); 281 KASSERT(filedesc0_cache != NULL); 282 } 283 284 /* 285 * System calls on descriptors. 286 */ 287 288 /* 289 * Duplicate a file descriptor. 290 */ 291 /* ARGSUSED */ 292 int 293 sys_dup(struct lwp *l, void *v, register_t *retval) 294 { 295 struct sys_dup_args /* { 296 syscallarg(int) fd; 297 } */ *uap = v; 298 struct file *fp; 299 struct filedesc *fdp; 300 struct proc *p; 301 int old, new, error; 302 303 p = l->l_proc; 304 fdp = p->p_fd; 305 old = SCARG(uap, fd); 306 307 restart: 308 if ((fp = fd_getfile(fdp, old)) == NULL) 309 return (EBADF); 310 311 FILE_USE(fp); 312 313 if ((error = fdalloc(p, 0, &new)) != 0) { 314 if (error == ENOSPC) { 315 fdexpand(p); 316 FILE_UNUSE(fp, l); 317 goto restart; 318 } 319 FILE_UNUSE(fp, l); 320 return (error); 321 } 322 323 /* finishdup() will unuse the descriptors for us */ 324 return (finishdup(l, old, new, retval)); 325 } 326 327 /* 328 * Duplicate a file descriptor to a particular value. 329 */ 330 /* ARGSUSED */ 331 int 332 sys_dup2(struct lwp *l, void *v, register_t *retval) 333 { 334 struct sys_dup2_args /* { 335 syscallarg(int) from; 336 syscallarg(int) to; 337 } */ *uap = v; 338 struct file *fp; 339 struct filedesc *fdp; 340 struct proc *p; 341 int old, new, i, error; 342 343 p = l->l_proc; 344 fdp = p->p_fd; 345 old = SCARG(uap, from); 346 new = SCARG(uap, to); 347 348 restart: 349 if ((fp = fd_getfile(fdp, old)) == NULL) 350 return (EBADF); 351 352 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 353 (u_int)new >= maxfiles) { 354 mutex_exit(&fp->f_lock); 355 return (EBADF); 356 } 357 358 if (old == new) { 359 mutex_exit(&fp->f_lock); 360 *retval = new; 361 return (0); 362 } 363 364 FILE_USE(fp); 365 366 if (new >= fdp->fd_nfiles) { 367 if ((error = fdalloc(p, new, &i)) != 0) { 368 if (error == ENOSPC) { 369 fdexpand(p); 370 FILE_UNUSE(fp, l); 371 goto restart; 372 } 373 FILE_UNUSE(fp, l); 374 return (error); 375 } 376 if (new != i) 377 panic("dup2: fdalloc"); 378 } else { 379 rw_enter(&fdp->fd_lock, RW_WRITER); 380 /* 381 * Mark `new' slot "used" only if it was empty. 382 */ 383 if (fdp->fd_ofiles[new] == NULL) 384 fd_used(fdp, new); 385 rw_exit(&fdp->fd_lock); 386 } 387 388 /* 389 * finishdup() will close the file that's in the `new' 390 * slot, if there's one there. 391 */ 392 393 /* finishdup() will unuse the descriptors for us */ 394 return (finishdup(l, old, new, retval)); 395 } 396 397 /* 398 * fcntl call which is being passed to the file's fs. 399 */ 400 static int 401 fcntl_forfs(int fd, struct lwp *l, int cmd, void *arg) 402 { 403 struct file *fp; 404 struct filedesc *fdp; 405 int error; 406 u_int size; 407 void *data, *memp; 408 #define STK_PARAMS 128 409 char stkbuf[STK_PARAMS]; 410 411 /* fd's value was validated in sys_fcntl before calling this routine */ 412 fdp = l->l_proc->p_fd; 413 fp = fdp->fd_ofiles[fd]; 414 415 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 416 return (EBADF); 417 418 /* 419 * Interpret high order word to find amount of data to be 420 * copied to/from the user's address space. 421 */ 422 size = (size_t)F_PARAM_LEN(cmd); 423 if (size > F_PARAM_MAX) 424 return (EINVAL); 425 memp = NULL; 426 if (size > sizeof(stkbuf)) { 427 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 428 data = memp; 429 } else 430 data = stkbuf; 431 if (cmd & F_FSIN) { 432 if (size) { 433 error = copyin(arg, data, size); 434 if (error) { 435 if (memp) 436 free(memp, M_IOCTLOPS); 437 return (error); 438 } 439 } else 440 *(void **)data = arg; 441 } else if ((cmd & F_FSOUT) && size) 442 /* 443 * Zero the buffer so the user always 444 * gets back something deterministic. 445 */ 446 memset(data, 0, size); 447 else if (cmd & F_FSVOID) 448 *(void **)data = arg; 449 450 451 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, l); 452 453 /* 454 * Copy any data to user, size was 455 * already set and checked above. 456 */ 457 if (error == 0 && (cmd & F_FSOUT) && size) 458 error = copyout(data, arg, size); 459 if (memp) 460 free(memp, M_IOCTLOPS); 461 return (error); 462 } 463 464 int 465 do_fcntl_lock(struct lwp *l, int fd, int cmd, struct flock *fl) 466 { 467 struct file *fp; 468 struct vnode *vp; 469 struct proc *p = l->l_proc; 470 int error, flg; 471 472 if ((fp = fd_getfile(p->p_fd, fd)) == NULL) 473 return (EBADF); 474 475 FILE_USE(fp); 476 477 if (fp->f_type != DTYPE_VNODE) { 478 error = EINVAL; 479 goto out; 480 } 481 vp = (struct vnode *)fp->f_data; 482 if (fl->l_whence == SEEK_CUR) 483 fl->l_start += fp->f_offset; 484 485 flg = F_POSIX; 486 487 switch (cmd) { 488 489 case F_SETLKW: 490 flg |= F_WAIT; 491 /* Fall into F_SETLK */ 492 493 case F_SETLK: 494 switch (fl->l_type) { 495 case F_RDLCK: 496 if ((fp->f_flag & FREAD) == 0) { 497 error = EBADF; 498 goto out; 499 } 500 p->p_flag |= PK_ADVLOCK; 501 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 502 goto out; 503 504 case F_WRLCK: 505 if ((fp->f_flag & FWRITE) == 0) { 506 error = EBADF; 507 goto out; 508 } 509 p->p_flag |= PK_ADVLOCK; 510 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg); 511 goto out; 512 513 case F_UNLCK: 514 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX); 515 goto out; 516 517 default: 518 error = EINVAL; 519 goto out; 520 } 521 522 case F_GETLK: 523 if (fl->l_type != F_RDLCK && 524 fl->l_type != F_WRLCK && 525 fl->l_type != F_UNLCK) { 526 error = EINVAL; 527 goto out; 528 } 529 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX); 530 break; 531 532 default: 533 error = EINVAL; 534 break; 535 } 536 537 out: 538 FILE_UNUSE(fp, l); 539 return error; 540 } 541 542 /* 543 * The file control system call. 544 */ 545 /* ARGSUSED */ 546 int 547 sys_fcntl(struct lwp *l, void *v, register_t *retval) 548 { 549 struct sys_fcntl_args /* { 550 syscallarg(int) fd; 551 syscallarg(int) cmd; 552 syscallarg(void *) arg; 553 } */ *uap = v; 554 struct filedesc *fdp; 555 struct file *fp; 556 struct proc *p; 557 int fd, i, tmp, error, cmd, newmin; 558 struct flock fl; 559 560 p = l->l_proc; 561 fd = SCARG(uap, fd); 562 cmd = SCARG(uap, cmd); 563 fdp = p->p_fd; 564 error = 0; 565 566 switch (cmd) { 567 case F_CLOSEM: 568 if (fd < 0) 569 return EBADF; 570 while (fdp->fd_lastfile >= fd) 571 fdrelease(l, fdp->fd_lastfile); 572 return 0; 573 574 case F_MAXFD: 575 *retval = fdp->fd_lastfile; 576 return 0; 577 578 case F_SETLKW: 579 case F_SETLK: 580 case F_GETLK: 581 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 582 if (error) 583 return error; 584 error = do_fcntl_lock(l, fd, cmd, &fl); 585 if (cmd == F_GETLK && error == 0) 586 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 587 return error; 588 589 default: 590 /* Handled below */ 591 break; 592 } 593 594 restart: 595 if ((fp = fd_getfile(fdp, fd)) == NULL) 596 return (EBADF); 597 598 FILE_USE(fp); 599 600 if ((cmd & F_FSCTL)) { 601 error = fcntl_forfs(fd, l, cmd, SCARG(uap, arg)); 602 goto out; 603 } 604 605 switch (cmd) { 606 607 case F_DUPFD: 608 newmin = (long)SCARG(uap, arg); 609 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 610 (u_int)newmin >= maxfiles) { 611 error = EINVAL; 612 goto out; 613 } 614 if ((error = fdalloc(p, newmin, &i)) != 0) { 615 if (error == ENOSPC) { 616 fdexpand(p); 617 FILE_UNUSE(fp, l); 618 goto restart; 619 } 620 goto out; 621 } 622 623 /* finishdup() will unuse the descriptors for us */ 624 return (finishdup(l, fd, i, retval)); 625 626 case F_GETFD: 627 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 628 break; 629 630 case F_SETFD: 631 if ((long)SCARG(uap, arg) & 1) 632 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 633 else 634 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 635 break; 636 637 case F_GETFL: 638 *retval = OFLAGS(fp->f_flag); 639 break; 640 641 case F_SETFL: 642 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 643 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, l); 644 if (error) 645 break; 646 i = tmp ^ fp->f_flag; 647 if (i & FNONBLOCK) { 648 int flgs = tmp & FNONBLOCK; 649 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs, l); 650 if (error) 651 goto reset_fcntl; 652 } 653 if (i & FASYNC) { 654 int flgs = tmp & FASYNC; 655 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs, l); 656 if (error) { 657 if (i & FNONBLOCK) { 658 tmp = fp->f_flag & FNONBLOCK; 659 (void)(*fp->f_ops->fo_ioctl)(fp, 660 FIONBIO, &tmp, l); 661 } 662 goto reset_fcntl; 663 } 664 } 665 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 666 break; 667 reset_fcntl: 668 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, l); 669 break; 670 671 case F_GETOWN: 672 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp, l); 673 *retval = tmp; 674 break; 675 676 case F_SETOWN: 677 tmp = (int)(intptr_t) SCARG(uap, arg); 678 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, l); 679 break; 680 681 default: 682 error = EINVAL; 683 } 684 685 out: 686 FILE_UNUSE(fp, l); 687 return (error); 688 } 689 690 void 691 fdremove(struct filedesc *fdp, int fd) 692 { 693 694 rw_enter(&fdp->fd_lock, RW_WRITER); 695 fdp->fd_ofiles[fd] = NULL; 696 fd_unused(fdp, fd); 697 rw_exit(&fdp->fd_lock); 698 } 699 700 int 701 fdrelease(struct lwp *l, int fd) 702 { 703 struct proc *p = l->l_proc; 704 struct filedesc *fdp; 705 struct file **fpp, *fp; 706 707 fdp = p->p_fd; 708 rw_enter(&fdp->fd_lock, RW_WRITER); 709 if (fd < 0 || fd > fdp->fd_lastfile) 710 goto badf; 711 fpp = &fdp->fd_ofiles[fd]; 712 fp = *fpp; 713 if (fp == NULL) 714 goto badf; 715 716 mutex_enter(&fp->f_lock); 717 if (!FILE_IS_USABLE(fp)) { 718 mutex_exit(&fp->f_lock); 719 goto badf; 720 } 721 722 FILE_USE(fp); 723 724 *fpp = NULL; 725 fdp->fd_ofileflags[fd] = 0; 726 fd_unused(fdp, fd); 727 rw_exit(&fdp->fd_lock); 728 if (fd < fdp->fd_knlistsize) 729 knote_fdclose(l, fd); 730 return (closef(fp, l)); 731 732 badf: 733 rw_exit(&fdp->fd_lock); 734 return (EBADF); 735 } 736 737 /* 738 * Close a file descriptor. 739 */ 740 /* ARGSUSED */ 741 int 742 sys_close(struct lwp *l, void *v, register_t *retval) 743 { 744 struct sys_close_args /* { 745 syscallarg(int) fd; 746 } */ *uap = v; 747 int fd; 748 struct filedesc *fdp; 749 struct proc *p; 750 751 p = l->l_proc; 752 fd = SCARG(uap, fd); 753 fdp = p->p_fd; 754 755 #if 0 756 if (fd_getfile(fdp, fd) == NULL) 757 return (EBADF); 758 #endif 759 760 return (fdrelease(l, fd)); 761 } 762 763 /* 764 * Return status information about a file descriptor. 765 * Common function for compat code. 766 */ 767 int 768 do_sys_fstat(struct lwp *l, int fd, struct stat *sb) 769 { 770 struct file *fp; 771 int error; 772 773 fp = fd_getfile(l->l_proc->p_fd, fd); 774 if (fp == NULL) 775 return EBADF; 776 777 FILE_USE(fp); 778 error = (*fp->f_ops->fo_stat)(fp, sb, l); 779 FILE_UNUSE(fp, l); 780 781 return error; 782 } 783 784 /* 785 * Return status information about a file descriptor. 786 */ 787 /* ARGSUSED */ 788 int 789 sys___fstat30(struct lwp *l, void *v, register_t *retval) 790 { 791 struct sys___fstat30_args /* { 792 syscallarg(int) fd; 793 syscallarg(struct stat *) sb; 794 } */ *uap = v; 795 struct stat sb; 796 int error; 797 798 error = do_sys_fstat(l, SCARG(uap, fd), &sb); 799 800 if (error == 0) 801 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 802 803 return (error); 804 } 805 806 /* 807 * Return pathconf information about a file descriptor. 808 */ 809 /* ARGSUSED */ 810 int 811 sys_fpathconf(struct lwp *l, void *v, register_t *retval) 812 { 813 struct sys_fpathconf_args /* { 814 syscallarg(int) fd; 815 syscallarg(int) name; 816 } */ *uap = v; 817 int fd; 818 struct filedesc *fdp; 819 struct file *fp; 820 struct proc *p; 821 struct vnode *vp; 822 int error; 823 824 p = l->l_proc; 825 fd = SCARG(uap, fd); 826 fdp = p->p_fd; 827 error = 0; 828 829 if ((fp = fd_getfile(fdp, fd)) == NULL) 830 return (EBADF); 831 832 FILE_USE(fp); 833 834 switch (fp->f_type) { 835 836 case DTYPE_SOCKET: 837 case DTYPE_PIPE: 838 if (SCARG(uap, name) != _PC_PIPE_BUF) 839 error = EINVAL; 840 else 841 *retval = PIPE_BUF; 842 break; 843 844 case DTYPE_VNODE: 845 vp = (struct vnode *)fp->f_data; 846 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 847 break; 848 849 case DTYPE_KQUEUE: 850 error = EINVAL; 851 break; 852 853 default: 854 error = EOPNOTSUPP; 855 break; 856 } 857 858 FILE_UNUSE(fp, l); 859 return (error); 860 } 861 862 /* 863 * Allocate a file descriptor for the process. 864 */ 865 int fdexpanded; /* XXX: what else uses this? */ 866 867 int 868 fdalloc(struct proc *p, int want, int *result) 869 { 870 struct filedesc *fdp; 871 int i, lim, last, error; 872 u_int off, new; 873 874 fdp = p->p_fd; 875 rw_enter(&fdp->fd_lock, RW_WRITER); 876 877 /* 878 * Search for a free descriptor starting at the higher 879 * of want or fd_freefile. If that fails, consider 880 * expanding the ofile array. 881 */ 882 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 883 last = min(fdp->fd_nfiles, lim); 884 again: 885 if ((i = want) < fdp->fd_freefile) 886 i = fdp->fd_freefile; 887 off = i >> NDENTRYSHIFT; 888 new = find_next_zero(fdp->fd_himap, off, 889 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 890 if (new != -1) { 891 i = find_next_zero(&fdp->fd_lomap[new], 892 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 893 if (i == -1) { 894 /* 895 * free file descriptor in this block was 896 * below want, try again with higher want. 897 */ 898 want = (new + 1) << NDENTRYSHIFT; 899 goto again; 900 } 901 i += (new << NDENTRYSHIFT); 902 if (i < last) { 903 if (fdp->fd_ofiles[i] == NULL) { 904 fd_used(fdp, i); 905 if (want <= fdp->fd_freefile) 906 fdp->fd_freefile = i; 907 *result = i; 908 error = 0; 909 goto out; 910 } 911 } 912 } 913 914 /* No space in current array. Expand or let the caller do it. */ 915 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 916 917 out: 918 rw_exit(&fdp->fd_lock); 919 return (error); 920 } 921 922 void 923 fdexpand(struct proc *p) 924 { 925 struct filedesc *fdp; 926 int i, numfiles, oldnfiles; 927 struct file **newofile; 928 char *newofileflags; 929 uint32_t *newhimap = NULL, *newlomap = NULL; 930 931 fdp = p->p_fd; 932 933 restart: 934 oldnfiles = fdp->fd_nfiles; 935 936 if (oldnfiles < NDEXTENT) 937 numfiles = NDEXTENT; 938 else 939 numfiles = 2 * oldnfiles; 940 941 newofile = malloc(numfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 942 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 943 newhimap = malloc(NDHISLOTS(numfiles) * sizeof(uint32_t), 944 M_FILEDESC, M_WAITOK); 945 newlomap = malloc(NDLOSLOTS(numfiles) * sizeof(uint32_t), 946 M_FILEDESC, M_WAITOK); 947 } 948 949 rw_enter(&fdp->fd_lock, RW_WRITER); 950 /* lock fdp */ 951 if (fdp->fd_nfiles != oldnfiles) { 952 /* fdp changed; retry */ 953 rw_exit(&fdp->fd_lock); 954 free(newofile, M_FILEDESC); 955 if (newhimap != NULL) free(newhimap, M_FILEDESC); 956 if (newlomap != NULL) free(newlomap, M_FILEDESC); 957 goto restart; 958 } 959 960 newofileflags = (char *) &newofile[numfiles]; 961 /* 962 * Copy the existing ofile and ofileflags arrays 963 * and zero the new portion of each array. 964 */ 965 memcpy(newofile, fdp->fd_ofiles, 966 (i = sizeof(struct file *) * fdp->fd_nfiles)); 967 memset((char *)newofile + i, 0, 968 numfiles * sizeof(struct file *) - i); 969 memcpy(newofileflags, fdp->fd_ofileflags, 970 (i = sizeof(char) * fdp->fd_nfiles)); 971 memset(newofileflags + i, 0, numfiles * sizeof(char) - i); 972 if (oldnfiles > NDFILE) 973 free(fdp->fd_ofiles, M_FILEDESC); 974 975 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 976 memcpy(newhimap, fdp->fd_himap, 977 (i = NDHISLOTS(oldnfiles) * sizeof(uint32_t))); 978 memset((char *)newhimap + i, 0, 979 NDHISLOTS(numfiles) * sizeof(uint32_t) - i); 980 981 memcpy(newlomap, fdp->fd_lomap, 982 (i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t))); 983 memset((char *)newlomap + i, 0, 984 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); 985 986 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { 987 free(fdp->fd_himap, M_FILEDESC); 988 free(fdp->fd_lomap, M_FILEDESC); 989 } 990 fdp->fd_himap = newhimap; 991 fdp->fd_lomap = newlomap; 992 } 993 994 fdp->fd_ofiles = newofile; 995 fdp->fd_ofileflags = newofileflags; 996 fdp->fd_nfiles = numfiles; 997 998 rw_exit(&fdp->fd_lock); 999 1000 fdexpanded++; 1001 } 1002 1003 /* 1004 * Create a new open file structure and allocate 1005 * a file descriptor for the process that refers to it. 1006 */ 1007 int 1008 falloc(struct lwp *l, struct file **resultfp, int *resultfd) 1009 { 1010 struct filedesc *fdp; 1011 struct file *fp, *fq; 1012 struct proc *p; 1013 int error, i; 1014 1015 p = l->l_proc; 1016 fdp = p->p_fd; 1017 1018 restart: 1019 if ((error = fdalloc(p, 0, &i)) != 0) { 1020 if (error == ENOSPC) { 1021 fdexpand(p); 1022 goto restart; 1023 } 1024 return (error); 1025 } 1026 1027 fp = pool_cache_get(file_cache, PR_WAITOK); 1028 memset(fp, 0, sizeof(struct file)); 1029 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1030 mutex_enter(&filelist_lock); 1031 if (nfiles >= maxfiles) { 1032 tablefull("file", "increase kern.maxfiles or MAXFILES"); 1033 mutex_exit(&filelist_lock); 1034 rw_enter(&fdp->fd_lock, RW_WRITER); 1035 fd_unused(fdp, i); 1036 rw_exit(&fdp->fd_lock); 1037 mutex_destroy(&fp->f_lock); 1038 pool_cache_put(file_cache, fp); 1039 return (ENFILE); 1040 } 1041 /* 1042 * Allocate a new file descriptor. 1043 * If the process has file descriptor zero open, add to the list 1044 * of open files at that point, otherwise put it at the front of 1045 * the list of open files. 1046 */ 1047 nfiles++; 1048 fp->f_iflags = FIF_LARVAL; 1049 cv_init(&fp->f_cv, "closef"); 1050 rw_enter(&fdp->fd_lock, RW_WRITER); /* XXXAD check order */ 1051 if ((fq = fdp->fd_ofiles[0]) != NULL) { 1052 LIST_INSERT_AFTER(fq, fp, f_list); 1053 } else { 1054 LIST_INSERT_HEAD(&filehead, fp, f_list); 1055 } 1056 KDASSERT(fdp->fd_ofiles[i] == NULL); 1057 fdp->fd_ofiles[i] = fp; 1058 fp->f_count = 1; 1059 fp->f_cred = l->l_cred; 1060 kauth_cred_hold(fp->f_cred); 1061 if (resultfp) { 1062 fp->f_usecount = 1; 1063 *resultfp = fp; 1064 } 1065 mutex_exit(&filelist_lock); 1066 rw_exit(&fdp->fd_lock); 1067 if (resultfd) 1068 *resultfd = i; 1069 1070 return (0); 1071 } 1072 1073 /* 1074 * Free a file descriptor. 1075 */ 1076 void 1077 ffree(struct file *fp) 1078 { 1079 kauth_cred_t cred; 1080 1081 #ifdef DIAGNOSTIC 1082 if (fp->f_usecount) 1083 panic("ffree"); 1084 #endif 1085 1086 mutex_enter(&filelist_lock); 1087 LIST_REMOVE(fp, f_list); 1088 cred = fp->f_cred; 1089 #ifdef DIAGNOSTIC 1090 fp->f_cred = NULL; 1091 fp->f_count = 0; /* What's the point? */ 1092 #endif 1093 nfiles--; 1094 mutex_exit(&filelist_lock); 1095 mutex_destroy(&fp->f_lock); 1096 cv_destroy(&fp->f_cv); 1097 pool_cache_put(file_cache, fp); 1098 kauth_cred_free(cred); 1099 } 1100 1101 /* 1102 * Create an initial cwdinfo structure, using the same current and root 1103 * directories as p. 1104 */ 1105 struct cwdinfo * 1106 cwdinit(struct proc *p) 1107 { 1108 struct cwdinfo *cwdi; 1109 struct cwdinfo *copy; 1110 1111 cwdi = pool_cache_get(cwdi_cache, PR_WAITOK); 1112 copy = p->p_cwdi; 1113 1114 rw_enter(©->cwdi_lock, RW_READER); 1115 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir; 1116 if (cwdi->cwdi_cdir) 1117 VREF(cwdi->cwdi_cdir); 1118 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir; 1119 if (cwdi->cwdi_rdir) 1120 VREF(cwdi->cwdi_rdir); 1121 cwdi->cwdi_edir = p->p_cwdi->cwdi_edir; 1122 if (cwdi->cwdi_edir) 1123 VREF(cwdi->cwdi_edir); 1124 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask; 1125 cwdi->cwdi_refcnt = 1; 1126 rw_exit(©->cwdi_lock); 1127 1128 return (cwdi); 1129 } 1130 1131 static int 1132 cwdi_ctor(void *arg, void *obj, int flags) 1133 { 1134 struct cwdinfo *cwdi; 1135 1136 cwdi = obj; 1137 rw_init(&cwdi->cwdi_lock); 1138 1139 return 0; 1140 } 1141 1142 static void 1143 cwdi_dtor(void *arg, void *obj) 1144 { 1145 struct cwdinfo *cwdi; 1146 1147 cwdi = obj; 1148 rw_destroy(&cwdi->cwdi_lock); 1149 } 1150 1151 /* 1152 * Make p2 share p1's cwdinfo. 1153 */ 1154 void 1155 cwdshare(struct proc *p1, struct proc *p2) 1156 { 1157 struct cwdinfo *cwdi = p1->p_cwdi; 1158 1159 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1160 cwdi->cwdi_refcnt++; 1161 rw_exit(&cwdi->cwdi_lock); 1162 p2->p_cwdi = cwdi; 1163 } 1164 1165 /* 1166 * Make this process not share its cwdinfo structure, maintaining 1167 * all cwdinfo state. 1168 */ 1169 void 1170 cwdunshare(struct proc *p) 1171 { 1172 struct cwdinfo *oldcwdi, *newcwdi; 1173 1174 if (p->p_cwdi->cwdi_refcnt == 1) 1175 return; 1176 1177 newcwdi = cwdinit(p); 1178 oldcwdi = p->p_cwdi; 1179 p->p_cwdi = newcwdi; 1180 cwdfree(oldcwdi); 1181 } 1182 1183 /* 1184 * Release a cwdinfo structure. 1185 */ 1186 void 1187 cwdfree(struct cwdinfo *cwdi) 1188 { 1189 int n; 1190 1191 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1192 n = --cwdi->cwdi_refcnt; 1193 rw_exit(&cwdi->cwdi_lock); 1194 if (n > 0) 1195 return; 1196 1197 vrele(cwdi->cwdi_cdir); 1198 if (cwdi->cwdi_rdir) 1199 vrele(cwdi->cwdi_rdir); 1200 if (cwdi->cwdi_edir) 1201 vrele(cwdi->cwdi_edir); 1202 pool_cache_put(cwdi_cache, cwdi); 1203 } 1204 1205 /* 1206 * Create an initial filedesc structure, using the same current and root 1207 * directories as p. 1208 */ 1209 struct filedesc * 1210 fdinit(struct proc *p) 1211 { 1212 struct filedesc0 *newfdp; 1213 1214 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK); 1215 memset(newfdp, 0, sizeof(struct filedesc0)); 1216 1217 fdinit1(newfdp); 1218 1219 return (&newfdp->fd_fd); 1220 } 1221 1222 /* 1223 * Initialize a file descriptor table. 1224 */ 1225 void 1226 fdinit1(struct filedesc0 *newfdp) 1227 { 1228 1229 newfdp->fd_fd.fd_refcnt = 1; 1230 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1231 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1232 newfdp->fd_fd.fd_nfiles = NDFILE; 1233 newfdp->fd_fd.fd_knlistsize = -1; 1234 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 1235 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 1236 newfdp->fd_fd.fd_lastfile = -1; 1237 rw_init(&newfdp->fd_fd.fd_lock); 1238 } 1239 1240 /* 1241 * Make p2 share p1's filedesc structure. 1242 */ 1243 void 1244 fdshare(struct proc *p1, struct proc *p2) 1245 { 1246 struct filedesc *fdp = p1->p_fd; 1247 1248 rw_enter(&fdp->fd_lock, RW_WRITER); 1249 p2->p_fd = fdp; 1250 fdp->fd_refcnt++; 1251 rw_exit(&fdp->fd_lock); 1252 } 1253 1254 /* 1255 * Make this process not share its filedesc structure, maintaining 1256 * all file descriptor state. 1257 */ 1258 void 1259 fdunshare(struct lwp *l) 1260 { 1261 struct proc *p = l->l_proc; 1262 struct filedesc *newfd; 1263 1264 if (p->p_fd->fd_refcnt == 1) 1265 return; 1266 1267 newfd = fdcopy(p); 1268 fdfree(l); 1269 p->p_fd = newfd; 1270 } 1271 1272 /* 1273 * Clear a process's fd table. 1274 */ 1275 void 1276 fdclear(struct lwp *l) 1277 { 1278 struct proc *p = l->l_proc; 1279 struct filedesc *newfd; 1280 1281 newfd = fdinit(p); 1282 fdfree(l); 1283 p->p_fd = newfd; 1284 } 1285 1286 /* 1287 * Copy a filedesc structure. 1288 */ 1289 struct filedesc * 1290 fdcopy(struct proc *p) 1291 { 1292 struct filedesc *newfdp, *fdp; 1293 struct file **fpp, **nfpp; 1294 int i, numfiles, lastfile; 1295 1296 fdp = p->p_fd; 1297 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK); 1298 newfdp->fd_refcnt = 1; 1299 rw_init(&newfdp->fd_lock); 1300 1301 restart: 1302 numfiles = fdp->fd_nfiles; 1303 lastfile = fdp->fd_lastfile; 1304 1305 /* 1306 * If the number of open files fits in the internal arrays 1307 * of the open file structure, use them, otherwise allocate 1308 * additional memory for the number of descriptors currently 1309 * in use. 1310 */ 1311 if (lastfile < NDFILE) { 1312 i = NDFILE; 1313 } else { 1314 /* 1315 * Compute the smallest multiple of NDEXTENT needed 1316 * for the file descriptors currently in use, 1317 * allowing the table to shrink. 1318 */ 1319 i = numfiles; 1320 while (i >= 2 * NDEXTENT && i > lastfile * 2) 1321 i /= 2; 1322 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 1323 } 1324 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1325 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t), 1326 M_FILEDESC, M_WAITOK); 1327 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t), 1328 M_FILEDESC, M_WAITOK); 1329 } 1330 1331 rw_enter(&fdp->fd_lock, RW_READER); 1332 if (numfiles != fdp->fd_nfiles || lastfile != fdp->fd_lastfile) { 1333 rw_exit(&fdp->fd_lock); 1334 if (i > NDFILE) 1335 free(newfdp->fd_ofiles, M_FILEDESC); 1336 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1337 free(newfdp->fd_himap, M_FILEDESC); 1338 free(newfdp->fd_lomap, M_FILEDESC); 1339 } 1340 goto restart; 1341 } 1342 1343 if (lastfile < NDFILE) { 1344 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1345 newfdp->fd_ofileflags = 1346 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1347 } else { 1348 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1349 } 1350 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1351 newfdp->fd_himap = 1352 ((struct filedesc0 *) newfdp)->fd_dhimap; 1353 newfdp->fd_lomap = 1354 ((struct filedesc0 *) newfdp)->fd_dlomap; 1355 } 1356 1357 newfdp->fd_nfiles = i; 1358 newfdp->fd_lastfile = lastfile; 1359 newfdp->fd_freefile = fdp->fd_freefile; 1360 1361 /* Clear the entries that will not be copied over. 1362 * Avoid calling memset with 0 size (i.e. when 1363 * lastfile == i-1 */ 1364 if (lastfile < (i-1)) 1365 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1366 (i - lastfile - 1) * sizeof(struct file **)); 1367 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1368 if (i < NDENTRIES * NDENTRIES) 1369 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1370 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1371 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1372 1373 fpp = fdp->fd_ofiles; 1374 nfpp = newfdp->fd_ofiles; 1375 for (i = 0; i <= lastfile; i++, fpp++, nfpp++) { 1376 if ((*nfpp = *fpp) == NULL) 1377 continue; 1378 1379 if ((*fpp)->f_type == DTYPE_KQUEUE) 1380 /* kq descriptors cannot be copied. */ 1381 fdremove(newfdp, i); 1382 else { 1383 mutex_enter(&(*fpp)->f_lock); 1384 (*fpp)->f_count++; 1385 mutex_exit(&(*fpp)->f_lock); 1386 } 1387 } 1388 1389 rw_exit(&fdp->fd_lock); 1390 1391 newfdp->fd_knlist = NULL; 1392 newfdp->fd_knlistsize = -1; 1393 newfdp->fd_knhash = NULL; 1394 newfdp->fd_knhashmask = 0; 1395 1396 return (newfdp); 1397 } 1398 1399 /* 1400 * Release a filedesc structure. 1401 */ 1402 void 1403 fdfree(struct lwp *l) 1404 { 1405 struct proc *p = l->l_proc; 1406 struct filedesc *fdp; 1407 struct file **fpp, *fp; 1408 int i; 1409 1410 fdp = p->p_fd; 1411 rw_enter(&fdp->fd_lock, RW_WRITER); 1412 i = --fdp->fd_refcnt; 1413 rw_exit(&fdp->fd_lock); 1414 if (i > 0) 1415 return; 1416 1417 rw_destroy(&fdp->fd_lock); 1418 fpp = fdp->fd_ofiles; 1419 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1420 fp = *fpp; 1421 if (fp != NULL) { 1422 *fpp = NULL; 1423 mutex_enter(&fp->f_lock); 1424 FILE_USE(fp); 1425 if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize) 1426 knote_fdclose(l, fdp->fd_lastfile - i); 1427 (void) closef(fp, l); 1428 } 1429 } 1430 p->p_fd = NULL; 1431 if (fdp->fd_nfiles > NDFILE) 1432 free(fdp->fd_ofiles, M_FILEDESC); 1433 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1434 free(fdp->fd_himap, M_FILEDESC); 1435 free(fdp->fd_lomap, M_FILEDESC); 1436 } 1437 if (fdp->fd_knlist) 1438 free(fdp->fd_knlist, M_KEVENT); 1439 if (fdp->fd_knhash) 1440 hashdone(fdp->fd_knhash, M_KEVENT); 1441 pool_cache_put(filedesc0_cache, fdp); 1442 } 1443 1444 /* 1445 * Internal form of close. 1446 * Decrement reference count on file structure. 1447 * Note: p may be NULL when closing a file 1448 * that was being passed in a message. 1449 * 1450 * Note: we expect the caller is holding a usecount, and expects us 1451 * to drop it (the caller thinks the file is going away forever). 1452 */ 1453 int 1454 closef(struct file *fp, struct lwp *l) 1455 { 1456 struct proc *p = l ? l->l_proc : NULL; 1457 struct vnode *vp; 1458 struct flock lf; 1459 int error; 1460 1461 if (fp == NULL) 1462 return (0); 1463 1464 /* 1465 * POSIX record locking dictates that any close releases ALL 1466 * locks owned by this process. This is handled by setting 1467 * a flag in the unlock to free ONLY locks obeying POSIX 1468 * semantics, and not to free BSD-style file locks. 1469 * If the descriptor was in a message, POSIX-style locks 1470 * aren't passed with the descriptor. 1471 */ 1472 if (p && (p->p_flag & PK_ADVLOCK) && fp->f_type == DTYPE_VNODE) { 1473 lf.l_whence = SEEK_SET; 1474 lf.l_start = 0; 1475 lf.l_len = 0; 1476 lf.l_type = F_UNLCK; 1477 vp = (struct vnode *)fp->f_data; 1478 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX); 1479 } 1480 1481 /* 1482 * If WANTCLOSE is set, then the reference count on the file 1483 * is 0, but there were multiple users of the file. This can 1484 * happen if a filedesc structure is shared by multiple 1485 * processes. 1486 */ 1487 mutex_enter(&fp->f_lock); 1488 if (fp->f_iflags & FIF_WANTCLOSE) { 1489 /* 1490 * Another user of the file is already closing, and is 1491 * simply waiting for other users of the file to drain. 1492 * Release our usecount, and wake up the closer if it 1493 * is the only remaining use. 1494 */ 1495 #ifdef DIAGNOSTIC 1496 if (fp->f_count != 0) 1497 panic("closef: wantclose and count != 0"); 1498 if (fp->f_usecount < 2) 1499 panic("closef: wantclose and usecount < 2"); 1500 #endif 1501 if (--fp->f_usecount == 1) 1502 cv_broadcast(&fp->f_cv); 1503 mutex_exit(&fp->f_lock); 1504 return (0); 1505 } else { 1506 /* 1507 * Decrement the reference count. If we were not the 1508 * last reference, then release our use and just 1509 * return. 1510 */ 1511 if (--fp->f_count > 0) { 1512 #ifdef DIAGNOSTIC 1513 if (fp->f_usecount < 1) 1514 panic("closef: no wantclose and usecount < 1"); 1515 #endif 1516 fp->f_usecount--; 1517 mutex_exit(&fp->f_lock); 1518 return (0); 1519 } 1520 } 1521 1522 /* 1523 * The reference count is now 0. However, there may be 1524 * multiple potential users of this file. This can happen 1525 * if multiple processes shared a single filedesc structure. 1526 * 1527 * Notify these potential users that the file is closing. 1528 * This will prevent them from adding additional uses to 1529 * the file. 1530 */ 1531 fp->f_iflags |= FIF_WANTCLOSE; 1532 1533 /* 1534 * We expect the caller to add a use to the file. So, if we 1535 * are the last user, usecount will be 1. If it is not, we 1536 * must wait for the usecount to drain. When it drains back 1537 * to 1, we will be awakened so that we may proceed with the 1538 * close. 1539 */ 1540 #ifdef DIAGNOSTIC 1541 if (fp->f_usecount < 1) 1542 panic("closef: usecount < 1"); 1543 #endif 1544 while (fp->f_usecount > 1) 1545 cv_wait(&fp->f_cv, &fp->f_lock); 1546 #ifdef DIAGNOSTIC 1547 if (fp->f_usecount != 1) 1548 panic("closef: usecount != 1"); 1549 #endif 1550 1551 mutex_exit(&fp->f_lock); 1552 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1553 lf.l_whence = SEEK_SET; 1554 lf.l_start = 0; 1555 lf.l_len = 0; 1556 lf.l_type = F_UNLCK; 1557 vp = (struct vnode *)fp->f_data; 1558 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1559 } 1560 if (fp->f_ops) 1561 error = (*fp->f_ops->fo_close)(fp, l); 1562 else 1563 error = 0; 1564 1565 /* Nothing references the file now, drop the final use (us). */ 1566 fp->f_usecount--; 1567 1568 ffree(fp); 1569 return (error); 1570 } 1571 1572 /* 1573 * Apply an advisory lock on a file descriptor. 1574 * 1575 * Just attempt to get a record lock of the requested type on 1576 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1577 */ 1578 /* ARGSUSED */ 1579 int 1580 sys_flock(struct lwp *l, void *v, register_t *retval) 1581 { 1582 struct sys_flock_args /* { 1583 syscallarg(int) fd; 1584 syscallarg(int) how; 1585 } */ *uap = v; 1586 int fd, how, error; 1587 struct proc *p; 1588 struct filedesc *fdp; 1589 struct file *fp; 1590 struct vnode *vp; 1591 struct flock lf; 1592 1593 p = l->l_proc; 1594 fd = SCARG(uap, fd); 1595 how = SCARG(uap, how); 1596 fdp = p->p_fd; 1597 error = 0; 1598 1599 if ((fp = fd_getfile(fdp, fd)) == NULL) 1600 return (EBADF); 1601 1602 FILE_USE(fp); 1603 1604 if (fp->f_type != DTYPE_VNODE) { 1605 error = EOPNOTSUPP; 1606 goto out; 1607 } 1608 1609 vp = (struct vnode *)fp->f_data; 1610 lf.l_whence = SEEK_SET; 1611 lf.l_start = 0; 1612 lf.l_len = 0; 1613 if (how & LOCK_UN) { 1614 lf.l_type = F_UNLCK; 1615 fp->f_flag &= ~FHASLOCK; 1616 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1617 goto out; 1618 } 1619 if (how & LOCK_EX) 1620 lf.l_type = F_WRLCK; 1621 else if (how & LOCK_SH) 1622 lf.l_type = F_RDLCK; 1623 else { 1624 error = EINVAL; 1625 goto out; 1626 } 1627 fp->f_flag |= FHASLOCK; 1628 if (how & LOCK_NB) 1629 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 1630 else 1631 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, 1632 F_FLOCK|F_WAIT); 1633 out: 1634 FILE_UNUSE(fp, l); 1635 return (error); 1636 } 1637 1638 /* ARGSUSED */ 1639 int 1640 sys_posix_fadvise(struct lwp *l, void *v, register_t *retval) 1641 { 1642 const struct sys_posix_fadvise_args /* { 1643 syscallarg(int) fd; 1644 syscallarg(off_t) offset; 1645 syscallarg(off_t) len; 1646 syscallarg(int) advice; 1647 } */ *uap = v; 1648 const int fd = SCARG(uap, fd); 1649 const int advice = SCARG(uap, advice); 1650 struct proc *p = l->l_proc; 1651 struct file *fp; 1652 int error = 0; 1653 1654 fp = fd_getfile(p->p_fd, fd); 1655 if (fp == NULL) { 1656 error = EBADF; 1657 goto out; 1658 } 1659 FILE_USE(fp); 1660 1661 if (fp->f_type != DTYPE_VNODE) { 1662 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1663 error = ESPIPE; 1664 } else { 1665 error = EOPNOTSUPP; 1666 } 1667 goto out; 1668 } 1669 1670 switch (advice) { 1671 case POSIX_FADV_NORMAL: 1672 case POSIX_FADV_RANDOM: 1673 case POSIX_FADV_SEQUENTIAL: 1674 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 1675 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 1676 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 1677 1678 /* 1679 * we ignore offset and size. 1680 */ 1681 1682 fp->f_advice = advice; 1683 break; 1684 1685 case POSIX_FADV_WILLNEED: 1686 case POSIX_FADV_DONTNEED: 1687 case POSIX_FADV_NOREUSE: 1688 1689 /* 1690 * not implemented yet. 1691 */ 1692 1693 break; 1694 default: 1695 error = EINVAL; 1696 break; 1697 } 1698 out: 1699 if (fp != NULL) { 1700 FILE_UNUSE(fp, l); 1701 } 1702 *retval = error; 1703 return 0; 1704 } 1705 1706 /* 1707 * File Descriptor pseudo-device driver (/dev/fd/). 1708 * 1709 * Opening minor device N dup()s the file (if any) connected to file 1710 * descriptor N belonging to the calling process. Note that this driver 1711 * consists of only the ``open()'' routine, because all subsequent 1712 * references to this file will be direct to the other driver. 1713 */ 1714 /* ARGSUSED */ 1715 static int 1716 filedescopen(dev_t dev, int mode, int type, struct lwp *l) 1717 { 1718 1719 /* 1720 * XXX Kludge: set dupfd to contain the value of the 1721 * the file descriptor being sought for duplication. The error 1722 * return ensures that the vnode for this device will be released 1723 * by vn_open. Open will detect this special error and take the 1724 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1725 * will simply report the error. 1726 */ 1727 l->l_dupfd = minor(dev); /* XXX */ 1728 return EDUPFD; 1729 } 1730 1731 const struct cdevsw filedesc_cdevsw = { 1732 filedescopen, noclose, noread, nowrite, noioctl, 1733 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER, 1734 }; 1735 1736 /* 1737 * Duplicate the specified descriptor to a free descriptor. 1738 * 1739 * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller. 1740 */ 1741 int 1742 dupfdopen(struct lwp *l, int indx, int dfd, int mode, int error) 1743 { 1744 struct proc *p = l->l_proc; 1745 struct filedesc *fdp; 1746 struct file *wfp; 1747 1748 fdp = p->p_fd; 1749 1750 /* should be cleared by the caller */ 1751 KASSERT(fdp->fd_ofiles[indx] == NULL); 1752 1753 /* 1754 * If the to-be-dup'd fd number is greater than the allowed number 1755 * of file descriptors, or the fd to be dup'd has already been 1756 * closed, reject. 1757 */ 1758 1759 /* 1760 * Note, in the case of indx == dfd, fd_getfile below returns NULL. 1761 */ 1762 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1763 return (EBADF); 1764 1765 FILE_USE(wfp); 1766 1767 /* 1768 * There are two cases of interest here. 1769 * 1770 * For EDUPFD simply dup (dfd) to file descriptor 1771 * (indx) and return. 1772 * 1773 * For EMOVEFD steal away the file structure from (dfd) and 1774 * store it in (indx). (dfd) is effectively closed by 1775 * this operation. 1776 * 1777 * Any other error code is just returned. 1778 */ 1779 switch (error) { 1780 case EDUPFD: 1781 /* 1782 * Check that the mode the file is being opened for is a 1783 * subset of the mode of the existing descriptor. 1784 */ 1785 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 1786 FILE_UNUSE(wfp, l); 1787 return (EACCES); 1788 } 1789 rw_enter(&fdp->fd_lock, RW_WRITER); 1790 fdp->fd_ofiles[indx] = wfp; 1791 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1792 rw_exit(&fdp->fd_lock); 1793 mutex_enter(&wfp->f_lock); 1794 wfp->f_count++; 1795 /* 'indx' has been fd_used'ed by caller */ 1796 FILE_UNUSE_HAVELOCK(wfp, l); 1797 return (0); 1798 1799 case EMOVEFD: 1800 /* 1801 * Steal away the file pointer from dfd, and stuff it into indx. 1802 */ 1803 rw_enter(&fdp->fd_lock, RW_WRITER); 1804 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1805 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1806 fdp->fd_ofiles[dfd] = NULL; 1807 fdp->fd_ofileflags[dfd] = 0; 1808 /* 1809 * Complete the clean up of the filedesc structure by 1810 * recomputing the various hints. 1811 */ 1812 /* 'indx' has been fd_used'ed by caller */ 1813 fd_unused(fdp, dfd); 1814 rw_exit(&fdp->fd_lock); 1815 FILE_UNUSE(wfp, l); 1816 return (0); 1817 1818 default: 1819 FILE_UNUSE(wfp, l); 1820 return (error); 1821 } 1822 /* NOTREACHED */ 1823 } 1824 1825 /* 1826 * Close any files on exec? 1827 */ 1828 void 1829 fdcloseexec(struct lwp *l) 1830 { 1831 struct proc *p = l->l_proc; 1832 struct filedesc *fdp; 1833 int fd; 1834 1835 fdunshare(l); 1836 cwdunshare(p); 1837 1838 if (p->p_cwdi->cwdi_edir) 1839 vrele(p->p_cwdi->cwdi_edir); 1840 1841 fdp = p->p_fd; 1842 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1843 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1844 (void) fdrelease(l, fd); 1845 } 1846 1847 /* 1848 * It is unsafe for set[ug]id processes to be started with file 1849 * descriptors 0..2 closed, as these descriptors are given implicit 1850 * significance in the Standard C library. fdcheckstd() will create a 1851 * descriptor referencing /dev/null for each of stdin, stdout, and 1852 * stderr that is not already open. 1853 */ 1854 #define CHECK_UPTO 3 1855 int 1856 fdcheckstd(struct lwp *l) 1857 { 1858 struct proc *p; 1859 struct nameidata nd; 1860 struct filedesc *fdp; 1861 struct file *fp; 1862 struct file *devnullfp = NULL; /* Quell compiler warning */ 1863 struct proc *pp; 1864 register_t retval; 1865 int fd, i, error, flags = FREAD|FWRITE, devnull = -1; 1866 char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; 1867 1868 p = l->l_proc; 1869 closed[0] = '\0'; 1870 if ((fdp = p->p_fd) == NULL) 1871 return (0); 1872 for (i = 0; i < CHECK_UPTO; i++) { 1873 if (fdp->fd_ofiles[i] != NULL) 1874 continue; 1875 snprintf(which, sizeof(which), ",%d", i); 1876 strlcat(closed, which, sizeof(closed)); 1877 if (devnullfp == NULL) { 1878 if ((error = falloc(l, &fp, &fd)) != 0) 1879 return (error); 1880 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1881 l); 1882 if ((error = vn_open(&nd, flags, 0)) != 0) { 1883 FILE_UNUSE(fp, l); 1884 ffree(fp); 1885 fdremove(p->p_fd, fd); 1886 return (error); 1887 } 1888 fp->f_data = nd.ni_vp; 1889 fp->f_flag = flags; 1890 fp->f_ops = &vnops; 1891 fp->f_type = DTYPE_VNODE; 1892 VOP_UNLOCK(nd.ni_vp, 0); 1893 devnull = fd; 1894 devnullfp = fp; 1895 FILE_SET_MATURE(fp); 1896 } else { 1897 restart: 1898 if ((error = fdalloc(p, 0, &fd)) != 0) { 1899 if (error == ENOSPC) { 1900 fdexpand(p); 1901 goto restart; 1902 } 1903 return (error); 1904 } 1905 1906 mutex_enter(&devnullfp->f_lock); 1907 FILE_USE(devnullfp); 1908 /* finishdup() will unuse the descriptors for us */ 1909 if ((error = finishdup(l, devnull, fd, &retval)) != 0) 1910 return (error); 1911 } 1912 } 1913 if (devnullfp) 1914 FILE_UNUSE(devnullfp, l); 1915 if (closed[0] != '\0') { 1916 mutex_enter(&proclist_lock); 1917 pp = p->p_pptr; 1918 mutex_enter(&pp->p_mutex); 1919 log(LOG_WARNING, "set{u,g}id pid %d (%s) " 1920 "was invoked by uid %d ppid %d (%s) " 1921 "with fd %s closed\n", 1922 p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred), 1923 pp->p_pid, pp->p_comm, &closed[1]); 1924 mutex_exit(&pp->p_mutex); 1925 mutex_exit(&proclist_lock); 1926 } 1927 return (0); 1928 } 1929 #undef CHECK_UPTO 1930 1931 /* 1932 * Sets descriptor owner. If the owner is a process, 'pgid' 1933 * is set to positive value, process ID. If the owner is process group, 1934 * 'pgid' is set to -pg_id. 1935 */ 1936 int 1937 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data) 1938 { 1939 int id = *(const int *)data; 1940 int error; 1941 1942 switch (cmd) { 1943 case TIOCSPGRP: 1944 if (id < 0) 1945 return (EINVAL); 1946 id = -id; 1947 break; 1948 default: 1949 break; 1950 } 1951 1952 if (id > 0 && !pfind(id)) 1953 return (ESRCH); 1954 else if (id < 0 && (error = pgid_in_session(p, -id))) 1955 return (error); 1956 1957 *pgid = id; 1958 return (0); 1959 } 1960 1961 /* 1962 * Return descriptor owner information. If the value is positive, 1963 * it's process ID. If it's negative, it's process group ID and 1964 * needs the sign removed before use. 1965 */ 1966 int 1967 fgetown(struct proc *p, pid_t pgid, int cmd, void *data) 1968 { 1969 switch (cmd) { 1970 case TIOCGPGRP: 1971 *(int *)data = -pgid; 1972 break; 1973 default: 1974 *(int *)data = pgid; 1975 break; 1976 } 1977 return (0); 1978 } 1979 1980 /* 1981 * Send signal to descriptor owner, either process or process group. 1982 */ 1983 void 1984 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1985 { 1986 struct proc *p1; 1987 struct pgrp *pgrp; 1988 ksiginfo_t ksi; 1989 1990 KSI_INIT(&ksi); 1991 ksi.ksi_signo = signo; 1992 ksi.ksi_code = code; 1993 ksi.ksi_band = band; 1994 1995 /* 1996 * Since we may be called from an interrupt context, we must use 1997 * the proclist_mutex. 1998 */ 1999 mutex_enter(&proclist_mutex); 2000 if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED))) 2001 kpsignal(p1, &ksi, fdescdata); 2002 else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED))) 2003 kpgsignal(pgrp, &ksi, fdescdata, 0); 2004 mutex_exit(&proclist_mutex); 2005 } 2006 2007 int 2008 fdclone(struct lwp *l, struct file *fp, int fd, int flag, 2009 const struct fileops *fops, void *data) 2010 { 2011 fp->f_flag = flag; 2012 fp->f_type = DTYPE_MISC; 2013 fp->f_ops = fops; 2014 fp->f_data = data; 2015 2016 l->l_dupfd = fd; 2017 2018 FILE_SET_MATURE(fp); 2019 FILE_UNUSE(fp, l); 2020 return EMOVEFD; 2021 } 2022 2023 /* ARGSUSED */ 2024 int 2025 fnullop_fcntl(struct file *fp, u_int cmd, void *data, struct lwp *l) 2026 { 2027 2028 if (cmd == F_SETFL) 2029 return 0; 2030 2031 return EOPNOTSUPP; 2032 } 2033 2034 /* ARGSUSED */ 2035 int 2036 fnullop_poll(struct file *fp, int which, struct lwp *l) 2037 { 2038 2039 return 0; 2040 } 2041 2042 2043 /* ARGSUSED */ 2044 int 2045 fnullop_kqfilter(struct file *fp, struct knote *kn) 2046 { 2047 2048 return 0; 2049 } 2050 2051 /* ARGSUSED */ 2052 int 2053 fbadop_read(struct file *fp, off_t *offset, struct uio *uio, 2054 kauth_cred_t cred, int flags) 2055 { 2056 2057 return EOPNOTSUPP; 2058 } 2059 2060 /* ARGSUSED */ 2061 int 2062 fbadop_write(struct file *fp, off_t *offset, struct uio *uio, 2063 kauth_cred_t cred, int flags) 2064 { 2065 2066 return EOPNOTSUPP; 2067 } 2068 2069 /* ARGSUSED */ 2070 int 2071 fbadop_ioctl(struct file *fp, u_long com, void *data, struct lwp *l) 2072 { 2073 2074 return EOPNOTSUPP; 2075 } 2076 2077 /* ARGSUSED */ 2078 int 2079 fbadop_stat(struct file *fp, struct stat *sb, struct lwp *l) 2080 { 2081 2082 return EOPNOTSUPP; 2083 } 2084 2085 /* ARGSUSED */ 2086 int 2087 fbadop_close(struct file *fp, struct lwp *l) 2088 { 2089 2090 return EOPNOTSUPP; 2091 } 2092