1 /* $NetBSD: kern_descrip.c,v 1.146 2006/10/12 01:32:14 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.146 2006/10/12 01:32:14 christos Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/filedesc.h> 45 #include <sys/kernel.h> 46 #include <sys/vnode.h> 47 #include <sys/proc.h> 48 #include <sys/file.h> 49 #include <sys/namei.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/stat.h> 53 #include <sys/ioctl.h> 54 #include <sys/fcntl.h> 55 #include <sys/malloc.h> 56 #include <sys/pool.h> 57 #include <sys/syslog.h> 58 #include <sys/unistd.h> 59 #include <sys/resourcevar.h> 60 #include <sys/conf.h> 61 #include <sys/event.h> 62 #include <sys/kauth.h> 63 64 #include <sys/mount.h> 65 #include <sys/sa.h> 66 #include <sys/syscallargs.h> 67 68 /* 69 * Descriptor management. 70 */ 71 struct filelist filehead; /* head of list of open files */ 72 int nfiles; /* actual number of open files */ 73 POOL_INIT(file_pool, sizeof(struct file), 0, 0, 0, "filepl", 74 &pool_allocator_nointr); 75 POOL_INIT(cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl", 76 &pool_allocator_nointr); 77 POOL_INIT(filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 78 &pool_allocator_nointr); 79 80 /* Global file list lock */ 81 static struct simplelock filelist_slock = SIMPLELOCK_INITIALIZER; 82 83 MALLOC_DEFINE(M_FILE, "file", "Open file structure"); 84 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 85 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 86 87 static inline int 88 find_next_zero(uint32_t *bitmap, int want, u_int bits) 89 { 90 int i, off, maxoff; 91 uint32_t sub; 92 93 if (want > bits) 94 return -1; 95 96 off = want >> NDENTRYSHIFT; 97 i = want & NDENTRYMASK; 98 if (i) { 99 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 100 if (sub != ~0) 101 goto found; 102 off++; 103 } 104 105 maxoff = NDLOSLOTS(bits); 106 while (off < maxoff) { 107 if ((sub = bitmap[off]) != ~0) 108 goto found; 109 off++; 110 } 111 112 return (-1); 113 114 found: 115 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 116 } 117 118 static int 119 find_last_set(struct filedesc *fd, int last) 120 { 121 int off, i; 122 struct file **ofiles = fd->fd_ofiles; 123 uint32_t *bitmap = fd->fd_lomap; 124 125 off = (last - 1) >> NDENTRYSHIFT; 126 127 while (off >= 0 && !bitmap[off]) 128 off--; 129 130 if (off < 0) 131 return (-1); 132 133 i = ((off + 1) << NDENTRYSHIFT) - 1; 134 if (i >= last) 135 i = last - 1; 136 137 while (i > 0 && ofiles[i] == NULL) 138 i--; 139 140 return (i); 141 } 142 143 static inline void 144 fd_used(struct filedesc *fdp, int fd) 145 { 146 u_int off = fd >> NDENTRYSHIFT; 147 148 LOCK_ASSERT(simple_lock_held(&fdp->fd_slock)); 149 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 150 151 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 152 if (fdp->fd_lomap[off] == ~0) { 153 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 154 (1 << (off & NDENTRYMASK))) == 0); 155 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 156 } 157 158 if (fd > fdp->fd_lastfile) 159 fdp->fd_lastfile = fd; 160 } 161 162 static inline void 163 fd_unused(struct filedesc *fdp, int fd) 164 { 165 u_int off = fd >> NDENTRYSHIFT; 166 167 LOCK_ASSERT(simple_lock_held(&fdp->fd_slock)); 168 if (fd < fdp->fd_freefile) 169 fdp->fd_freefile = fd; 170 171 if (fdp->fd_lomap[off] == ~0) { 172 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 173 (1 << (off & NDENTRYMASK))) != 0); 174 fdp->fd_himap[off >> NDENTRYSHIFT] &= 175 ~(1 << (off & NDENTRYMASK)); 176 } 177 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 178 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 179 180 #ifdef DIAGNOSTIC 181 if (fd > fdp->fd_lastfile) 182 panic("fd_unused: fd_lastfile inconsistent"); 183 #endif 184 if (fd == fdp->fd_lastfile) 185 fdp->fd_lastfile = find_last_set(fdp, fd); 186 } 187 188 /* 189 * Lookup the file structure corresponding to a file descriptor 190 * and return it locked. 191 * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);' 192 * The locking strategy has been optimised for this case, i.e. 193 * fd_getfile() returns the file locked while FILE_USE() will increment 194 * the file's use count and unlock. 195 */ 196 struct file * 197 fd_getfile(struct filedesc *fdp, int fd) 198 { 199 struct file *fp; 200 201 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 202 return (NULL); 203 204 simple_lock(&fp->f_slock); 205 if (FILE_IS_USABLE(fp) == 0) { 206 simple_unlock(&fp->f_slock); 207 return (NULL); 208 } 209 210 return (fp); 211 } 212 213 /* 214 * Common code for dup, dup2, and fcntl(F_DUPFD). 215 */ 216 static int 217 finishdup(struct lwp *l, int old, int new, register_t *retval) 218 { 219 struct filedesc *fdp; 220 struct file *fp, *delfp; 221 222 fdp = l->l_proc->p_fd; 223 224 /* 225 * If there is a file in the new slot, remember it so we 226 * can close it after we've finished the dup. We need 227 * to do it after the dup is finished, since closing 228 * the file may block. 229 * 230 * Note: `old' is already used for us. 231 * Note: Caller already marked `new' slot "used". 232 */ 233 simple_lock(&fdp->fd_slock); 234 delfp = fdp->fd_ofiles[new]; 235 236 fp = fdp->fd_ofiles[old]; 237 KDASSERT(fp != NULL); 238 fdp->fd_ofiles[new] = fp; 239 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 240 simple_unlock(&fdp->fd_slock); 241 242 *retval = new; 243 simple_lock(&fp->f_slock); 244 fp->f_count++; 245 FILE_UNUSE_HAVELOCK(fp, l); 246 247 if (delfp != NULL) { 248 simple_lock(&delfp->f_slock); 249 FILE_USE(delfp); 250 if (new < fdp->fd_knlistsize) 251 knote_fdclose(l, new); 252 (void) closef(delfp, l); 253 } 254 return (0); 255 } 256 257 /* 258 * System calls on descriptors. 259 */ 260 261 /* 262 * Duplicate a file descriptor. 263 */ 264 /* ARGSUSED */ 265 int 266 sys_dup(struct lwp *l, void *v, register_t *retval) 267 { 268 struct sys_dup_args /* { 269 syscallarg(int) fd; 270 } */ *uap = v; 271 struct file *fp; 272 struct filedesc *fdp; 273 struct proc *p; 274 int old, new, error; 275 276 p = l->l_proc; 277 fdp = p->p_fd; 278 old = SCARG(uap, fd); 279 280 restart: 281 if ((fp = fd_getfile(fdp, old)) == NULL) 282 return (EBADF); 283 284 FILE_USE(fp); 285 286 if ((error = fdalloc(p, 0, &new)) != 0) { 287 if (error == ENOSPC) { 288 fdexpand(p); 289 FILE_UNUSE(fp, l); 290 goto restart; 291 } 292 FILE_UNUSE(fp, l); 293 return (error); 294 } 295 296 /* finishdup() will unuse the descriptors for us */ 297 return (finishdup(l, old, new, retval)); 298 } 299 300 /* 301 * Duplicate a file descriptor to a particular value. 302 */ 303 /* ARGSUSED */ 304 int 305 sys_dup2(struct lwp *l, void *v, register_t *retval) 306 { 307 struct sys_dup2_args /* { 308 syscallarg(int) from; 309 syscallarg(int) to; 310 } */ *uap = v; 311 struct file *fp; 312 struct filedesc *fdp; 313 struct proc *p; 314 int old, new, i, error; 315 316 p = l->l_proc; 317 fdp = p->p_fd; 318 old = SCARG(uap, from); 319 new = SCARG(uap, to); 320 321 restart: 322 if ((fp = fd_getfile(fdp, old)) == NULL) 323 return (EBADF); 324 325 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 326 (u_int)new >= maxfiles) { 327 simple_unlock(&fp->f_slock); 328 return (EBADF); 329 } 330 331 if (old == new) { 332 simple_unlock(&fp->f_slock); 333 *retval = new; 334 return (0); 335 } 336 337 FILE_USE(fp); 338 339 if (new >= fdp->fd_nfiles) { 340 if ((error = fdalloc(p, new, &i)) != 0) { 341 if (error == ENOSPC) { 342 fdexpand(p); 343 FILE_UNUSE(fp, l); 344 goto restart; 345 } 346 FILE_UNUSE(fp, l); 347 return (error); 348 } 349 if (new != i) 350 panic("dup2: fdalloc"); 351 } else { 352 simple_lock(&fdp->fd_slock); 353 /* 354 * Mark `new' slot "used" only if it was empty. 355 */ 356 if (fdp->fd_ofiles[new] == NULL) 357 fd_used(fdp, new); 358 simple_unlock(&fdp->fd_slock); 359 } 360 361 /* 362 * finishdup() will close the file that's in the `new' 363 * slot, if there's one there. 364 */ 365 366 /* finishdup() will unuse the descriptors for us */ 367 return (finishdup(l, old, new, retval)); 368 } 369 370 /* 371 * fcntl call which is being passed to the file's fs. 372 */ 373 static int 374 fcntl_forfs(int fd, struct lwp *l, int cmd, void *arg) 375 { 376 struct file *fp; 377 struct filedesc *fdp; 378 int error; 379 u_int size; 380 void *data, *memp; 381 #define STK_PARAMS 128 382 char stkbuf[STK_PARAMS]; 383 384 /* fd's value was validated in sys_fcntl before calling this routine */ 385 fdp = l->l_proc->p_fd; 386 fp = fdp->fd_ofiles[fd]; 387 388 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 389 return (EBADF); 390 391 /* 392 * Interpret high order word to find amount of data to be 393 * copied to/from the user's address space. 394 */ 395 size = (size_t)F_PARAM_LEN(cmd); 396 if (size > F_PARAM_MAX) 397 return (EINVAL); 398 memp = NULL; 399 if (size > sizeof(stkbuf)) { 400 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 401 data = memp; 402 } else 403 data = stkbuf; 404 if (cmd & F_FSIN) { 405 if (size) { 406 error = copyin(arg, data, size); 407 if (error) { 408 if (memp) 409 free(memp, M_IOCTLOPS); 410 return (error); 411 } 412 } else 413 *(void **)data = arg; 414 } else if ((cmd & F_FSOUT) && size) 415 /* 416 * Zero the buffer so the user always 417 * gets back something deterministic. 418 */ 419 memset(data, 0, size); 420 else if (cmd & F_FSVOID) 421 *(void **)data = arg; 422 423 424 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, l); 425 426 /* 427 * Copy any data to user, size was 428 * already set and checked above. 429 */ 430 if (error == 0 && (cmd & F_FSOUT) && size) 431 error = copyout(data, arg, size); 432 if (memp) 433 free(memp, M_IOCTLOPS); 434 return (error); 435 } 436 437 /* 438 * The file control system call. 439 */ 440 /* ARGSUSED */ 441 int 442 sys_fcntl(struct lwp *l, void *v, register_t *retval) 443 { 444 struct sys_fcntl_args /* { 445 syscallarg(int) fd; 446 syscallarg(int) cmd; 447 syscallarg(void *) arg; 448 } */ *uap = v; 449 struct filedesc *fdp; 450 struct file *fp; 451 struct proc *p; 452 struct vnode *vp; 453 int fd, i, tmp, error, flg, cmd, newmin; 454 struct flock fl; 455 456 p = l->l_proc; 457 fd = SCARG(uap, fd); 458 cmd = SCARG(uap, cmd); 459 fdp = p->p_fd; 460 error = 0; 461 flg = F_POSIX; 462 463 switch (cmd) { 464 case F_CLOSEM: 465 if (fd < 0) 466 return EBADF; 467 while (fdp->fd_lastfile >= fd) 468 fdrelease(l, fdp->fd_lastfile); 469 return 0; 470 471 case F_MAXFD: 472 *retval = fdp->fd_lastfile; 473 return 0; 474 475 default: 476 /* Handled below */ 477 break; 478 } 479 480 restart: 481 if ((fp = fd_getfile(fdp, fd)) == NULL) 482 return (EBADF); 483 484 FILE_USE(fp); 485 486 if ((cmd & F_FSCTL)) { 487 error = fcntl_forfs(fd, l, cmd, SCARG(uap, arg)); 488 goto out; 489 } 490 491 switch (cmd) { 492 493 case F_DUPFD: 494 newmin = (long)SCARG(uap, arg); 495 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 496 (u_int)newmin >= maxfiles) { 497 error = EINVAL; 498 goto out; 499 } 500 if ((error = fdalloc(p, newmin, &i)) != 0) { 501 if (error == ENOSPC) { 502 fdexpand(p); 503 FILE_UNUSE(fp, l); 504 goto restart; 505 } 506 goto out; 507 } 508 509 /* finishdup() will unuse the descriptors for us */ 510 return (finishdup(l, fd, i, retval)); 511 512 case F_GETFD: 513 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 514 break; 515 516 case F_SETFD: 517 if ((long)SCARG(uap, arg) & 1) 518 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 519 else 520 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 521 break; 522 523 case F_GETFL: 524 *retval = OFLAGS(fp->f_flag); 525 break; 526 527 case F_SETFL: 528 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 529 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, l); 530 if (error) 531 break; 532 i = tmp ^ fp->f_flag; 533 if (i & FNONBLOCK) { 534 int flgs = tmp & FNONBLOCK; 535 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs, l); 536 if (error) 537 goto reset_fcntl; 538 } 539 if (i & FASYNC) { 540 int flgs = tmp & FASYNC; 541 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs, l); 542 if (error) { 543 if (i & FNONBLOCK) { 544 tmp = fp->f_flag & FNONBLOCK; 545 (void)(*fp->f_ops->fo_ioctl)(fp, 546 FIONBIO, &tmp, l); 547 } 548 goto reset_fcntl; 549 } 550 } 551 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 552 break; 553 reset_fcntl: 554 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, l); 555 break; 556 557 case F_GETOWN: 558 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp, l); 559 *retval = tmp; 560 break; 561 562 case F_SETOWN: 563 tmp = (int)(intptr_t) SCARG(uap, arg); 564 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, l); 565 break; 566 567 case F_SETLKW: 568 flg |= F_WAIT; 569 /* Fall into F_SETLK */ 570 571 case F_SETLK: 572 if (fp->f_type != DTYPE_VNODE) { 573 error = EINVAL; 574 goto out; 575 } 576 vp = (struct vnode *)fp->f_data; 577 /* Copy in the lock structure */ 578 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 579 if (error) 580 goto out; 581 if (fl.l_whence == SEEK_CUR) 582 fl.l_start += fp->f_offset; 583 switch (fl.l_type) { 584 case F_RDLCK: 585 if ((fp->f_flag & FREAD) == 0) { 586 error = EBADF; 587 goto out; 588 } 589 p->p_flag |= P_ADVLOCK; 590 error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg); 591 goto out; 592 593 case F_WRLCK: 594 if ((fp->f_flag & FWRITE) == 0) { 595 error = EBADF; 596 goto out; 597 } 598 p->p_flag |= P_ADVLOCK; 599 error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg); 600 goto out; 601 602 case F_UNLCK: 603 error = VOP_ADVLOCK(vp, p, F_UNLCK, &fl, F_POSIX); 604 goto out; 605 606 default: 607 error = EINVAL; 608 goto out; 609 } 610 611 case F_GETLK: 612 if (fp->f_type != DTYPE_VNODE) { 613 error = EINVAL; 614 goto out; 615 } 616 vp = (struct vnode *)fp->f_data; 617 /* Copy in the lock structure */ 618 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 619 if (error) 620 goto out; 621 if (fl.l_whence == SEEK_CUR) 622 fl.l_start += fp->f_offset; 623 if (fl.l_type != F_RDLCK && 624 fl.l_type != F_WRLCK && 625 fl.l_type != F_UNLCK) { 626 error = EINVAL; 627 goto out; 628 } 629 error = VOP_ADVLOCK(vp, p, F_GETLK, &fl, F_POSIX); 630 if (error) 631 goto out; 632 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 633 break; 634 635 default: 636 error = EINVAL; 637 } 638 639 out: 640 FILE_UNUSE(fp, l); 641 return (error); 642 } 643 644 void 645 fdremove(struct filedesc *fdp, int fd) 646 { 647 648 simple_lock(&fdp->fd_slock); 649 fdp->fd_ofiles[fd] = NULL; 650 fd_unused(fdp, fd); 651 simple_unlock(&fdp->fd_slock); 652 } 653 654 int 655 fdrelease(struct lwp *l, int fd) 656 { 657 struct proc *p = l->l_proc; 658 struct filedesc *fdp; 659 struct file **fpp, *fp; 660 661 fdp = p->p_fd; 662 simple_lock(&fdp->fd_slock); 663 if (fd < 0 || fd > fdp->fd_lastfile) 664 goto badf; 665 fpp = &fdp->fd_ofiles[fd]; 666 fp = *fpp; 667 if (fp == NULL) 668 goto badf; 669 670 simple_lock(&fp->f_slock); 671 if (!FILE_IS_USABLE(fp)) { 672 simple_unlock(&fp->f_slock); 673 goto badf; 674 } 675 676 FILE_USE(fp); 677 678 *fpp = NULL; 679 fdp->fd_ofileflags[fd] = 0; 680 fd_unused(fdp, fd); 681 simple_unlock(&fdp->fd_slock); 682 if (fd < fdp->fd_knlistsize) 683 knote_fdclose(l, fd); 684 return (closef(fp, l)); 685 686 badf: 687 simple_unlock(&fdp->fd_slock); 688 return (EBADF); 689 } 690 691 /* 692 * Close a file descriptor. 693 */ 694 /* ARGSUSED */ 695 int 696 sys_close(struct lwp *l, void *v, register_t *retval __unused) 697 { 698 struct sys_close_args /* { 699 syscallarg(int) fd; 700 } */ *uap = v; 701 int fd; 702 struct filedesc *fdp; 703 struct proc *p; 704 705 p = l->l_proc; 706 fd = SCARG(uap, fd); 707 fdp = p->p_fd; 708 709 #if 0 710 if (fd_getfile(fdp, fd) == NULL) 711 return (EBADF); 712 #endif 713 714 return (fdrelease(l, fd)); 715 } 716 717 /* 718 * Return status information about a file descriptor. 719 */ 720 /* ARGSUSED */ 721 int 722 sys___fstat30(struct lwp *l, void *v, register_t *retval __unused) 723 { 724 struct sys___fstat30_args /* { 725 syscallarg(int) fd; 726 syscallarg(struct stat *) sb; 727 } */ *uap = v; 728 int fd; 729 struct filedesc *fdp; 730 struct file *fp; 731 struct proc *p; 732 struct stat ub; 733 int error; 734 735 p = l->l_proc; 736 fd = SCARG(uap, fd); 737 fdp = p->p_fd; 738 739 if ((fp = fd_getfile(fdp, fd)) == NULL) 740 return (EBADF); 741 742 FILE_USE(fp); 743 error = (*fp->f_ops->fo_stat)(fp, &ub, l); 744 FILE_UNUSE(fp, l); 745 746 if (error == 0) 747 error = copyout(&ub, SCARG(uap, sb), sizeof(ub)); 748 749 return (error); 750 } 751 752 /* 753 * Return pathconf information about a file descriptor. 754 */ 755 /* ARGSUSED */ 756 int 757 sys_fpathconf(struct lwp *l, void *v, register_t *retval) 758 { 759 struct sys_fpathconf_args /* { 760 syscallarg(int) fd; 761 syscallarg(int) name; 762 } */ *uap = v; 763 int fd; 764 struct filedesc *fdp; 765 struct file *fp; 766 struct proc *p; 767 struct vnode *vp; 768 int error; 769 770 p = l->l_proc; 771 fd = SCARG(uap, fd); 772 fdp = p->p_fd; 773 error = 0; 774 775 if ((fp = fd_getfile(fdp, fd)) == NULL) 776 return (EBADF); 777 778 FILE_USE(fp); 779 780 switch (fp->f_type) { 781 782 case DTYPE_SOCKET: 783 case DTYPE_PIPE: 784 if (SCARG(uap, name) != _PC_PIPE_BUF) 785 error = EINVAL; 786 else 787 *retval = PIPE_BUF; 788 break; 789 790 case DTYPE_VNODE: 791 vp = (struct vnode *)fp->f_data; 792 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 793 break; 794 795 case DTYPE_KQUEUE: 796 error = EINVAL; 797 break; 798 799 default: 800 error = EOPNOTSUPP; 801 break; 802 } 803 804 FILE_UNUSE(fp, l); 805 return (error); 806 } 807 808 /* 809 * Allocate a file descriptor for the process. 810 */ 811 int fdexpanded; /* XXX: what else uses this? */ 812 813 int 814 fdalloc(struct proc *p, int want, int *result) 815 { 816 struct filedesc *fdp; 817 int i, lim, last, error; 818 u_int off, new; 819 820 fdp = p->p_fd; 821 simple_lock(&fdp->fd_slock); 822 823 /* 824 * Search for a free descriptor starting at the higher 825 * of want or fd_freefile. If that fails, consider 826 * expanding the ofile array. 827 */ 828 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 829 last = min(fdp->fd_nfiles, lim); 830 again: 831 if ((i = want) < fdp->fd_freefile) 832 i = fdp->fd_freefile; 833 off = i >> NDENTRYSHIFT; 834 new = find_next_zero(fdp->fd_himap, off, 835 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 836 if (new != -1) { 837 i = find_next_zero(&fdp->fd_lomap[new], 838 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 839 if (i == -1) { 840 /* 841 * free file descriptor in this block was 842 * below want, try again with higher want. 843 */ 844 want = (new + 1) << NDENTRYSHIFT; 845 goto again; 846 } 847 i += (new << NDENTRYSHIFT); 848 if (i < last) { 849 if (fdp->fd_ofiles[i] == NULL) { 850 fd_used(fdp, i); 851 if (want <= fdp->fd_freefile) 852 fdp->fd_freefile = i; 853 *result = i; 854 error = 0; 855 goto out; 856 } 857 } 858 } 859 860 /* No space in current array. Expand or let the caller do it. */ 861 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 862 863 out: 864 simple_unlock(&fdp->fd_slock); 865 return (error); 866 } 867 868 void 869 fdexpand(struct proc *p) 870 { 871 struct filedesc *fdp; 872 int i, numfiles, oldnfiles; 873 struct file **newofile; 874 char *newofileflags; 875 uint32_t *newhimap = NULL, *newlomap = NULL; 876 877 fdp = p->p_fd; 878 879 restart: 880 oldnfiles = fdp->fd_nfiles; 881 882 if (oldnfiles < NDEXTENT) 883 numfiles = NDEXTENT; 884 else 885 numfiles = 2 * oldnfiles; 886 887 newofile = malloc(numfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 888 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 889 newhimap = malloc(NDHISLOTS(numfiles) * sizeof(uint32_t), 890 M_FILEDESC, M_WAITOK); 891 newlomap = malloc(NDLOSLOTS(numfiles) * sizeof(uint32_t), 892 M_FILEDESC, M_WAITOK); 893 } 894 895 simple_lock(&fdp->fd_slock); 896 /* lock fdp */ 897 if (fdp->fd_nfiles != oldnfiles) { 898 /* fdp changed; retry */ 899 simple_unlock(&fdp->fd_slock); 900 free(newofile, M_FILEDESC); 901 if (newhimap != NULL) free(newhimap, M_FILEDESC); 902 if (newlomap != NULL) free(newlomap, M_FILEDESC); 903 goto restart; 904 } 905 906 newofileflags = (char *) &newofile[numfiles]; 907 /* 908 * Copy the existing ofile and ofileflags arrays 909 * and zero the new portion of each array. 910 */ 911 memcpy(newofile, fdp->fd_ofiles, 912 (i = sizeof(struct file *) * fdp->fd_nfiles)); 913 memset((char *)newofile + i, 0, 914 numfiles * sizeof(struct file *) - i); 915 memcpy(newofileflags, fdp->fd_ofileflags, 916 (i = sizeof(char) * fdp->fd_nfiles)); 917 memset(newofileflags + i, 0, numfiles * sizeof(char) - i); 918 if (oldnfiles > NDFILE) 919 free(fdp->fd_ofiles, M_FILEDESC); 920 921 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 922 memcpy(newhimap, fdp->fd_himap, 923 (i = NDHISLOTS(oldnfiles) * sizeof(uint32_t))); 924 memset((char *)newhimap + i, 0, 925 NDHISLOTS(numfiles) * sizeof(uint32_t) - i); 926 927 memcpy(newlomap, fdp->fd_lomap, 928 (i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t))); 929 memset((char *)newlomap + i, 0, 930 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); 931 932 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { 933 free(fdp->fd_himap, M_FILEDESC); 934 free(fdp->fd_lomap, M_FILEDESC); 935 } 936 fdp->fd_himap = newhimap; 937 fdp->fd_lomap = newlomap; 938 } 939 940 fdp->fd_ofiles = newofile; 941 fdp->fd_ofileflags = newofileflags; 942 fdp->fd_nfiles = numfiles; 943 944 simple_unlock(&fdp->fd_slock); 945 946 fdexpanded++; 947 } 948 949 /* 950 * Create a new open file structure and allocate 951 * a file descriptor for the process that refers to it. 952 */ 953 int 954 falloc(struct lwp *l, struct file **resultfp, int *resultfd) 955 { 956 struct file *fp, *fq; 957 struct proc *p; 958 int error, i; 959 960 p = l->l_proc; 961 962 restart: 963 if ((error = fdalloc(p, 0, &i)) != 0) { 964 if (error == ENOSPC) { 965 fdexpand(p); 966 goto restart; 967 } 968 return (error); 969 } 970 971 fp = pool_get(&file_pool, PR_WAITOK); 972 simple_lock(&filelist_slock); 973 if (nfiles >= maxfiles) { 974 tablefull("file", "increase kern.maxfiles or MAXFILES"); 975 simple_unlock(&filelist_slock); 976 simple_lock(&p->p_fd->fd_slock); 977 fd_unused(p->p_fd, i); 978 simple_unlock(&p->p_fd->fd_slock); 979 pool_put(&file_pool, fp); 980 return (ENFILE); 981 } 982 /* 983 * Allocate a new file descriptor. 984 * If the process has file descriptor zero open, add to the list 985 * of open files at that point, otherwise put it at the front of 986 * the list of open files. 987 */ 988 nfiles++; 989 memset(fp, 0, sizeof(struct file)); 990 fp->f_iflags = FIF_LARVAL; 991 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 992 LIST_INSERT_AFTER(fq, fp, f_list); 993 } else { 994 LIST_INSERT_HEAD(&filehead, fp, f_list); 995 } 996 simple_unlock(&filelist_slock); 997 KDASSERT(p->p_fd->fd_ofiles[i] == NULL); 998 p->p_fd->fd_ofiles[i] = fp; 999 simple_lock_init(&fp->f_slock); 1000 fp->f_count = 1; 1001 fp->f_cred = l->l_cred; 1002 kauth_cred_hold(fp->f_cred); 1003 if (resultfp) { 1004 fp->f_usecount = 1; 1005 *resultfp = fp; 1006 } 1007 if (resultfd) 1008 *resultfd = i; 1009 return (0); 1010 } 1011 1012 /* 1013 * Free a file descriptor. 1014 */ 1015 void 1016 ffree(struct file *fp) 1017 { 1018 1019 #ifdef DIAGNOSTIC 1020 if (fp->f_usecount) 1021 panic("ffree"); 1022 #endif 1023 1024 simple_lock(&filelist_slock); 1025 LIST_REMOVE(fp, f_list); 1026 kauth_cred_free(fp->f_cred); 1027 #ifdef DIAGNOSTIC 1028 fp->f_count = 0; /* What's the point? */ 1029 #endif 1030 nfiles--; 1031 simple_unlock(&filelist_slock); 1032 pool_put(&file_pool, fp); 1033 } 1034 1035 /* 1036 * Create an initial cwdinfo structure, using the same current and root 1037 * directories as p. 1038 */ 1039 struct cwdinfo * 1040 cwdinit(struct proc *p) 1041 { 1042 struct cwdinfo *cwdi; 1043 1044 cwdi = pool_get(&cwdi_pool, PR_WAITOK); 1045 1046 simple_lock_init(&cwdi->cwdi_slock); 1047 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir; 1048 if (cwdi->cwdi_cdir) 1049 VREF(cwdi->cwdi_cdir); 1050 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir; 1051 if (cwdi->cwdi_rdir) 1052 VREF(cwdi->cwdi_rdir); 1053 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask; 1054 cwdi->cwdi_refcnt = 1; 1055 1056 return (cwdi); 1057 } 1058 1059 /* 1060 * Make p2 share p1's cwdinfo. 1061 */ 1062 void 1063 cwdshare(struct proc *p1, struct proc *p2) 1064 { 1065 struct cwdinfo *cwdi = p1->p_cwdi; 1066 1067 simple_lock(&cwdi->cwdi_slock); 1068 cwdi->cwdi_refcnt++; 1069 simple_unlock(&cwdi->cwdi_slock); 1070 p2->p_cwdi = cwdi; 1071 } 1072 1073 /* 1074 * Make this process not share its cwdinfo structure, maintaining 1075 * all cwdinfo state. 1076 */ 1077 void 1078 cwdunshare(struct proc *p) 1079 { 1080 struct cwdinfo *oldcwdi, *newcwdi; 1081 1082 if (p->p_cwdi->cwdi_refcnt == 1) 1083 return; 1084 1085 newcwdi = cwdinit(p); 1086 oldcwdi = p->p_cwdi; 1087 p->p_cwdi = newcwdi; 1088 cwdfree(oldcwdi); 1089 } 1090 1091 /* 1092 * Release a cwdinfo structure. 1093 */ 1094 void 1095 cwdfree(struct cwdinfo *cwdi) 1096 { 1097 int n; 1098 1099 simple_lock(&cwdi->cwdi_slock); 1100 n = --cwdi->cwdi_refcnt; 1101 simple_unlock(&cwdi->cwdi_slock); 1102 if (n > 0) 1103 return; 1104 1105 vrele(cwdi->cwdi_cdir); 1106 if (cwdi->cwdi_rdir) 1107 vrele(cwdi->cwdi_rdir); 1108 pool_put(&cwdi_pool, cwdi); 1109 } 1110 1111 /* 1112 * Create an initial filedesc structure, using the same current and root 1113 * directories as p. 1114 */ 1115 struct filedesc * 1116 fdinit(struct proc *p __unused) 1117 { 1118 struct filedesc0 *newfdp; 1119 1120 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 1121 memset(newfdp, 0, sizeof(struct filedesc0)); 1122 1123 fdinit1(newfdp); 1124 1125 return (&newfdp->fd_fd); 1126 } 1127 1128 /* 1129 * Initialize a file descriptor table. 1130 */ 1131 void 1132 fdinit1(struct filedesc0 *newfdp) 1133 { 1134 1135 newfdp->fd_fd.fd_refcnt = 1; 1136 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1137 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1138 newfdp->fd_fd.fd_nfiles = NDFILE; 1139 newfdp->fd_fd.fd_knlistsize = -1; 1140 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 1141 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 1142 newfdp->fd_fd.fd_lastfile = -1; 1143 simple_lock_init(&newfdp->fd_fd.fd_slock); 1144 } 1145 1146 /* 1147 * Make p2 share p1's filedesc structure. 1148 */ 1149 void 1150 fdshare(struct proc *p1, struct proc *p2) 1151 { 1152 struct filedesc *fdp = p1->p_fd; 1153 1154 simple_lock(&fdp->fd_slock); 1155 p2->p_fd = fdp; 1156 fdp->fd_refcnt++; 1157 simple_unlock(&fdp->fd_slock); 1158 } 1159 1160 /* 1161 * Make this process not share its filedesc structure, maintaining 1162 * all file descriptor state. 1163 */ 1164 void 1165 fdunshare(struct lwp *l) 1166 { 1167 struct proc *p = l->l_proc; 1168 struct filedesc *newfd; 1169 1170 if (p->p_fd->fd_refcnt == 1) 1171 return; 1172 1173 newfd = fdcopy(p); 1174 fdfree(l); 1175 p->p_fd = newfd; 1176 } 1177 1178 /* 1179 * Clear a process's fd table. 1180 */ 1181 void 1182 fdclear(struct lwp *l) 1183 { 1184 struct proc *p = l->l_proc; 1185 struct filedesc *newfd; 1186 1187 newfd = fdinit(p); 1188 fdfree(l); 1189 p->p_fd = newfd; 1190 } 1191 1192 /* 1193 * Copy a filedesc structure. 1194 */ 1195 struct filedesc * 1196 fdcopy(struct proc *p) 1197 { 1198 struct filedesc *newfdp, *fdp; 1199 struct file **fpp, **nfpp; 1200 int i, numfiles, lastfile; 1201 1202 fdp = p->p_fd; 1203 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 1204 newfdp->fd_refcnt = 1; 1205 simple_lock_init(&newfdp->fd_slock); 1206 1207 restart: 1208 numfiles = fdp->fd_nfiles; 1209 lastfile = fdp->fd_lastfile; 1210 1211 /* 1212 * If the number of open files fits in the internal arrays 1213 * of the open file structure, use them, otherwise allocate 1214 * additional memory for the number of descriptors currently 1215 * in use. 1216 */ 1217 if (lastfile < NDFILE) { 1218 i = NDFILE; 1219 } else { 1220 /* 1221 * Compute the smallest multiple of NDEXTENT needed 1222 * for the file descriptors currently in use, 1223 * allowing the table to shrink. 1224 */ 1225 i = numfiles; 1226 while (i >= 2 * NDEXTENT && i > lastfile * 2) 1227 i /= 2; 1228 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 1229 } 1230 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1231 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t), 1232 M_FILEDESC, M_WAITOK); 1233 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t), 1234 M_FILEDESC, M_WAITOK); 1235 } 1236 1237 simple_lock(&fdp->fd_slock); 1238 if (numfiles != fdp->fd_nfiles || lastfile != fdp->fd_lastfile) { 1239 simple_unlock(&fdp->fd_slock); 1240 if (i > NDFILE) 1241 free(newfdp->fd_ofiles, M_FILEDESC); 1242 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1243 free(newfdp->fd_himap, M_FILEDESC); 1244 free(newfdp->fd_lomap, M_FILEDESC); 1245 } 1246 goto restart; 1247 } 1248 1249 if (lastfile < NDFILE) { 1250 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1251 newfdp->fd_ofileflags = 1252 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1253 } else { 1254 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1255 } 1256 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1257 newfdp->fd_himap = 1258 ((struct filedesc0 *) newfdp)->fd_dhimap; 1259 newfdp->fd_lomap = 1260 ((struct filedesc0 *) newfdp)->fd_dlomap; 1261 } 1262 1263 newfdp->fd_nfiles = i; 1264 newfdp->fd_lastfile = lastfile; 1265 newfdp->fd_freefile = fdp->fd_freefile; 1266 1267 /* Clear the entries that will not be copied over. 1268 * Avoid calling memset with 0 size (i.e. when 1269 * lastfile == i-1 */ 1270 if (lastfile < (i-1)) 1271 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1272 (i - lastfile - 1) * sizeof(struct file **)); 1273 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1274 if (i < NDENTRIES * NDENTRIES) 1275 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1276 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1277 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1278 1279 fpp = fdp->fd_ofiles; 1280 nfpp = newfdp->fd_ofiles; 1281 for (i = 0; i <= lastfile; i++, fpp++, nfpp++) { 1282 if ((*nfpp = *fpp) == NULL) 1283 continue; 1284 1285 if ((*fpp)->f_type == DTYPE_KQUEUE) 1286 /* kq descriptors cannot be copied. */ 1287 fdremove(newfdp, i); 1288 else { 1289 simple_lock(&(*fpp)->f_slock); 1290 (*fpp)->f_count++; 1291 simple_unlock(&(*fpp)->f_slock); 1292 } 1293 } 1294 1295 simple_unlock(&fdp->fd_slock); 1296 1297 newfdp->fd_knlist = NULL; 1298 newfdp->fd_knlistsize = -1; 1299 newfdp->fd_knhash = NULL; 1300 newfdp->fd_knhashmask = 0; 1301 1302 return (newfdp); 1303 } 1304 1305 /* 1306 * Release a filedesc structure. 1307 */ 1308 void 1309 fdfree(struct lwp *l) 1310 { 1311 struct proc *p = l->l_proc; 1312 struct filedesc *fdp; 1313 struct file **fpp, *fp; 1314 int i; 1315 1316 fdp = p->p_fd; 1317 simple_lock(&fdp->fd_slock); 1318 i = --fdp->fd_refcnt; 1319 simple_unlock(&fdp->fd_slock); 1320 if (i > 0) 1321 return; 1322 1323 fpp = fdp->fd_ofiles; 1324 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1325 fp = *fpp; 1326 if (fp != NULL) { 1327 *fpp = NULL; 1328 simple_lock(&fp->f_slock); 1329 FILE_USE(fp); 1330 if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize) 1331 knote_fdclose(l, fdp->fd_lastfile - i); 1332 (void) closef(fp, l); 1333 } 1334 } 1335 p->p_fd = NULL; 1336 if (fdp->fd_nfiles > NDFILE) 1337 free(fdp->fd_ofiles, M_FILEDESC); 1338 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1339 free(fdp->fd_himap, M_FILEDESC); 1340 free(fdp->fd_lomap, M_FILEDESC); 1341 } 1342 if (fdp->fd_knlist) 1343 free(fdp->fd_knlist, M_KEVENT); 1344 if (fdp->fd_knhash) 1345 hashdone(fdp->fd_knhash, M_KEVENT); 1346 pool_put(&filedesc0_pool, fdp); 1347 } 1348 1349 /* 1350 * Internal form of close. 1351 * Decrement reference count on file structure. 1352 * Note: p may be NULL when closing a file 1353 * that was being passed in a message. 1354 * 1355 * Note: we expect the caller is holding a usecount, and expects us 1356 * to drop it (the caller thinks the file is going away forever). 1357 */ 1358 int 1359 closef(struct file *fp, struct lwp *l) 1360 { 1361 struct proc *p = l ? l->l_proc : NULL; 1362 struct vnode *vp; 1363 struct flock lf; 1364 int error; 1365 1366 if (fp == NULL) 1367 return (0); 1368 1369 /* 1370 * POSIX record locking dictates that any close releases ALL 1371 * locks owned by this process. This is handled by setting 1372 * a flag in the unlock to free ONLY locks obeying POSIX 1373 * semantics, and not to free BSD-style file locks. 1374 * If the descriptor was in a message, POSIX-style locks 1375 * aren't passed with the descriptor. 1376 */ 1377 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { 1378 lf.l_whence = SEEK_SET; 1379 lf.l_start = 0; 1380 lf.l_len = 0; 1381 lf.l_type = F_UNLCK; 1382 vp = (struct vnode *)fp->f_data; 1383 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX); 1384 } 1385 1386 /* 1387 * If WANTCLOSE is set, then the reference count on the file 1388 * is 0, but there were multiple users of the file. This can 1389 * happen if a filedesc structure is shared by multiple 1390 * processes. 1391 */ 1392 simple_lock(&fp->f_slock); 1393 if (fp->f_iflags & FIF_WANTCLOSE) { 1394 /* 1395 * Another user of the file is already closing, and is 1396 * simply waiting for other users of the file to drain. 1397 * Release our usecount, and wake up the closer if it 1398 * is the only remaining use. 1399 */ 1400 #ifdef DIAGNOSTIC 1401 if (fp->f_count != 0) 1402 panic("closef: wantclose and count != 0"); 1403 if (fp->f_usecount < 2) 1404 panic("closef: wantclose and usecount < 2"); 1405 #endif 1406 if (--fp->f_usecount == 1) 1407 wakeup(&fp->f_usecount); 1408 simple_unlock(&fp->f_slock); 1409 return (0); 1410 } else { 1411 /* 1412 * Decrement the reference count. If we were not the 1413 * last reference, then release our use and just 1414 * return. 1415 */ 1416 if (--fp->f_count > 0) { 1417 #ifdef DIAGNOSTIC 1418 if (fp->f_usecount < 1) 1419 panic("closef: no wantclose and usecount < 1"); 1420 #endif 1421 fp->f_usecount--; 1422 simple_unlock(&fp->f_slock); 1423 return (0); 1424 } 1425 } 1426 1427 /* 1428 * The reference count is now 0. However, there may be 1429 * multiple potential users of this file. This can happen 1430 * if multiple processes shared a single filedesc structure. 1431 * 1432 * Notify these potential users that the file is closing. 1433 * This will prevent them from adding additional uses to 1434 * the file. 1435 */ 1436 fp->f_iflags |= FIF_WANTCLOSE; 1437 1438 /* 1439 * We expect the caller to add a use to the file. So, if we 1440 * are the last user, usecount will be 1. If it is not, we 1441 * must wait for the usecount to drain. When it drains back 1442 * to 1, we will be awakened so that we may proceed with the 1443 * close. 1444 */ 1445 #ifdef DIAGNOSTIC 1446 if (fp->f_usecount < 1) 1447 panic("closef: usecount < 1"); 1448 #endif 1449 while (fp->f_usecount > 1) 1450 (void) ltsleep(&fp->f_usecount, PRIBIO, "closef", 0, 1451 &fp->f_slock); 1452 #ifdef DIAGNOSTIC 1453 if (fp->f_usecount != 1) 1454 panic("closef: usecount != 1"); 1455 #endif 1456 1457 simple_unlock(&fp->f_slock); 1458 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1459 lf.l_whence = SEEK_SET; 1460 lf.l_start = 0; 1461 lf.l_len = 0; 1462 lf.l_type = F_UNLCK; 1463 vp = (struct vnode *)fp->f_data; 1464 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1465 } 1466 if (fp->f_ops) 1467 error = (*fp->f_ops->fo_close)(fp, l); 1468 else 1469 error = 0; 1470 1471 /* Nothing references the file now, drop the final use (us). */ 1472 fp->f_usecount--; 1473 1474 ffree(fp); 1475 return (error); 1476 } 1477 1478 /* 1479 * Apply an advisory lock on a file descriptor. 1480 * 1481 * Just attempt to get a record lock of the requested type on 1482 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1483 */ 1484 /* ARGSUSED */ 1485 int 1486 sys_flock(struct lwp *l, void *v, register_t *retval __unused) 1487 { 1488 struct sys_flock_args /* { 1489 syscallarg(int) fd; 1490 syscallarg(int) how; 1491 } */ *uap = v; 1492 int fd, how, error; 1493 struct proc *p; 1494 struct filedesc *fdp; 1495 struct file *fp; 1496 struct vnode *vp; 1497 struct flock lf; 1498 1499 p = l->l_proc; 1500 fd = SCARG(uap, fd); 1501 how = SCARG(uap, how); 1502 fdp = p->p_fd; 1503 error = 0; 1504 1505 if ((fp = fd_getfile(fdp, fd)) == NULL) 1506 return (EBADF); 1507 1508 FILE_USE(fp); 1509 1510 if (fp->f_type != DTYPE_VNODE) { 1511 error = EOPNOTSUPP; 1512 goto out; 1513 } 1514 1515 vp = (struct vnode *)fp->f_data; 1516 lf.l_whence = SEEK_SET; 1517 lf.l_start = 0; 1518 lf.l_len = 0; 1519 if (how & LOCK_UN) { 1520 lf.l_type = F_UNLCK; 1521 fp->f_flag &= ~FHASLOCK; 1522 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1523 goto out; 1524 } 1525 if (how & LOCK_EX) 1526 lf.l_type = F_WRLCK; 1527 else if (how & LOCK_SH) 1528 lf.l_type = F_RDLCK; 1529 else { 1530 error = EINVAL; 1531 goto out; 1532 } 1533 fp->f_flag |= FHASLOCK; 1534 if (how & LOCK_NB) 1535 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 1536 else 1537 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, 1538 F_FLOCK|F_WAIT); 1539 out: 1540 FILE_UNUSE(fp, l); 1541 return (error); 1542 } 1543 1544 /* ARGSUSED */ 1545 int 1546 sys_posix_fadvise(struct lwp *l, void *v, register_t *retval) 1547 { 1548 const struct sys_posix_fadvise_args /* { 1549 syscallarg(int) fd; 1550 syscallarg(off_t) offset; 1551 syscallarg(off_t) len; 1552 syscallarg(int) advice; 1553 } */ *uap = v; 1554 const int fd = SCARG(uap, fd); 1555 const int advice = SCARG(uap, advice); 1556 struct proc *p = l->l_proc; 1557 struct file *fp; 1558 int error = 0; 1559 1560 fp = fd_getfile(p->p_fd, fd); 1561 if (fp == NULL) { 1562 error = EBADF; 1563 goto out; 1564 } 1565 FILE_USE(fp); 1566 1567 if (fp->f_type != DTYPE_VNODE) { 1568 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1569 error = ESPIPE; 1570 } else { 1571 error = EOPNOTSUPP; 1572 } 1573 goto out; 1574 } 1575 1576 switch (advice) { 1577 case POSIX_FADV_NORMAL: 1578 case POSIX_FADV_RANDOM: 1579 case POSIX_FADV_SEQUENTIAL: 1580 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 1581 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 1582 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 1583 1584 /* 1585 * we ignore offset and size. 1586 */ 1587 1588 fp->f_advice = advice; 1589 break; 1590 1591 case POSIX_FADV_WILLNEED: 1592 case POSIX_FADV_DONTNEED: 1593 case POSIX_FADV_NOREUSE: 1594 1595 /* 1596 * not implemented yet. 1597 */ 1598 1599 break; 1600 default: 1601 error = EINVAL; 1602 break; 1603 } 1604 out: 1605 if (fp != NULL) { 1606 FILE_UNUSE(fp, l); 1607 } 1608 *retval = error; 1609 return 0; 1610 } 1611 1612 /* 1613 * File Descriptor pseudo-device driver (/dev/fd/). 1614 * 1615 * Opening minor device N dup()s the file (if any) connected to file 1616 * descriptor N belonging to the calling process. Note that this driver 1617 * consists of only the ``open()'' routine, because all subsequent 1618 * references to this file will be direct to the other driver. 1619 */ 1620 /* ARGSUSED */ 1621 static int 1622 filedescopen(dev_t dev, int mode __unused, int type __unused, struct lwp *l) 1623 { 1624 1625 /* 1626 * XXX Kludge: set dupfd to contain the value of the 1627 * the file descriptor being sought for duplication. The error 1628 * return ensures that the vnode for this device will be released 1629 * by vn_open. Open will detect this special error and take the 1630 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1631 * will simply report the error. 1632 */ 1633 l->l_dupfd = minor(dev); /* XXX */ 1634 return EDUPFD; 1635 } 1636 1637 const struct cdevsw filedesc_cdevsw = { 1638 filedescopen, noclose, noread, nowrite, noioctl, 1639 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER, 1640 }; 1641 1642 /* 1643 * Duplicate the specified descriptor to a free descriptor. 1644 * 1645 * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller. 1646 */ 1647 int 1648 dupfdopen(struct lwp *l, int indx, int dfd, int mode, int error) 1649 { 1650 struct proc *p = l->l_proc; 1651 struct filedesc *fdp; 1652 struct file *wfp; 1653 1654 fdp = p->p_fd; 1655 1656 /* should be cleared by the caller */ 1657 KASSERT(fdp->fd_ofiles[indx] == NULL); 1658 1659 /* 1660 * If the to-be-dup'd fd number is greater than the allowed number 1661 * of file descriptors, or the fd to be dup'd has already been 1662 * closed, reject. 1663 */ 1664 1665 /* 1666 * Note, in the case of indx == dfd, fd_getfile below returns NULL. 1667 */ 1668 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1669 return (EBADF); 1670 1671 FILE_USE(wfp); 1672 1673 /* 1674 * There are two cases of interest here. 1675 * 1676 * For EDUPFD simply dup (dfd) to file descriptor 1677 * (indx) and return. 1678 * 1679 * For EMOVEFD steal away the file structure from (dfd) and 1680 * store it in (indx). (dfd) is effectively closed by 1681 * this operation. 1682 * 1683 * Any other error code is just returned. 1684 */ 1685 switch (error) { 1686 case EDUPFD: 1687 /* 1688 * Check that the mode the file is being opened for is a 1689 * subset of the mode of the existing descriptor. 1690 */ 1691 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 1692 FILE_UNUSE(wfp, l); 1693 return (EACCES); 1694 } 1695 simple_lock(&fdp->fd_slock); 1696 fdp->fd_ofiles[indx] = wfp; 1697 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1698 simple_unlock(&fdp->fd_slock); 1699 simple_lock(&wfp->f_slock); 1700 wfp->f_count++; 1701 /* 'indx' has been fd_used'ed by caller */ 1702 FILE_UNUSE_HAVELOCK(wfp, l); 1703 return (0); 1704 1705 case EMOVEFD: 1706 /* 1707 * Steal away the file pointer from dfd, and stuff it into indx. 1708 */ 1709 simple_lock(&fdp->fd_slock); 1710 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1711 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1712 fdp->fd_ofiles[dfd] = NULL; 1713 fdp->fd_ofileflags[dfd] = 0; 1714 /* 1715 * Complete the clean up of the filedesc structure by 1716 * recomputing the various hints. 1717 */ 1718 /* 'indx' has been fd_used'ed by caller */ 1719 fd_unused(fdp, dfd); 1720 simple_unlock(&fdp->fd_slock); 1721 FILE_UNUSE(wfp, l); 1722 return (0); 1723 1724 default: 1725 FILE_UNUSE(wfp, l); 1726 return (error); 1727 } 1728 /* NOTREACHED */ 1729 } 1730 1731 /* 1732 * Close any files on exec? 1733 */ 1734 void 1735 fdcloseexec(struct lwp *l) 1736 { 1737 struct proc *p = l->l_proc; 1738 struct filedesc *fdp; 1739 int fd; 1740 1741 fdunshare(l); 1742 cwdunshare(p); 1743 1744 fdp = p->p_fd; 1745 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1746 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1747 (void) fdrelease(l, fd); 1748 } 1749 1750 /* 1751 * It is unsafe for set[ug]id processes to be started with file 1752 * descriptors 0..2 closed, as these descriptors are given implicit 1753 * significance in the Standard C library. fdcheckstd() will create a 1754 * descriptor referencing /dev/null for each of stdin, stdout, and 1755 * stderr that is not already open. 1756 */ 1757 #define CHECK_UPTO 3 1758 int 1759 fdcheckstd(l) 1760 struct lwp *l; 1761 { 1762 struct proc *p; 1763 struct nameidata nd; 1764 struct filedesc *fdp; 1765 struct file *fp; 1766 struct file *devnullfp = NULL; /* Quell compiler warning */ 1767 struct proc *pp; 1768 register_t retval; 1769 int fd, i, error, flags = FREAD|FWRITE, devnull = -1; 1770 char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; 1771 1772 p = l->l_proc; 1773 closed[0] = '\0'; 1774 if ((fdp = p->p_fd) == NULL) 1775 return (0); 1776 for (i = 0; i < CHECK_UPTO; i++) { 1777 if (fdp->fd_ofiles[i] != NULL) 1778 continue; 1779 snprintf(which, sizeof(which), ",%d", i); 1780 strlcat(closed, which, sizeof(closed)); 1781 if (devnullfp == NULL) { 1782 if ((error = falloc(l, &fp, &fd)) != 0) 1783 return (error); 1784 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1785 l); 1786 if ((error = vn_open(&nd, flags, 0)) != 0) { 1787 FILE_UNUSE(fp, l); 1788 ffree(fp); 1789 fdremove(p->p_fd, fd); 1790 return (error); 1791 } 1792 fp->f_data = nd.ni_vp; 1793 fp->f_flag = flags; 1794 fp->f_ops = &vnops; 1795 fp->f_type = DTYPE_VNODE; 1796 VOP_UNLOCK(nd.ni_vp, 0); 1797 devnull = fd; 1798 devnullfp = fp; 1799 FILE_SET_MATURE(fp); 1800 } else { 1801 restart: 1802 if ((error = fdalloc(p, 0, &fd)) != 0) { 1803 if (error == ENOSPC) { 1804 fdexpand(p); 1805 goto restart; 1806 } 1807 return (error); 1808 } 1809 1810 simple_lock(&devnullfp->f_slock); 1811 FILE_USE(devnullfp); 1812 /* finishdup() will unuse the descriptors for us */ 1813 if ((error = finishdup(l, devnull, fd, &retval)) != 0) 1814 return (error); 1815 } 1816 } 1817 if (devnullfp) 1818 FILE_UNUSE(devnullfp, l); 1819 if (closed[0] != '\0') { 1820 pp = p->p_pptr; 1821 log(LOG_WARNING, "set{u,g}id pid %d (%s) " 1822 "was invoked by uid %d ppid %d (%s) " 1823 "with fd %s closed\n", 1824 p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred), 1825 pp->p_pid, pp->p_comm, &closed[1]); 1826 } 1827 return (0); 1828 } 1829 #undef CHECK_UPTO 1830 1831 /* 1832 * Sets descriptor owner. If the owner is a process, 'pgid' 1833 * is set to positive value, process ID. If the owner is process group, 1834 * 'pgid' is set to -pg_id. 1835 */ 1836 int 1837 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data) 1838 { 1839 int id = *(const int *)data; 1840 int error; 1841 1842 switch (cmd) { 1843 case TIOCSPGRP: 1844 if (id < 0) 1845 return (EINVAL); 1846 id = -id; 1847 break; 1848 default: 1849 break; 1850 } 1851 1852 if (id > 0 && !pfind(id)) 1853 return (ESRCH); 1854 else if (id < 0 && (error = pgid_in_session(p, -id))) 1855 return (error); 1856 1857 *pgid = id; 1858 return (0); 1859 } 1860 1861 /* 1862 * Return descriptor owner information. If the value is positive, 1863 * it's process ID. If it's negative, it's process group ID and 1864 * needs the sign removed before use. 1865 */ 1866 int 1867 fgetown(struct proc *p __unused, pid_t pgid, int cmd, void *data) 1868 { 1869 switch (cmd) { 1870 case TIOCGPGRP: 1871 *(int *)data = -pgid; 1872 break; 1873 default: 1874 *(int *)data = pgid; 1875 break; 1876 } 1877 return (0); 1878 } 1879 1880 /* 1881 * Send signal to descriptor owner, either process or process group. 1882 */ 1883 void 1884 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1885 { 1886 struct proc *p1; 1887 ksiginfo_t ksi; 1888 1889 memset(&ksi, 0, sizeof(ksi)); 1890 ksi.ksi_signo = signo; 1891 ksi.ksi_code = code; 1892 ksi.ksi_band = band; 1893 1894 if (pgid > 0 && (p1 = pfind(pgid))) 1895 kpsignal(p1, &ksi, fdescdata); 1896 else if (pgid < 0) 1897 kgsignal(-pgid, &ksi, fdescdata); 1898 } 1899 1900 int 1901 fdclone(struct lwp *l, struct file *fp, int fd, int flag, 1902 const struct fileops *fops, void *data) 1903 { 1904 fp->f_flag = flag; 1905 fp->f_type = DTYPE_MISC; 1906 fp->f_ops = fops; 1907 fp->f_data = data; 1908 1909 l->l_dupfd = fd; 1910 1911 FILE_SET_MATURE(fp); 1912 FILE_UNUSE(fp, l); 1913 return EMOVEFD; 1914 } 1915 1916 /* ARGSUSED */ 1917 int 1918 fnullop_fcntl(struct file *fp __unused, u_int cmd, void *data __unused, 1919 struct lwp *l __unused) 1920 { 1921 if (cmd == F_SETFL) 1922 return 0; 1923 1924 return EOPNOTSUPP; 1925 } 1926 1927 /* ARGSUSED */ 1928 int 1929 fnullop_poll(struct file *fp __unused, int which __unused, 1930 struct lwp *l __unused) 1931 { 1932 return 0; 1933 } 1934 1935 1936 /* ARGSUSED */ 1937 int 1938 fnullop_kqfilter(struct file *fp __unused, struct knote *kn __unused) 1939 { 1940 1941 return 0; 1942 } 1943 1944 /* ARGSUSED */ 1945 int 1946 fbadop_stat(struct file *fp __unused, struct stat *sb __unused, 1947 struct lwp *l __unused) 1948 { 1949 return EOPNOTSUPP; 1950 } 1951