1 /* $NetBSD: kern_descrip.c,v 1.140 2006/01/31 14:02:10 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.140 2006/01/31 14:02:10 yamt Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/filedesc.h> 45 #include <sys/kernel.h> 46 #include <sys/vnode.h> 47 #include <sys/proc.h> 48 #include <sys/file.h> 49 #include <sys/namei.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/stat.h> 53 #include <sys/ioctl.h> 54 #include <sys/fcntl.h> 55 #include <sys/malloc.h> 56 #include <sys/pool.h> 57 #include <sys/syslog.h> 58 #include <sys/unistd.h> 59 #include <sys/resourcevar.h> 60 #include <sys/conf.h> 61 #include <sys/event.h> 62 63 #include <sys/mount.h> 64 #include <sys/sa.h> 65 #include <sys/syscallargs.h> 66 67 /* 68 * Descriptor management. 69 */ 70 struct filelist filehead; /* head of list of open files */ 71 int nfiles; /* actual number of open files */ 72 POOL_INIT(file_pool, sizeof(struct file), 0, 0, 0, "filepl", 73 &pool_allocator_nointr); 74 POOL_INIT(cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl", 75 &pool_allocator_nointr); 76 POOL_INIT(filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 77 &pool_allocator_nointr); 78 79 /* Global file list lock */ 80 static struct simplelock filelist_slock = SIMPLELOCK_INITIALIZER; 81 82 MALLOC_DEFINE(M_FILE, "file", "Open file structure"); 83 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); 84 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 85 86 static inline int 87 find_next_zero(uint32_t *bitmap, int want, u_int bits) 88 { 89 int i, off, maxoff; 90 uint32_t sub; 91 92 if (want > bits) 93 return -1; 94 95 off = want >> NDENTRYSHIFT; 96 i = want & NDENTRYMASK; 97 if (i) { 98 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 99 if (sub != ~0) 100 goto found; 101 off++; 102 } 103 104 maxoff = NDLOSLOTS(bits); 105 while (off < maxoff) { 106 if ((sub = bitmap[off]) != ~0) 107 goto found; 108 off++; 109 } 110 111 return (-1); 112 113 found: 114 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 115 } 116 117 static int 118 find_last_set(struct filedesc *fd, int last) 119 { 120 int off, i; 121 struct file **ofiles = fd->fd_ofiles; 122 uint32_t *bitmap = fd->fd_lomap; 123 124 off = (last - 1) >> NDENTRYSHIFT; 125 126 while (off >= 0 && !bitmap[off]) 127 off--; 128 129 if (off < 0) 130 return (-1); 131 132 i = ((off + 1) << NDENTRYSHIFT) - 1; 133 if (i >= last) 134 i = last - 1; 135 136 while (i > 0 && ofiles[i] == NULL) 137 i--; 138 139 return (i); 140 } 141 142 static inline void 143 fd_used(struct filedesc *fdp, int fd) 144 { 145 u_int off = fd >> NDENTRYSHIFT; 146 147 LOCK_ASSERT(simple_lock_held(&fdp->fd_slock)); 148 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 149 150 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 151 if (fdp->fd_lomap[off] == ~0) { 152 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 153 (1 << (off & NDENTRYMASK))) == 0); 154 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 155 } 156 157 if (fd > fdp->fd_lastfile) 158 fdp->fd_lastfile = fd; 159 } 160 161 static inline void 162 fd_unused(struct filedesc *fdp, int fd) 163 { 164 u_int off = fd >> NDENTRYSHIFT; 165 166 LOCK_ASSERT(simple_lock_held(&fdp->fd_slock)); 167 if (fd < fdp->fd_freefile) 168 fdp->fd_freefile = fd; 169 170 if (fdp->fd_lomap[off] == ~0) { 171 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 172 (1 << (off & NDENTRYMASK))) != 0); 173 fdp->fd_himap[off >> NDENTRYSHIFT] &= 174 ~(1 << (off & NDENTRYMASK)); 175 } 176 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 177 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 178 179 #ifdef DIAGNOSTIC 180 if (fd > fdp->fd_lastfile) 181 panic("fd_unused: fd_lastfile inconsistent"); 182 #endif 183 if (fd == fdp->fd_lastfile) 184 fdp->fd_lastfile = find_last_set(fdp, fd); 185 } 186 187 /* 188 * Lookup the file structure corresponding to a file descriptor 189 * and return it locked. 190 * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);' 191 * The locking strategy has been optimised for this case, i.e. 192 * fd_getfile() returns the file locked while FILE_USE() will increment 193 * the file's use count and unlock. 194 */ 195 struct file * 196 fd_getfile(struct filedesc *fdp, int fd) 197 { 198 struct file *fp; 199 200 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 201 return (NULL); 202 203 simple_lock(&fp->f_slock); 204 if (FILE_IS_USABLE(fp) == 0) { 205 simple_unlock(&fp->f_slock); 206 return (NULL); 207 } 208 209 return (fp); 210 } 211 212 /* 213 * Common code for dup, dup2, and fcntl(F_DUPFD). 214 */ 215 static int 216 finishdup(struct lwp *l, int old, int new, register_t *retval) 217 { 218 struct filedesc *fdp; 219 struct file *fp, *delfp; 220 221 fdp = l->l_proc->p_fd; 222 223 /* 224 * If there is a file in the new slot, remember it so we 225 * can close it after we've finished the dup. We need 226 * to do it after the dup is finished, since closing 227 * the file may block. 228 * 229 * Note: `old' is already used for us. 230 * Note: Caller already marked `new' slot "used". 231 */ 232 simple_lock(&fdp->fd_slock); 233 delfp = fdp->fd_ofiles[new]; 234 235 fp = fdp->fd_ofiles[old]; 236 KDASSERT(fp != NULL); 237 fdp->fd_ofiles[new] = fp; 238 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; 239 simple_unlock(&fdp->fd_slock); 240 241 *retval = new; 242 simple_lock(&fp->f_slock); 243 fp->f_count++; 244 FILE_UNUSE_HAVELOCK(fp, l); 245 246 if (delfp != NULL) { 247 simple_lock(&delfp->f_slock); 248 FILE_USE(delfp); 249 if (new < fdp->fd_knlistsize) 250 knote_fdclose(l, new); 251 (void) closef(delfp, l); 252 } 253 return (0); 254 } 255 256 /* 257 * System calls on descriptors. 258 */ 259 260 /* 261 * Duplicate a file descriptor. 262 */ 263 /* ARGSUSED */ 264 int 265 sys_dup(struct lwp *l, void *v, register_t *retval) 266 { 267 struct sys_dup_args /* { 268 syscallarg(int) fd; 269 } */ *uap = v; 270 struct file *fp; 271 struct filedesc *fdp; 272 struct proc *p; 273 int old, new, error; 274 275 p = l->l_proc; 276 fdp = p->p_fd; 277 old = SCARG(uap, fd); 278 279 restart: 280 if ((fp = fd_getfile(fdp, old)) == NULL) 281 return (EBADF); 282 283 FILE_USE(fp); 284 285 if ((error = fdalloc(p, 0, &new)) != 0) { 286 if (error == ENOSPC) { 287 fdexpand(p); 288 FILE_UNUSE(fp, l); 289 goto restart; 290 } 291 FILE_UNUSE(fp, l); 292 return (error); 293 } 294 295 /* finishdup() will unuse the descriptors for us */ 296 return (finishdup(l, old, new, retval)); 297 } 298 299 /* 300 * Duplicate a file descriptor to a particular value. 301 */ 302 /* ARGSUSED */ 303 int 304 sys_dup2(struct lwp *l, void *v, register_t *retval) 305 { 306 struct sys_dup2_args /* { 307 syscallarg(int) from; 308 syscallarg(int) to; 309 } */ *uap = v; 310 struct file *fp; 311 struct filedesc *fdp; 312 struct proc *p; 313 int old, new, i, error; 314 315 p = l->l_proc; 316 fdp = p->p_fd; 317 old = SCARG(uap, from); 318 new = SCARG(uap, to); 319 320 restart: 321 if ((fp = fd_getfile(fdp, old)) == NULL) 322 return (EBADF); 323 324 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 325 (u_int)new >= maxfiles) { 326 simple_unlock(&fp->f_slock); 327 return (EBADF); 328 } 329 330 if (old == new) { 331 simple_unlock(&fp->f_slock); 332 *retval = new; 333 return (0); 334 } 335 336 FILE_USE(fp); 337 338 if (new >= fdp->fd_nfiles) { 339 if ((error = fdalloc(p, new, &i)) != 0) { 340 if (error == ENOSPC) { 341 fdexpand(p); 342 FILE_UNUSE(fp, l); 343 goto restart; 344 } 345 FILE_UNUSE(fp, l); 346 return (error); 347 } 348 if (new != i) 349 panic("dup2: fdalloc"); 350 } else { 351 simple_lock(&fdp->fd_slock); 352 /* 353 * Mark `new' slot "used" only if it was empty. 354 */ 355 if (fdp->fd_ofiles[new] == NULL) 356 fd_used(fdp, new); 357 simple_unlock(&fdp->fd_slock); 358 } 359 360 /* 361 * finishdup() will close the file that's in the `new' 362 * slot, if there's one there. 363 */ 364 365 /* finishdup() will unuse the descriptors for us */ 366 return (finishdup(l, old, new, retval)); 367 } 368 369 /* 370 * fcntl call which is being passed to the file's fs. 371 */ 372 static int 373 fcntl_forfs(int fd, struct lwp *l, int cmd, void *arg) 374 { 375 struct file *fp; 376 struct filedesc *fdp; 377 int error; 378 u_int size; 379 void *data, *memp; 380 #define STK_PARAMS 128 381 char stkbuf[STK_PARAMS]; 382 383 /* fd's value was validated in sys_fcntl before calling this routine */ 384 fdp = l->l_proc->p_fd; 385 fp = fdp->fd_ofiles[fd]; 386 387 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 388 return (EBADF); 389 390 /* 391 * Interpret high order word to find amount of data to be 392 * copied to/from the user's address space. 393 */ 394 size = (size_t)F_PARAM_LEN(cmd); 395 if (size > F_PARAM_MAX) 396 return (EINVAL); 397 memp = NULL; 398 if (size > sizeof(stkbuf)) { 399 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 400 data = memp; 401 } else 402 data = stkbuf; 403 if (cmd & F_FSIN) { 404 if (size) { 405 error = copyin(arg, data, size); 406 if (error) { 407 if (memp) 408 free(memp, M_IOCTLOPS); 409 return (error); 410 } 411 } else 412 *(void **)data = arg; 413 } else if ((cmd & F_FSOUT) && size) 414 /* 415 * Zero the buffer so the user always 416 * gets back something deterministic. 417 */ 418 memset(data, 0, size); 419 else if (cmd & F_FSVOID) 420 *(void **)data = arg; 421 422 423 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, l); 424 425 /* 426 * Copy any data to user, size was 427 * already set and checked above. 428 */ 429 if (error == 0 && (cmd & F_FSOUT) && size) 430 error = copyout(data, arg, size); 431 if (memp) 432 free(memp, M_IOCTLOPS); 433 return (error); 434 } 435 436 /* 437 * The file control system call. 438 */ 439 /* ARGSUSED */ 440 int 441 sys_fcntl(struct lwp *l, void *v, register_t *retval) 442 { 443 struct sys_fcntl_args /* { 444 syscallarg(int) fd; 445 syscallarg(int) cmd; 446 syscallarg(void *) arg; 447 } */ *uap = v; 448 struct filedesc *fdp; 449 struct file *fp; 450 struct proc *p; 451 struct vnode *vp; 452 int fd, i, tmp, error, flg, cmd, newmin; 453 struct flock fl; 454 455 p = l->l_proc; 456 fd = SCARG(uap, fd); 457 cmd = SCARG(uap, cmd); 458 fdp = p->p_fd; 459 error = 0; 460 flg = F_POSIX; 461 462 switch (cmd) { 463 case F_CLOSEM: 464 if (fd < 0) 465 return EBADF; 466 while (fdp->fd_lastfile >= fd) 467 fdrelease(l, fdp->fd_lastfile); 468 return 0; 469 470 case F_MAXFD: 471 *retval = fdp->fd_lastfile; 472 return 0; 473 474 default: 475 /* Handled below */ 476 break; 477 } 478 479 restart: 480 if ((fp = fd_getfile(fdp, fd)) == NULL) 481 return (EBADF); 482 483 FILE_USE(fp); 484 485 if ((cmd & F_FSCTL)) { 486 error = fcntl_forfs(fd, l, cmd, SCARG(uap, arg)); 487 goto out; 488 } 489 490 switch (cmd) { 491 492 case F_DUPFD: 493 newmin = (long)SCARG(uap, arg); 494 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 495 (u_int)newmin >= maxfiles) { 496 error = EINVAL; 497 goto out; 498 } 499 if ((error = fdalloc(p, newmin, &i)) != 0) { 500 if (error == ENOSPC) { 501 fdexpand(p); 502 FILE_UNUSE(fp, l); 503 goto restart; 504 } 505 goto out; 506 } 507 508 /* finishdup() will unuse the descriptors for us */ 509 return (finishdup(l, fd, i, retval)); 510 511 case F_GETFD: 512 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 513 break; 514 515 case F_SETFD: 516 if ((long)SCARG(uap, arg) & 1) 517 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 518 else 519 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 520 break; 521 522 case F_GETFL: 523 *retval = OFLAGS(fp->f_flag); 524 break; 525 526 case F_SETFL: 527 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 528 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, l); 529 if (error) 530 break; 531 i = tmp ^ fp->f_flag; 532 if (i & FNONBLOCK) { 533 int flgs = tmp & FNONBLOCK; 534 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs, l); 535 if (error) 536 goto reset_fcntl; 537 } 538 if (i & FASYNC) { 539 int flgs = tmp & FASYNC; 540 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs, l); 541 if (error) { 542 if (i & FNONBLOCK) { 543 tmp = fp->f_flag & FNONBLOCK; 544 (void)(*fp->f_ops->fo_ioctl)(fp, 545 FIONBIO, &tmp, l); 546 } 547 goto reset_fcntl; 548 } 549 } 550 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp; 551 break; 552 reset_fcntl: 553 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, l); 554 break; 555 556 case F_GETOWN: 557 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp, l); 558 *retval = tmp; 559 break; 560 561 case F_SETOWN: 562 tmp = (int)(intptr_t) SCARG(uap, arg); 563 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, l); 564 break; 565 566 case F_SETLKW: 567 flg |= F_WAIT; 568 /* Fall into F_SETLK */ 569 570 case F_SETLK: 571 if (fp->f_type != DTYPE_VNODE) { 572 error = EINVAL; 573 goto out; 574 } 575 vp = (struct vnode *)fp->f_data; 576 /* Copy in the lock structure */ 577 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 578 if (error) 579 goto out; 580 if (fl.l_whence == SEEK_CUR) 581 fl.l_start += fp->f_offset; 582 switch (fl.l_type) { 583 case F_RDLCK: 584 if ((fp->f_flag & FREAD) == 0) { 585 error = EBADF; 586 goto out; 587 } 588 p->p_flag |= P_ADVLOCK; 589 error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg); 590 goto out; 591 592 case F_WRLCK: 593 if ((fp->f_flag & FWRITE) == 0) { 594 error = EBADF; 595 goto out; 596 } 597 p->p_flag |= P_ADVLOCK; 598 error = VOP_ADVLOCK(vp, p, F_SETLK, &fl, flg); 599 goto out; 600 601 case F_UNLCK: 602 error = VOP_ADVLOCK(vp, p, F_UNLCK, &fl, F_POSIX); 603 goto out; 604 605 default: 606 error = EINVAL; 607 goto out; 608 } 609 610 case F_GETLK: 611 if (fp->f_type != DTYPE_VNODE) { 612 error = EINVAL; 613 goto out; 614 } 615 vp = (struct vnode *)fp->f_data; 616 /* Copy in the lock structure */ 617 error = copyin(SCARG(uap, arg), &fl, sizeof(fl)); 618 if (error) 619 goto out; 620 if (fl.l_whence == SEEK_CUR) 621 fl.l_start += fp->f_offset; 622 if (fl.l_type != F_RDLCK && 623 fl.l_type != F_WRLCK && 624 fl.l_type != F_UNLCK) { 625 error = EINVAL; 626 goto out; 627 } 628 error = VOP_ADVLOCK(vp, p, F_GETLK, &fl, F_POSIX); 629 if (error) 630 goto out; 631 error = copyout(&fl, SCARG(uap, arg), sizeof(fl)); 632 break; 633 634 default: 635 error = EINVAL; 636 } 637 638 out: 639 FILE_UNUSE(fp, l); 640 return (error); 641 } 642 643 void 644 fdremove(struct filedesc *fdp, int fd) 645 { 646 647 simple_lock(&fdp->fd_slock); 648 fdp->fd_ofiles[fd] = NULL; 649 fd_unused(fdp, fd); 650 simple_unlock(&fdp->fd_slock); 651 } 652 653 int 654 fdrelease(struct lwp *l, int fd) 655 { 656 struct proc *p = l->l_proc; 657 struct filedesc *fdp; 658 struct file **fpp, *fp; 659 660 fdp = p->p_fd; 661 simple_lock(&fdp->fd_slock); 662 if (fd < 0 || fd > fdp->fd_lastfile) 663 goto badf; 664 fpp = &fdp->fd_ofiles[fd]; 665 fp = *fpp; 666 if (fp == NULL) 667 goto badf; 668 669 simple_lock(&fp->f_slock); 670 if (!FILE_IS_USABLE(fp)) { 671 simple_unlock(&fp->f_slock); 672 goto badf; 673 } 674 675 FILE_USE(fp); 676 677 *fpp = NULL; 678 fdp->fd_ofileflags[fd] = 0; 679 fd_unused(fdp, fd); 680 simple_unlock(&fdp->fd_slock); 681 if (fd < fdp->fd_knlistsize) 682 knote_fdclose(l, fd); 683 return (closef(fp, l)); 684 685 badf: 686 simple_unlock(&fdp->fd_slock); 687 return (EBADF); 688 } 689 690 /* 691 * Close a file descriptor. 692 */ 693 /* ARGSUSED */ 694 int 695 sys_close(struct lwp *l, void *v, register_t *retval) 696 { 697 struct sys_close_args /* { 698 syscallarg(int) fd; 699 } */ *uap = v; 700 int fd; 701 struct filedesc *fdp; 702 struct proc *p; 703 704 p = l->l_proc; 705 fd = SCARG(uap, fd); 706 fdp = p->p_fd; 707 708 #if 0 709 if (fd_getfile(fdp, fd) == NULL) 710 return (EBADF); 711 #endif 712 713 return (fdrelease(l, fd)); 714 } 715 716 /* 717 * Return status information about a file descriptor. 718 */ 719 /* ARGSUSED */ 720 int 721 sys___fstat30(struct lwp *l, void *v, register_t *retval) 722 { 723 struct sys___fstat30_args /* { 724 syscallarg(int) fd; 725 syscallarg(struct stat *) sb; 726 } */ *uap = v; 727 int fd; 728 struct filedesc *fdp; 729 struct file *fp; 730 struct proc *p; 731 struct stat ub; 732 int error; 733 734 p = l->l_proc; 735 fd = SCARG(uap, fd); 736 fdp = p->p_fd; 737 738 if ((fp = fd_getfile(fdp, fd)) == NULL) 739 return (EBADF); 740 741 FILE_USE(fp); 742 error = (*fp->f_ops->fo_stat)(fp, &ub, l); 743 FILE_UNUSE(fp, l); 744 745 if (error == 0) 746 error = copyout(&ub, SCARG(uap, sb), sizeof(ub)); 747 748 return (error); 749 } 750 751 /* 752 * Return pathconf information about a file descriptor. 753 */ 754 /* ARGSUSED */ 755 int 756 sys_fpathconf(struct lwp *l, void *v, register_t *retval) 757 { 758 struct sys_fpathconf_args /* { 759 syscallarg(int) fd; 760 syscallarg(int) name; 761 } */ *uap = v; 762 int fd; 763 struct filedesc *fdp; 764 struct file *fp; 765 struct proc *p; 766 struct vnode *vp; 767 int error; 768 769 p = l->l_proc; 770 fd = SCARG(uap, fd); 771 fdp = p->p_fd; 772 error = 0; 773 774 if ((fp = fd_getfile(fdp, fd)) == NULL) 775 return (EBADF); 776 777 FILE_USE(fp); 778 779 switch (fp->f_type) { 780 781 case DTYPE_SOCKET: 782 case DTYPE_PIPE: 783 if (SCARG(uap, name) != _PC_PIPE_BUF) 784 error = EINVAL; 785 else 786 *retval = PIPE_BUF; 787 break; 788 789 case DTYPE_VNODE: 790 vp = (struct vnode *)fp->f_data; 791 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 792 break; 793 794 case DTYPE_KQUEUE: 795 error = EINVAL; 796 break; 797 798 default: 799 error = EOPNOTSUPP; 800 break; 801 } 802 803 FILE_UNUSE(fp, l); 804 return (error); 805 } 806 807 /* 808 * Allocate a file descriptor for the process. 809 */ 810 int fdexpanded; /* XXX: what else uses this? */ 811 812 int 813 fdalloc(struct proc *p, int want, int *result) 814 { 815 struct filedesc *fdp; 816 int i, lim, last, error; 817 u_int off, new; 818 819 fdp = p->p_fd; 820 simple_lock(&fdp->fd_slock); 821 822 /* 823 * Search for a free descriptor starting at the higher 824 * of want or fd_freefile. If that fails, consider 825 * expanding the ofile array. 826 */ 827 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 828 last = min(fdp->fd_nfiles, lim); 829 again: 830 if ((i = want) < fdp->fd_freefile) 831 i = fdp->fd_freefile; 832 off = i >> NDENTRYSHIFT; 833 new = find_next_zero(fdp->fd_himap, off, 834 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 835 if (new != -1) { 836 i = find_next_zero(&fdp->fd_lomap[new], 837 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 838 if (i == -1) { 839 /* 840 * free file descriptor in this block was 841 * below want, try again with higher want. 842 */ 843 want = (new + 1) << NDENTRYSHIFT; 844 goto again; 845 } 846 i += (new << NDENTRYSHIFT); 847 if (i < last) { 848 if (fdp->fd_ofiles[i] == NULL) { 849 fd_used(fdp, i); 850 if (want <= fdp->fd_freefile) 851 fdp->fd_freefile = i; 852 *result = i; 853 error = 0; 854 goto out; 855 } 856 } 857 } 858 859 /* No space in current array. Expand or let the caller do it. */ 860 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 861 862 out: 863 simple_unlock(&fdp->fd_slock); 864 return (error); 865 } 866 867 void 868 fdexpand(struct proc *p) 869 { 870 struct filedesc *fdp; 871 int i, numfiles, oldnfiles; 872 struct file **newofile; 873 char *newofileflags; 874 uint32_t *newhimap = NULL, *newlomap = NULL; 875 876 fdp = p->p_fd; 877 878 restart: 879 oldnfiles = fdp->fd_nfiles; 880 881 if (oldnfiles < NDEXTENT) 882 numfiles = NDEXTENT; 883 else 884 numfiles = 2 * oldnfiles; 885 886 newofile = malloc(numfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 887 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 888 newhimap = malloc(NDHISLOTS(numfiles) * sizeof(uint32_t), 889 M_FILEDESC, M_WAITOK); 890 newlomap = malloc(NDLOSLOTS(numfiles) * sizeof(uint32_t), 891 M_FILEDESC, M_WAITOK); 892 } 893 894 simple_lock(&fdp->fd_slock); 895 /* lock fdp */ 896 if (fdp->fd_nfiles != oldnfiles) { 897 /* fdp changed; retry */ 898 simple_unlock(&fdp->fd_slock); 899 free(newofile, M_FILEDESC); 900 if (newhimap != NULL) free(newhimap, M_FILEDESC); 901 if (newlomap != NULL) free(newlomap, M_FILEDESC); 902 goto restart; 903 } 904 905 newofileflags = (char *) &newofile[numfiles]; 906 /* 907 * Copy the existing ofile and ofileflags arrays 908 * and zero the new portion of each array. 909 */ 910 memcpy(newofile, fdp->fd_ofiles, 911 (i = sizeof(struct file *) * fdp->fd_nfiles)); 912 memset((char *)newofile + i, 0, 913 numfiles * sizeof(struct file *) - i); 914 memcpy(newofileflags, fdp->fd_ofileflags, 915 (i = sizeof(char) * fdp->fd_nfiles)); 916 memset(newofileflags + i, 0, numfiles * sizeof(char) - i); 917 if (oldnfiles > NDFILE) 918 free(fdp->fd_ofiles, M_FILEDESC); 919 920 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 921 memcpy(newhimap, fdp->fd_himap, 922 (i = NDHISLOTS(oldnfiles) * sizeof(uint32_t))); 923 memset((char *)newhimap + i, 0, 924 NDHISLOTS(numfiles) * sizeof(uint32_t) - i); 925 926 memcpy(newlomap, fdp->fd_lomap, 927 (i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t))); 928 memset((char *)newlomap + i, 0, 929 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); 930 931 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { 932 free(fdp->fd_himap, M_FILEDESC); 933 free(fdp->fd_lomap, M_FILEDESC); 934 } 935 fdp->fd_himap = newhimap; 936 fdp->fd_lomap = newlomap; 937 } 938 939 fdp->fd_ofiles = newofile; 940 fdp->fd_ofileflags = newofileflags; 941 fdp->fd_nfiles = numfiles; 942 943 simple_unlock(&fdp->fd_slock); 944 945 fdexpanded++; 946 } 947 948 /* 949 * Check to see whether n user file descriptors 950 * are available to the process p. 951 */ 952 int 953 fdavail(struct proc *p, int n) 954 { 955 struct filedesc *fdp; 956 struct file **fpp; 957 int i, lim; 958 959 fdp = p->p_fd; 960 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 961 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) 962 return (1); 963 fpp = &fdp->fd_ofiles[fdp->fd_freefile]; 964 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++) 965 if (*fpp == NULL && --n <= 0) 966 return (1); 967 return (0); 968 } 969 970 /* 971 * Create a new open file structure and allocate 972 * a file descriptor for the process that refers to it. 973 */ 974 int 975 falloc(struct proc *p, struct file **resultfp, int *resultfd) 976 { 977 struct file *fp, *fq; 978 int error, i; 979 980 restart: 981 if ((error = fdalloc(p, 0, &i)) != 0) { 982 if (error == ENOSPC) { 983 fdexpand(p); 984 goto restart; 985 } 986 return (error); 987 } 988 989 fp = pool_get(&file_pool, PR_WAITOK); 990 simple_lock(&filelist_slock); 991 if (nfiles >= maxfiles) { 992 tablefull("file", "increase kern.maxfiles or MAXFILES"); 993 simple_unlock(&filelist_slock); 994 simple_lock(&p->p_fd->fd_slock); 995 fd_unused(p->p_fd, i); 996 simple_unlock(&p->p_fd->fd_slock); 997 pool_put(&file_pool, fp); 998 return (ENFILE); 999 } 1000 /* 1001 * Allocate a new file descriptor. 1002 * If the process has file descriptor zero open, add to the list 1003 * of open files at that point, otherwise put it at the front of 1004 * the list of open files. 1005 */ 1006 nfiles++; 1007 memset(fp, 0, sizeof(struct file)); 1008 fp->f_iflags = FIF_LARVAL; 1009 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 1010 LIST_INSERT_AFTER(fq, fp, f_list); 1011 } else { 1012 LIST_INSERT_HEAD(&filehead, fp, f_list); 1013 } 1014 simple_unlock(&filelist_slock); 1015 KDASSERT(p->p_fd->fd_ofiles[i] == NULL); 1016 p->p_fd->fd_ofiles[i] = fp; 1017 simple_lock_init(&fp->f_slock); 1018 fp->f_count = 1; 1019 fp->f_cred = p->p_ucred; 1020 crhold(fp->f_cred); 1021 if (resultfp) { 1022 fp->f_usecount = 1; 1023 *resultfp = fp; 1024 } 1025 if (resultfd) 1026 *resultfd = i; 1027 return (0); 1028 } 1029 1030 /* 1031 * Free a file descriptor. 1032 */ 1033 void 1034 ffree(struct file *fp) 1035 { 1036 1037 #ifdef DIAGNOSTIC 1038 if (fp->f_usecount) 1039 panic("ffree"); 1040 #endif 1041 1042 simple_lock(&filelist_slock); 1043 LIST_REMOVE(fp, f_list); 1044 crfree(fp->f_cred); 1045 #ifdef DIAGNOSTIC 1046 fp->f_count = 0; /* What's the point? */ 1047 #endif 1048 nfiles--; 1049 simple_unlock(&filelist_slock); 1050 pool_put(&file_pool, fp); 1051 } 1052 1053 /* 1054 * Create an initial cwdinfo structure, using the same current and root 1055 * directories as p. 1056 */ 1057 struct cwdinfo * 1058 cwdinit(struct proc *p) 1059 { 1060 struct cwdinfo *cwdi; 1061 1062 cwdi = pool_get(&cwdi_pool, PR_WAITOK); 1063 1064 simple_lock_init(&cwdi->cwdi_slock); 1065 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir; 1066 if (cwdi->cwdi_cdir) 1067 VREF(cwdi->cwdi_cdir); 1068 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir; 1069 if (cwdi->cwdi_rdir) 1070 VREF(cwdi->cwdi_rdir); 1071 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask; 1072 cwdi->cwdi_refcnt = 1; 1073 1074 return (cwdi); 1075 } 1076 1077 /* 1078 * Make p2 share p1's cwdinfo. 1079 */ 1080 void 1081 cwdshare(struct proc *p1, struct proc *p2) 1082 { 1083 struct cwdinfo *cwdi = p1->p_cwdi; 1084 1085 simple_lock(&cwdi->cwdi_slock); 1086 cwdi->cwdi_refcnt++; 1087 simple_unlock(&cwdi->cwdi_slock); 1088 p2->p_cwdi = cwdi; 1089 } 1090 1091 /* 1092 * Make this process not share its cwdinfo structure, maintaining 1093 * all cwdinfo state. 1094 */ 1095 void 1096 cwdunshare(struct proc *p) 1097 { 1098 struct cwdinfo *oldcwdi, *newcwdi; 1099 1100 if (p->p_cwdi->cwdi_refcnt == 1) 1101 return; 1102 1103 newcwdi = cwdinit(p); 1104 oldcwdi = p->p_cwdi; 1105 p->p_cwdi = newcwdi; 1106 cwdfree(oldcwdi); 1107 } 1108 1109 /* 1110 * Release a cwdinfo structure. 1111 */ 1112 void 1113 cwdfree(struct cwdinfo *cwdi) 1114 { 1115 int n; 1116 1117 simple_lock(&cwdi->cwdi_slock); 1118 n = --cwdi->cwdi_refcnt; 1119 simple_unlock(&cwdi->cwdi_slock); 1120 if (n > 0) 1121 return; 1122 1123 vrele(cwdi->cwdi_cdir); 1124 if (cwdi->cwdi_rdir) 1125 vrele(cwdi->cwdi_rdir); 1126 pool_put(&cwdi_pool, cwdi); 1127 } 1128 1129 /* 1130 * Create an initial filedesc structure, using the same current and root 1131 * directories as p. 1132 */ 1133 struct filedesc * 1134 fdinit(struct proc *p) 1135 { 1136 struct filedesc0 *newfdp; 1137 1138 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 1139 memset(newfdp, 0, sizeof(struct filedesc0)); 1140 1141 fdinit1(newfdp); 1142 1143 return (&newfdp->fd_fd); 1144 } 1145 1146 /* 1147 * Initialize a file descriptor table. 1148 */ 1149 void 1150 fdinit1(struct filedesc0 *newfdp) 1151 { 1152 1153 newfdp->fd_fd.fd_refcnt = 1; 1154 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 1155 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 1156 newfdp->fd_fd.fd_nfiles = NDFILE; 1157 newfdp->fd_fd.fd_knlistsize = -1; 1158 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 1159 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 1160 newfdp->fd_fd.fd_lastfile = -1; 1161 simple_lock_init(&newfdp->fd_fd.fd_slock); 1162 } 1163 1164 /* 1165 * Make p2 share p1's filedesc structure. 1166 */ 1167 void 1168 fdshare(struct proc *p1, struct proc *p2) 1169 { 1170 struct filedesc *fdp = p1->p_fd; 1171 1172 simple_lock(&fdp->fd_slock); 1173 p2->p_fd = fdp; 1174 fdp->fd_refcnt++; 1175 simple_unlock(&fdp->fd_slock); 1176 } 1177 1178 /* 1179 * Make this process not share its filedesc structure, maintaining 1180 * all file descriptor state. 1181 */ 1182 void 1183 fdunshare(struct lwp *l) 1184 { 1185 struct proc *p = l->l_proc; 1186 struct filedesc *newfd; 1187 1188 if (p->p_fd->fd_refcnt == 1) 1189 return; 1190 1191 newfd = fdcopy(p); 1192 fdfree(l); 1193 p->p_fd = newfd; 1194 } 1195 1196 /* 1197 * Clear a process's fd table. 1198 */ 1199 void 1200 fdclear(struct lwp *l) 1201 { 1202 struct proc *p = l->l_proc; 1203 struct filedesc *newfd; 1204 1205 newfd = fdinit(p); 1206 fdfree(l); 1207 p->p_fd = newfd; 1208 } 1209 1210 /* 1211 * Copy a filedesc structure. 1212 */ 1213 struct filedesc * 1214 fdcopy(struct proc *p) 1215 { 1216 struct filedesc *newfdp, *fdp; 1217 struct file **fpp, **nfpp; 1218 int i, numfiles, lastfile; 1219 1220 fdp = p->p_fd; 1221 newfdp = pool_get(&filedesc0_pool, PR_WAITOK); 1222 newfdp->fd_refcnt = 1; 1223 simple_lock_init(&newfdp->fd_slock); 1224 1225 restart: 1226 numfiles = fdp->fd_nfiles; 1227 lastfile = fdp->fd_lastfile; 1228 1229 /* 1230 * If the number of open files fits in the internal arrays 1231 * of the open file structure, use them, otherwise allocate 1232 * additional memory for the number of descriptors currently 1233 * in use. 1234 */ 1235 if (lastfile < NDFILE) { 1236 i = NDFILE; 1237 } else { 1238 /* 1239 * Compute the smallest multiple of NDEXTENT needed 1240 * for the file descriptors currently in use, 1241 * allowing the table to shrink. 1242 */ 1243 i = numfiles; 1244 while (i >= 2 * NDEXTENT && i > lastfile * 2) 1245 i /= 2; 1246 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 1247 } 1248 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1249 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t), 1250 M_FILEDESC, M_WAITOK); 1251 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t), 1252 M_FILEDESC, M_WAITOK); 1253 } 1254 1255 simple_lock(&fdp->fd_slock); 1256 if (numfiles != fdp->fd_nfiles || lastfile != fdp->fd_lastfile) { 1257 simple_unlock(&fdp->fd_slock); 1258 if (i > NDFILE) 1259 free(newfdp->fd_ofiles, M_FILEDESC); 1260 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1261 free(newfdp->fd_himap, M_FILEDESC); 1262 free(newfdp->fd_lomap, M_FILEDESC); 1263 } 1264 goto restart; 1265 } 1266 1267 if (lastfile < NDFILE) { 1268 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1269 newfdp->fd_ofileflags = 1270 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1271 } else { 1272 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1273 } 1274 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1275 newfdp->fd_himap = 1276 ((struct filedesc0 *) newfdp)->fd_dhimap; 1277 newfdp->fd_lomap = 1278 ((struct filedesc0 *) newfdp)->fd_dlomap; 1279 } 1280 1281 newfdp->fd_nfiles = i; 1282 newfdp->fd_lastfile = lastfile; 1283 newfdp->fd_freefile = fdp->fd_freefile; 1284 1285 /* Clear the entries that will not be copied over. 1286 * Avoid calling memset with 0 size (i.e. when 1287 * lastfile == i-1 */ 1288 if (lastfile < (i-1)) 1289 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1290 (i - lastfile - 1) * sizeof(struct file **)); 1291 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1292 if (i < NDENTRIES * NDENTRIES) 1293 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1294 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1295 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1296 1297 fpp = fdp->fd_ofiles; 1298 nfpp = newfdp->fd_ofiles; 1299 for (i = 0; i <= lastfile; i++, fpp++, nfpp++) { 1300 if ((*nfpp = *fpp) == NULL) 1301 continue; 1302 1303 if ((*fpp)->f_type == DTYPE_KQUEUE) 1304 /* kq descriptors cannot be copied. */ 1305 fdremove(newfdp, i); 1306 else { 1307 simple_lock(&(*fpp)->f_slock); 1308 (*fpp)->f_count++; 1309 simple_unlock(&(*fpp)->f_slock); 1310 } 1311 } 1312 1313 simple_unlock(&fdp->fd_slock); 1314 1315 newfdp->fd_knlist = NULL; 1316 newfdp->fd_knlistsize = -1; 1317 newfdp->fd_knhash = NULL; 1318 newfdp->fd_knhashmask = 0; 1319 1320 return (newfdp); 1321 } 1322 1323 /* 1324 * Release a filedesc structure. 1325 */ 1326 void 1327 fdfree(struct lwp *l) 1328 { 1329 struct proc *p = l->l_proc; 1330 struct filedesc *fdp; 1331 struct file **fpp, *fp; 1332 int i; 1333 1334 fdp = p->p_fd; 1335 simple_lock(&fdp->fd_slock); 1336 i = --fdp->fd_refcnt; 1337 simple_unlock(&fdp->fd_slock); 1338 if (i > 0) 1339 return; 1340 1341 fpp = fdp->fd_ofiles; 1342 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1343 fp = *fpp; 1344 if (fp != NULL) { 1345 *fpp = NULL; 1346 simple_lock(&fp->f_slock); 1347 FILE_USE(fp); 1348 if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize) 1349 knote_fdclose(l, fdp->fd_lastfile - i); 1350 (void) closef(fp, l); 1351 } 1352 } 1353 p->p_fd = NULL; 1354 if (fdp->fd_nfiles > NDFILE) 1355 free(fdp->fd_ofiles, M_FILEDESC); 1356 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1357 free(fdp->fd_himap, M_FILEDESC); 1358 free(fdp->fd_lomap, M_FILEDESC); 1359 } 1360 if (fdp->fd_knlist) 1361 free(fdp->fd_knlist, M_KEVENT); 1362 if (fdp->fd_knhash) 1363 hashdone(fdp->fd_knhash, M_KEVENT); 1364 pool_put(&filedesc0_pool, fdp); 1365 } 1366 1367 /* 1368 * Internal form of close. 1369 * Decrement reference count on file structure. 1370 * Note: p may be NULL when closing a file 1371 * that was being passed in a message. 1372 * 1373 * Note: we expect the caller is holding a usecount, and expects us 1374 * to drop it (the caller thinks the file is going away forever). 1375 */ 1376 int 1377 closef(struct file *fp, struct lwp *l) 1378 { 1379 struct proc *p = l ? l->l_proc : NULL; 1380 struct vnode *vp; 1381 struct flock lf; 1382 int error; 1383 1384 if (fp == NULL) 1385 return (0); 1386 1387 /* 1388 * POSIX record locking dictates that any close releases ALL 1389 * locks owned by this process. This is handled by setting 1390 * a flag in the unlock to free ONLY locks obeying POSIX 1391 * semantics, and not to free BSD-style file locks. 1392 * If the descriptor was in a message, POSIX-style locks 1393 * aren't passed with the descriptor. 1394 */ 1395 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { 1396 lf.l_whence = SEEK_SET; 1397 lf.l_start = 0; 1398 lf.l_len = 0; 1399 lf.l_type = F_UNLCK; 1400 vp = (struct vnode *)fp->f_data; 1401 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX); 1402 } 1403 1404 /* 1405 * If WANTCLOSE is set, then the reference count on the file 1406 * is 0, but there were multiple users of the file. This can 1407 * happen if a filedesc structure is shared by multiple 1408 * processes. 1409 */ 1410 simple_lock(&fp->f_slock); 1411 if (fp->f_iflags & FIF_WANTCLOSE) { 1412 /* 1413 * Another user of the file is already closing, and is 1414 * simply waiting for other users of the file to drain. 1415 * Release our usecount, and wake up the closer if it 1416 * is the only remaining use. 1417 */ 1418 #ifdef DIAGNOSTIC 1419 if (fp->f_count != 0) 1420 panic("closef: wantclose and count != 0"); 1421 if (fp->f_usecount < 2) 1422 panic("closef: wantclose and usecount < 2"); 1423 #endif 1424 if (--fp->f_usecount == 1) 1425 wakeup(&fp->f_usecount); 1426 simple_unlock(&fp->f_slock); 1427 return (0); 1428 } else { 1429 /* 1430 * Decrement the reference count. If we were not the 1431 * last reference, then release our use and just 1432 * return. 1433 */ 1434 if (--fp->f_count > 0) { 1435 #ifdef DIAGNOSTIC 1436 if (fp->f_usecount < 1) 1437 panic("closef: no wantclose and usecount < 1"); 1438 #endif 1439 fp->f_usecount--; 1440 simple_unlock(&fp->f_slock); 1441 return (0); 1442 } 1443 } 1444 1445 /* 1446 * The reference count is now 0. However, there may be 1447 * multiple potential users of this file. This can happen 1448 * if multiple processes shared a single filedesc structure. 1449 * 1450 * Notify these potential users that the file is closing. 1451 * This will prevent them from adding additional uses to 1452 * the file. 1453 */ 1454 fp->f_iflags |= FIF_WANTCLOSE; 1455 1456 /* 1457 * We expect the caller to add a use to the file. So, if we 1458 * are the last user, usecount will be 1. If it is not, we 1459 * must wait for the usecount to drain. When it drains back 1460 * to 1, we will be awakened so that we may proceed with the 1461 * close. 1462 */ 1463 #ifdef DIAGNOSTIC 1464 if (fp->f_usecount < 1) 1465 panic("closef: usecount < 1"); 1466 #endif 1467 while (fp->f_usecount > 1) 1468 (void) ltsleep(&fp->f_usecount, PRIBIO, "closef", 0, 1469 &fp->f_slock); 1470 #ifdef DIAGNOSTIC 1471 if (fp->f_usecount != 1) 1472 panic("closef: usecount != 1"); 1473 #endif 1474 1475 simple_unlock(&fp->f_slock); 1476 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 1477 lf.l_whence = SEEK_SET; 1478 lf.l_start = 0; 1479 lf.l_len = 0; 1480 lf.l_type = F_UNLCK; 1481 vp = (struct vnode *)fp->f_data; 1482 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1483 } 1484 if (fp->f_ops) 1485 error = (*fp->f_ops->fo_close)(fp, l); 1486 else 1487 error = 0; 1488 1489 /* Nothing references the file now, drop the final use (us). */ 1490 fp->f_usecount--; 1491 1492 ffree(fp); 1493 return (error); 1494 } 1495 1496 /* 1497 * Apply an advisory lock on a file descriptor. 1498 * 1499 * Just attempt to get a record lock of the requested type on 1500 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1501 */ 1502 /* ARGSUSED */ 1503 int 1504 sys_flock(struct lwp *l, void *v, register_t *retval) 1505 { 1506 struct sys_flock_args /* { 1507 syscallarg(int) fd; 1508 syscallarg(int) how; 1509 } */ *uap = v; 1510 int fd, how, error; 1511 struct proc *p; 1512 struct filedesc *fdp; 1513 struct file *fp; 1514 struct vnode *vp; 1515 struct flock lf; 1516 1517 p = l->l_proc; 1518 fd = SCARG(uap, fd); 1519 how = SCARG(uap, how); 1520 fdp = p->p_fd; 1521 error = 0; 1522 1523 if ((fp = fd_getfile(fdp, fd)) == NULL) 1524 return (EBADF); 1525 1526 FILE_USE(fp); 1527 1528 if (fp->f_type != DTYPE_VNODE) { 1529 error = EOPNOTSUPP; 1530 goto out; 1531 } 1532 1533 vp = (struct vnode *)fp->f_data; 1534 lf.l_whence = SEEK_SET; 1535 lf.l_start = 0; 1536 lf.l_len = 0; 1537 if (how & LOCK_UN) { 1538 lf.l_type = F_UNLCK; 1539 fp->f_flag &= ~FHASLOCK; 1540 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK); 1541 goto out; 1542 } 1543 if (how & LOCK_EX) 1544 lf.l_type = F_WRLCK; 1545 else if (how & LOCK_SH) 1546 lf.l_type = F_RDLCK; 1547 else { 1548 error = EINVAL; 1549 goto out; 1550 } 1551 fp->f_flag |= FHASLOCK; 1552 if (how & LOCK_NB) 1553 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK); 1554 else 1555 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, 1556 F_FLOCK|F_WAIT); 1557 out: 1558 FILE_UNUSE(fp, l); 1559 return (error); 1560 } 1561 1562 /* ARGSUSED */ 1563 int 1564 sys_posix_fadvise(struct lwp *l, void *v, register_t *retval) 1565 { 1566 const struct sys_posix_fadvise_args /* { 1567 syscallarg(int) fd; 1568 syscallarg(off_t) offset; 1569 syscallarg(off_t) len; 1570 syscallarg(int) advice; 1571 } */ *uap = v; 1572 const int fd = SCARG(uap, fd); 1573 const int advice = SCARG(uap, advice); 1574 struct proc *p = l->l_proc; 1575 struct file *fp; 1576 int error = 0; 1577 1578 fp = fd_getfile(p->p_fd, fd); 1579 if (fp == NULL) { 1580 error = EBADF; 1581 goto out; 1582 } 1583 FILE_USE(fp); 1584 1585 if (fp->f_type != DTYPE_VNODE) { 1586 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1587 error = ESPIPE; 1588 } else { 1589 error = EOPNOTSUPP; 1590 } 1591 goto out; 1592 } 1593 1594 switch (advice) { 1595 case POSIX_FADV_NORMAL: 1596 case POSIX_FADV_RANDOM: 1597 case POSIX_FADV_SEQUENTIAL: 1598 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL); 1599 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM); 1600 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL); 1601 1602 /* 1603 * we ignore offset and size. 1604 */ 1605 1606 fp->f_advice = advice; 1607 break; 1608 1609 case POSIX_FADV_WILLNEED: 1610 case POSIX_FADV_DONTNEED: 1611 case POSIX_FADV_NOREUSE: 1612 1613 /* 1614 * not implemented yet. 1615 */ 1616 1617 break; 1618 default: 1619 error = EINVAL; 1620 break; 1621 } 1622 out: 1623 if (fp != NULL) { 1624 FILE_UNUSE(fp, l); 1625 } 1626 *retval = error; 1627 return 0; 1628 } 1629 1630 /* 1631 * File Descriptor pseudo-device driver (/dev/fd/). 1632 * 1633 * Opening minor device N dup()s the file (if any) connected to file 1634 * descriptor N belonging to the calling process. Note that this driver 1635 * consists of only the ``open()'' routine, because all subsequent 1636 * references to this file will be direct to the other driver. 1637 */ 1638 /* ARGSUSED */ 1639 static int 1640 filedescopen(dev_t dev, int mode, int type, struct lwp *l) 1641 { 1642 1643 /* 1644 * XXX Kludge: set dupfd to contain the value of the 1645 * the file descriptor being sought for duplication. The error 1646 * return ensures that the vnode for this device will be released 1647 * by vn_open. Open will detect this special error and take the 1648 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1649 * will simply report the error. 1650 */ 1651 l->l_dupfd = minor(dev); /* XXX */ 1652 return EDUPFD; 1653 } 1654 1655 const struct cdevsw filedesc_cdevsw = { 1656 filedescopen, noclose, noread, nowrite, noioctl, 1657 nostop, notty, nopoll, nommap, nokqfilter, 1658 }; 1659 1660 /* 1661 * Duplicate the specified descriptor to a free descriptor. 1662 * 1663 * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller. 1664 */ 1665 int 1666 dupfdopen(struct lwp *l, int indx, int dfd, int mode, int error) 1667 { 1668 struct proc *p = l->l_proc; 1669 struct filedesc *fdp; 1670 struct file *wfp; 1671 1672 fdp = p->p_fd; 1673 1674 /* should be cleared by the caller */ 1675 KASSERT(fdp->fd_ofiles[indx] == NULL); 1676 1677 /* 1678 * If the to-be-dup'd fd number is greater than the allowed number 1679 * of file descriptors, or the fd to be dup'd has already been 1680 * closed, reject. 1681 */ 1682 1683 /* 1684 * Note, in the case of indx == dfd, fd_getfile below returns NULL. 1685 */ 1686 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1687 return (EBADF); 1688 1689 FILE_USE(wfp); 1690 1691 /* 1692 * There are two cases of interest here. 1693 * 1694 * For EDUPFD simply dup (dfd) to file descriptor 1695 * (indx) and return. 1696 * 1697 * For EMOVEFD steal away the file structure from (dfd) and 1698 * store it in (indx). (dfd) is effectively closed by 1699 * this operation. 1700 * 1701 * Any other error code is just returned. 1702 */ 1703 switch (error) { 1704 case EDUPFD: 1705 /* 1706 * Check that the mode the file is being opened for is a 1707 * subset of the mode of the existing descriptor. 1708 */ 1709 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { 1710 FILE_UNUSE(wfp, l); 1711 return (EACCES); 1712 } 1713 simple_lock(&fdp->fd_slock); 1714 fdp->fd_ofiles[indx] = wfp; 1715 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1716 simple_unlock(&fdp->fd_slock); 1717 simple_lock(&wfp->f_slock); 1718 wfp->f_count++; 1719 /* 'indx' has been fd_used'ed by caller */ 1720 FILE_UNUSE_HAVELOCK(wfp, l); 1721 return (0); 1722 1723 case EMOVEFD: 1724 /* 1725 * Steal away the file pointer from dfd, and stuff it into indx. 1726 */ 1727 simple_lock(&fdp->fd_slock); 1728 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1729 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1730 fdp->fd_ofiles[dfd] = NULL; 1731 fdp->fd_ofileflags[dfd] = 0; 1732 /* 1733 * Complete the clean up of the filedesc structure by 1734 * recomputing the various hints. 1735 */ 1736 /* 'indx' has been fd_used'ed by caller */ 1737 fd_unused(fdp, dfd); 1738 simple_unlock(&fdp->fd_slock); 1739 FILE_UNUSE(wfp, l); 1740 return (0); 1741 1742 default: 1743 FILE_UNUSE(wfp, l); 1744 return (error); 1745 } 1746 /* NOTREACHED */ 1747 } 1748 1749 /* 1750 * Close any files on exec? 1751 */ 1752 void 1753 fdcloseexec(struct lwp *l) 1754 { 1755 struct proc *p = l->l_proc; 1756 struct filedesc *fdp; 1757 int fd; 1758 1759 fdunshare(l); 1760 cwdunshare(p); 1761 1762 fdp = p->p_fd; 1763 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1764 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1765 (void) fdrelease(l, fd); 1766 } 1767 1768 /* 1769 * It is unsafe for set[ug]id processes to be started with file 1770 * descriptors 0..2 closed, as these descriptors are given implicit 1771 * significance in the Standard C library. fdcheckstd() will create a 1772 * descriptor referencing /dev/null for each of stdin, stdout, and 1773 * stderr that is not already open. 1774 */ 1775 #define CHECK_UPTO 3 1776 int 1777 fdcheckstd(l) 1778 struct lwp *l; 1779 { 1780 struct proc *p; 1781 struct nameidata nd; 1782 struct filedesc *fdp; 1783 struct file *fp; 1784 struct file *devnullfp = NULL; /* Quell compiler warning */ 1785 struct proc *pp; 1786 register_t retval; 1787 int fd, i, error, flags = FREAD|FWRITE, devnull = -1; 1788 char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; 1789 1790 p = l->l_proc; 1791 closed[0] = '\0'; 1792 if ((fdp = p->p_fd) == NULL) 1793 return (0); 1794 for (i = 0; i < CHECK_UPTO; i++) { 1795 if (fdp->fd_ofiles[i] != NULL) 1796 continue; 1797 snprintf(which, sizeof(which), ",%d", i); 1798 strlcat(closed, which, sizeof(closed)); 1799 if (devnull < 0) { 1800 if ((error = falloc(p, &fp, &fd)) != 0) 1801 return (error); 1802 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null", 1803 l); 1804 if ((error = vn_open(&nd, flags, 0)) != 0) { 1805 FILE_UNUSE(fp, l); 1806 ffree(fp); 1807 fdremove(p->p_fd, fd); 1808 return (error); 1809 } 1810 fp->f_data = nd.ni_vp; 1811 fp->f_flag = flags; 1812 fp->f_ops = &vnops; 1813 fp->f_type = DTYPE_VNODE; 1814 VOP_UNLOCK(nd.ni_vp, 0); 1815 devnull = fd; 1816 devnullfp = fp; 1817 FILE_SET_MATURE(fp); 1818 } else { 1819 restart: 1820 if ((error = fdalloc(p, 0, &fd)) != 0) { 1821 if (error == ENOSPC) { 1822 fdexpand(p); 1823 goto restart; 1824 } 1825 return (error); 1826 } 1827 1828 simple_lock(&devnullfp->f_slock); 1829 FILE_USE(devnullfp); 1830 /* finishdup() will unuse the descriptors for us */ 1831 if ((error = finishdup(l, devnull, fd, &retval)) != 0) 1832 return (error); 1833 } 1834 } 1835 if (devnullfp) 1836 FILE_UNUSE(devnullfp, l); 1837 if (closed[0] != '\0') { 1838 pp = p->p_pptr; 1839 log(LOG_WARNING, "set{u,g}id pid %d (%s) " 1840 "was invoked by uid %d ppid %d (%s) " 1841 "with fd %s closed\n", 1842 p->p_pid, p->p_comm, pp->p_ucred->cr_uid, 1843 pp->p_pid, pp->p_comm, &closed[1]); 1844 } 1845 return (0); 1846 } 1847 #undef CHECK_UPTO 1848 1849 /* 1850 * Sets descriptor owner. If the owner is a process, 'pgid' 1851 * is set to positive value, process ID. If the owner is process group, 1852 * 'pgid' is set to -pg_id. 1853 */ 1854 int 1855 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data) 1856 { 1857 int id = *(const int *)data; 1858 int error; 1859 1860 switch (cmd) { 1861 case TIOCSPGRP: 1862 if (id < 0) 1863 return (EINVAL); 1864 id = -id; 1865 break; 1866 default: 1867 break; 1868 } 1869 1870 if (id > 0 && !pfind(id)) 1871 return (ESRCH); 1872 else if (id < 0 && (error = pgid_in_session(p, -id))) 1873 return (error); 1874 1875 *pgid = id; 1876 return (0); 1877 } 1878 1879 /* 1880 * Return descriptor owner information. If the value is positive, 1881 * it's process ID. If it's negative, it's process group ID and 1882 * needs the sign removed before use. 1883 */ 1884 int 1885 fgetown(struct proc *p, pid_t pgid, int cmd, void *data) 1886 { 1887 switch (cmd) { 1888 case TIOCGPGRP: 1889 *(int *)data = -pgid; 1890 break; 1891 default: 1892 *(int *)data = pgid; 1893 break; 1894 } 1895 return (0); 1896 } 1897 1898 /* 1899 * Send signal to descriptor owner, either process or process group. 1900 */ 1901 void 1902 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1903 { 1904 struct proc *p1; 1905 ksiginfo_t ksi; 1906 1907 memset(&ksi, 0, sizeof(ksi)); 1908 ksi.ksi_signo = signo; 1909 ksi.ksi_code = code; 1910 ksi.ksi_band = band; 1911 1912 if (pgid > 0 && (p1 = pfind(pgid))) 1913 kpsignal(p1, &ksi, fdescdata); 1914 else if (pgid < 0) 1915 kgsignal(-pgid, &ksi, fdescdata); 1916 } 1917 1918 int 1919 fdclone(struct lwp *l, struct file *fp, int fd, int flag, 1920 const struct fileops *fops, void *data) 1921 { 1922 fp->f_flag = flag; 1923 fp->f_type = DTYPE_MISC; 1924 fp->f_ops = fops; 1925 fp->f_data = data; 1926 1927 l->l_dupfd = fd; 1928 1929 FILE_SET_MATURE(fp); 1930 FILE_UNUSE(fp, l); 1931 return EMOVEFD; 1932 } 1933 1934 /* ARGSUSED */ 1935 int 1936 fnullop_fcntl(struct file *fp, u_int cmd, void *data, struct lwp *l) 1937 { 1938 if (cmd == F_SETFL) 1939 return 0; 1940 1941 return EOPNOTSUPP; 1942 } 1943 1944 /* ARGSUSED */ 1945 int 1946 fnullop_poll(struct file *fp, int which, struct lwp *l) 1947 { 1948 return 0; 1949 } 1950 1951 1952 /* ARGSUSED */ 1953 int 1954 fnullop_kqfilter(struct file *fp, struct knote *kn) 1955 { 1956 1957 return 0; 1958 } 1959 1960 /* ARGSUSED */ 1961 int 1962 fbadop_stat(struct file *fp, struct stat *sb, struct lwp *l) 1963 { 1964 return EOPNOTSUPP; 1965 } 1966