1 /* $NetBSD: kern_descrip.c,v 1.185 2008/12/21 09:58:22 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 63 */ 64 65 /* 66 * File descriptor management. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.185 2008/12/21 09:58:22 ad Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/filedesc.h> 75 #include <sys/kernel.h> 76 #include <sys/proc.h> 77 #include <sys/file.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/stat.h> 81 #include <sys/ioctl.h> 82 #include <sys/fcntl.h> 83 #include <sys/pool.h> 84 #include <sys/unistd.h> 85 #include <sys/resourcevar.h> 86 #include <sys/conf.h> 87 #include <sys/event.h> 88 #include <sys/kauth.h> 89 #include <sys/atomic.h> 90 #include <sys/syscallargs.h> 91 #include <sys/cpu.h> 92 #include <sys/kmem.h> 93 #include <sys/vnode.h> 94 95 static int file_ctor(void *, void *, int); 96 static void file_dtor(void *, void *); 97 static int fdfile_ctor(void *, void *, int); 98 static void fdfile_dtor(void *, void *); 99 static int filedesc_ctor(void *, void *, int); 100 static void filedesc_dtor(void *, void *); 101 static int filedescopen(dev_t, int, int, lwp_t *); 102 103 kmutex_t filelist_lock; /* lock on filehead */ 104 struct filelist filehead; /* head of list of open files */ 105 u_int nfiles; /* actual number of open files */ 106 107 static pool_cache_t filedesc_cache; 108 static pool_cache_t file_cache; 109 static pool_cache_t fdfile_cache; 110 111 const struct cdevsw filedesc_cdevsw = { 112 filedescopen, noclose, noread, nowrite, noioctl, 113 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, 114 }; 115 116 /* For ease of reading. */ 117 __strong_alias(fd_putvnode,fd_putfile) 118 __strong_alias(fd_putsock,fd_putfile) 119 120 /* 121 * Initialize the descriptor system. 
122 */ 123 void 124 fd_sys_init(void) 125 { 126 127 mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); 128 129 file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, 130 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); 131 KASSERT(file_cache != NULL); 132 133 fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, 134 PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, 135 NULL); 136 KASSERT(fdfile_cache != NULL); 137 138 filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, 139 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, 140 NULL); 141 KASSERT(filedesc_cache != NULL); 142 } 143 144 static int 145 fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) 146 { 147 int i, off, maxoff; 148 uint32_t sub; 149 150 KASSERT(mutex_owned(&fdp->fd_lock)); 151 152 if (want > bits) 153 return -1; 154 155 off = want >> NDENTRYSHIFT; 156 i = want & NDENTRYMASK; 157 if (i) { 158 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 159 if (sub != ~0) 160 goto found; 161 off++; 162 } 163 164 maxoff = NDLOSLOTS(bits); 165 while (off < maxoff) { 166 if ((sub = bitmap[off]) != ~0) 167 goto found; 168 off++; 169 } 170 171 return (-1); 172 173 found: 174 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 175 } 176 177 static int 178 fd_last_set(filedesc_t *fd, int last) 179 { 180 int off, i; 181 fdfile_t **ofiles = fd->fd_ofiles; 182 uint32_t *bitmap = fd->fd_lomap; 183 184 KASSERT(mutex_owned(&fd->fd_lock)); 185 186 off = (last - 1) >> NDENTRYSHIFT; 187 188 while (off >= 0 && !bitmap[off]) 189 off--; 190 191 if (off < 0) 192 return (-1); 193 194 i = ((off + 1) << NDENTRYSHIFT) - 1; 195 if (i >= last) 196 i = last - 1; 197 198 /* XXX should use bitmap */ 199 /* XXXAD does not work for fd_copy() */ 200 while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated)) 201 i--; 202 203 return (i); 204 } 205 206 void 207 fd_used(filedesc_t *fdp, unsigned fd) 208 { 209 u_int off = fd >> NDENTRYSHIFT; 210 fdfile_t *ff; 211 212 ff = fdp->fd_ofiles[fd]; 213 214 KASSERT(mutex_owned(&fdp->fd_lock)); 215 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 216 KASSERT(ff != NULL); 217 KASSERT(ff->ff_file == NULL); 218 KASSERT(!ff->ff_allocated); 219 220 ff->ff_allocated = 1; 221 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 222 if (fdp->fd_lomap[off] == ~0) { 223 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 224 (1 << (off & NDENTRYMASK))) == 0); 225 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 226 } 227 228 if ((int)fd > fdp->fd_lastfile) { 229 fdp->fd_lastfile = fd; 230 } 231 232 if (fd >= NDFDFILE) { 233 fdp->fd_nused++; 234 } else { 235 KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 236 } 237 } 238 239 void 240 fd_unused(filedesc_t *fdp, unsigned fd) 241 { 242 u_int off = fd >> NDENTRYSHIFT; 243 fdfile_t *ff; 244 245 ff = fdp->fd_ofiles[fd]; 246 247 /* 248 * Don't assert the lock is held here, as we may be copying 249 * the table during exec() and it is not needed there. 250 * procfs and sysctl are locked out by proc::p_reflock. 
251 * 252 * KASSERT(mutex_owned(&fdp->fd_lock)); 253 */ 254 KASSERT(ff != NULL); 255 KASSERT(ff->ff_file == NULL); 256 KASSERT(ff->ff_allocated); 257 258 if (fd < fdp->fd_freefile) { 259 fdp->fd_freefile = fd; 260 } 261 262 if (fdp->fd_lomap[off] == ~0) { 263 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 264 (1 << (off & NDENTRYMASK))) != 0); 265 fdp->fd_himap[off >> NDENTRYSHIFT] &= 266 ~(1 << (off & NDENTRYMASK)); 267 } 268 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 269 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 270 ff->ff_allocated = 0; 271 272 KASSERT(fd <= fdp->fd_lastfile); 273 if (fd == fdp->fd_lastfile) { 274 fdp->fd_lastfile = fd_last_set(fdp, fd); 275 } 276 277 if (fd >= NDFDFILE) { 278 KASSERT(fdp->fd_nused > 0); 279 fdp->fd_nused--; 280 } else { 281 KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 282 } 283 } 284 285 /* 286 * Custom version of fd_unused() for fd_copy(), where the descriptor 287 * table is not yet fully initialized. 288 */ 289 static inline void 290 fd_zap(filedesc_t *fdp, unsigned fd) 291 { 292 u_int off = fd >> NDENTRYSHIFT; 293 294 if (fd < fdp->fd_freefile) { 295 fdp->fd_freefile = fd; 296 } 297 298 if (fdp->fd_lomap[off] == ~0) { 299 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 300 (1 << (off & NDENTRYMASK))) != 0); 301 fdp->fd_himap[off >> NDENTRYSHIFT] &= 302 ~(1 << (off & NDENTRYMASK)); 303 } 304 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 305 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 306 } 307 308 bool 309 fd_isused(filedesc_t *fdp, unsigned fd) 310 { 311 u_int off = fd >> NDENTRYSHIFT; 312 313 KASSERT(fd < fdp->fd_nfiles); 314 315 return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; 316 } 317 318 /* 319 * Look up the file structure corresponding to a file descriptor 320 * and return the file, holding a reference on the descriptor. 321 */ 322 inline file_t * 323 fd_getfile(unsigned fd) 324 { 325 filedesc_t *fdp; 326 fdfile_t *ff; 327 file_t *fp; 328 329 fdp = curlwp->l_fd; 330 331 /* 332 * Look up the fdfile structure representing this descriptor. 333 * Ensure that we see fd_nfiles before fd_ofiles since we 334 * are doing this unlocked. See fd_tryexpand(). 335 */ 336 if (__predict_false(fd >= fdp->fd_nfiles)) { 337 return NULL; 338 } 339 membar_consumer(); 340 ff = fdp->fd_ofiles[fd]; 341 KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 342 if (__predict_false(ff == NULL)) { 343 return NULL; 344 } 345 346 /* 347 * Now get a reference to the descriptor. Issue a memory 348 * barrier to ensure that we acquire the file pointer _after_ 349 * adding a reference. If no memory barrier, we could fetch 350 * a stale pointer. 351 */ 352 atomic_inc_uint(&ff->ff_refcnt); 353 #ifndef __HAVE_ATOMIC_AS_MEMBAR 354 membar_enter(); 355 #endif 356 357 /* 358 * If the file is not open or is being closed then put the 359 * reference back. 360 */ 361 fp = ff->ff_file; 362 if (__predict_true(fp != NULL)) { 363 return fp; 364 } 365 fd_putfile(fd); 366 return NULL; 367 } 368 369 /* 370 * Release a reference to a file descriptor acquired with fd_getfile(). 
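 *
 * As an illustration only (not taken from the original file), a typical
 * unlocked consumer brackets its use of the file like this, where
 * "somefd" and do_something() are hypothetical names:
 *
 *	file_t *fp;
 *	int error;
 *
 *	if ((fp = fd_getfile(somefd)) == NULL)
 *		return EBADF;
 *	error = do_something(fp);
 *	fd_putfile(somefd);
 *	return error;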
371 */
372 void
373 fd_putfile(unsigned fd)
374 {
375 	filedesc_t *fdp;
376 	fdfile_t *ff;
377 	u_int u, v;
378 
379 	fdp = curlwp->l_fd;
380 	ff = fdp->fd_ofiles[fd];
381 
382 	KASSERT(fd < fdp->fd_nfiles);
383 	KASSERT(ff != NULL);
384 	KASSERT((ff->ff_refcnt & FR_MASK) > 0);
385 	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
386 
387 	/*
388 	 * Ensure that any use of the file is complete and globally
389 	 * visible before dropping the final reference.  If no membar,
390 	 * the current CPU could still access memory associated with
391 	 * the file after it has been freed or recycled by another
392 	 * CPU.
393 	 */
394 #ifndef __HAVE_ATOMIC_AS_MEMBAR
395 	membar_exit();
396 #endif
397 
398 	/*
399 	 * Be optimistic and start out with the assumption that no other
400 	 * threads are trying to close the descriptor.  If the CAS fails,
401 	 * we lost a race and/or it's being closed.
402 	 */
403 	for (u = ff->ff_refcnt & FR_MASK;; u = v) {
404 		v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1);
405 		if (__predict_true(u == v)) {
406 			return;
407 		}
408 		if (__predict_false((v & FR_CLOSING) != 0)) {
409 			break;
410 		}
411 	}
412 
413 	/* Another thread is waiting to close the file: join it. */
414 	(void)fd_close(fd);
415 }
416 
417 /*
418  * Convenience wrapper around fd_getfile() that returns a reference
419  * to a vnode.
420  */
421 int
422 fd_getvnode(unsigned fd, file_t **fpp)
423 {
424 	vnode_t *vp;
425 	file_t *fp;
426 
427 	fp = fd_getfile(fd);
428 	if (__predict_false(fp == NULL)) {
429 		return EBADF;
430 	}
431 	if (__predict_false(fp->f_type != DTYPE_VNODE)) {
432 		fd_putfile(fd);
433 		return EINVAL;
434 	}
435 	vp = fp->f_data;
436 	if (__predict_false(vp->v_type == VBAD)) {
437 		/* XXX Is this case really necessary? */
438 		fd_putfile(fd);
439 		return EBADF;
440 	}
441 	*fpp = fp;
442 	return 0;
443 }
444 
445 /*
446  * Convenience wrapper around fd_getfile() that returns a reference
447  * to a socket.
448  */
449 int
450 fd_getsock(unsigned fd, struct socket **sop)
451 {
452 	file_t *fp;
453 
454 	fp = fd_getfile(fd);
455 	if (__predict_false(fp == NULL)) {
456 		return EBADF;
457 	}
458 	if (__predict_false(fp->f_type != DTYPE_SOCKET)) {
459 		fd_putfile(fd);
460 		return ENOTSOCK;
461 	}
462 	*sop = fp->f_data;
463 	return 0;
464 }
465 
466 /*
467  * Look up the file structure corresponding to a file descriptor
468  * and return it with a reference held on the file, not the
469  * descriptor.
470  *
471  * This is heavyweight and only used when accessing descriptors
472  * from a foreign process.  The caller must ensure that `p' does
473  * not exit or fork across this call.
474  *
475  * To release the file (not descriptor) reference, use closef().
476  */
477 file_t *
478 fd_getfile2(proc_t *p, unsigned fd)
479 {
480 	filedesc_t *fdp;
481 	fdfile_t *ff;
482 	file_t *fp;
483 
484 	fdp = p->p_fd;
485 	mutex_enter(&fdp->fd_lock);
486 	if (fd >= fdp->fd_nfiles) {
487 		mutex_exit(&fdp->fd_lock);
488 		return NULL;
489 	}
490 	if ((ff = fdp->fd_ofiles[fd]) == NULL) {
491 		mutex_exit(&fdp->fd_lock);
492 		return NULL;
493 	}
494 	mutex_enter(&ff->ff_lock);
495 	if ((fp = ff->ff_file) == NULL) {
496 		mutex_exit(&ff->ff_lock);
497 		mutex_exit(&fdp->fd_lock);
498 		return NULL;
499 	}
500 	mutex_enter(&fp->f_lock);
501 	fp->f_count++;
502 	mutex_exit(&fp->f_lock);
503 	mutex_exit(&ff->ff_lock);
504 	mutex_exit(&fdp->fd_lock);
505 
506 	return fp;
507 }
508 
509 /*
510  * Internal form of close.  Must be called with a reference to the
511  * descriptor, and will drop the reference.
When all descriptor 512 * references are dropped, releases the descriptor slot and a single 513 * reference to the file structure. 514 */ 515 int 516 fd_close(unsigned fd) 517 { 518 struct flock lf; 519 filedesc_t *fdp; 520 fdfile_t *ff; 521 file_t *fp; 522 proc_t *p; 523 lwp_t *l; 524 525 l = curlwp; 526 p = l->l_proc; 527 fdp = l->l_fd; 528 ff = fdp->fd_ofiles[fd]; 529 530 KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 531 532 mutex_enter(&ff->ff_lock); 533 KASSERT((ff->ff_refcnt & FR_MASK) > 0); 534 if (ff->ff_file == NULL) { 535 /* 536 * Another user of the file is already closing, and is 537 * waiting for other users of the file to drain. Release 538 * our reference, and wake up the closer. 539 */ 540 atomic_dec_uint(&ff->ff_refcnt); 541 cv_broadcast(&ff->ff_closing); 542 mutex_exit(&ff->ff_lock); 543 544 /* 545 * An application error, so pretend that the descriptor 546 * was already closed. We can't safely wait for it to 547 * be closed without potentially deadlocking. 548 */ 549 return (EBADF); 550 } 551 KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); 552 553 /* 554 * There may be multiple users of this file within the process. 555 * Notify existing and new users that the file is closing. This 556 * will prevent them from adding additional uses to this file 557 * while we are closing it. 558 */ 559 fp = ff->ff_file; 560 ff->ff_file = NULL; 561 ff->ff_exclose = false; 562 563 /* 564 * We expect the caller to hold a descriptor reference - drop it. 565 * The reference count may increase beyond zero at this point due 566 * to an erroneous descriptor reference by an application, but 567 * fd_getfile() will notice that the file is being closed and drop 568 * the reference again. 569 */ 570 #ifndef __HAVE_ATOMIC_AS_MEMBAR 571 membar_producer(); 572 #endif 573 if (__predict_false(atomic_dec_uint_nv(&ff->ff_refcnt) != 0)) { 574 /* 575 * Wait for other references to drain. This is typically 576 * an application error - the descriptor is being closed 577 * while still in use. 578 * 579 */ 580 atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); 581 /* 582 * Remove any knotes attached to the file. A knote 583 * attached to the descriptor can hold references on it. 584 */ 585 if (!SLIST_EMPTY(&ff->ff_knlist)) { 586 mutex_exit(&ff->ff_lock); 587 knote_fdclose(fd); 588 mutex_enter(&ff->ff_lock); 589 } 590 /* 591 * We need to see the count drop to zero at least once, 592 * in order to ensure that all pre-existing references 593 * have been drained. New references past this point are 594 * of no interest. 595 */ 596 while ((ff->ff_refcnt & FR_MASK) != 0) { 597 cv_wait(&ff->ff_closing, &ff->ff_lock); 598 } 599 atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); 600 } else { 601 /* If no references, there must be no knotes. */ 602 KASSERT(SLIST_EMPTY(&ff->ff_knlist)); 603 } 604 mutex_exit(&ff->ff_lock); 605 606 /* 607 * POSIX record locking dictates that any close releases ALL 608 * locks owned by this process. This is handled by setting 609 * a flag in the unlock to free ONLY locks obeying POSIX 610 * semantics, and not to free BSD-style file locks. 611 * If the descriptor was in a message, POSIX-style locks 612 * aren't passed with the descriptor. 613 */ 614 if ((p->p_flag & PK_ADVLOCK) != 0 && fp->f_type == DTYPE_VNODE) { 615 lf.l_whence = SEEK_SET; 616 lf.l_start = 0; 617 lf.l_len = 0; 618 lf.l_type = F_UNLCK; 619 (void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX); 620 } 621 622 623 /* Free descriptor slot. 
*/ 624 mutex_enter(&fdp->fd_lock); 625 fd_unused(fdp, fd); 626 mutex_exit(&fdp->fd_lock); 627 628 /* Now drop reference to the file itself. */ 629 return closef(fp); 630 } 631 632 /* 633 * Duplicate a file descriptor. 634 */ 635 int 636 fd_dup(file_t *fp, int minfd, int *newp, bool exclose) 637 { 638 proc_t *p; 639 int error; 640 641 p = curproc; 642 643 while ((error = fd_alloc(p, minfd, newp)) != 0) { 644 if (error != ENOSPC) { 645 return error; 646 } 647 fd_tryexpand(p); 648 } 649 650 curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose; 651 fd_affix(p, fp, *newp); 652 return 0; 653 } 654 655 /* 656 * dup2 operation. 657 */ 658 int 659 fd_dup2(file_t *fp, unsigned new) 660 { 661 filedesc_t *fdp; 662 fdfile_t *ff; 663 664 fdp = curlwp->l_fd; 665 666 /* 667 * Ensure there are enough slots in the descriptor table, 668 * and allocate an fdfile_t up front in case we need it. 669 */ 670 while (new >= fdp->fd_nfiles) { 671 fd_tryexpand(curproc); 672 } 673 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 674 675 /* 676 * If there is already a file open, close it. If the file is 677 * half open, wait for it to be constructed before closing it. 678 * XXX Potential for deadlock here? 679 */ 680 mutex_enter(&fdp->fd_lock); 681 while (fd_isused(fdp, new)) { 682 mutex_exit(&fdp->fd_lock); 683 if (fd_getfile(new) != NULL) { 684 (void)fd_close(new); 685 } else { 686 /* XXX Crummy, but unlikely to happen. */ 687 kpause("dup2", false, 1, NULL); 688 } 689 mutex_enter(&fdp->fd_lock); 690 } 691 if (fdp->fd_ofiles[new] == NULL) { 692 KASSERT(new >= NDFDFILE); 693 fdp->fd_ofiles[new] = ff; 694 ff = NULL; 695 } 696 fd_used(fdp, new); 697 mutex_exit(&fdp->fd_lock); 698 699 /* Slot is now allocated. Insert copy of the file. */ 700 fd_affix(curproc, fp, new); 701 if (ff != NULL) { 702 pool_cache_put(fdfile_cache, ff); 703 } 704 return 0; 705 } 706 707 /* 708 * Drop reference to a file structure. 709 */ 710 int 711 closef(file_t *fp) 712 { 713 struct flock lf; 714 int error; 715 716 /* 717 * Drop reference. If referenced elsewhere it's still open 718 * and we have nothing more to do. 719 */ 720 mutex_enter(&fp->f_lock); 721 KASSERT(fp->f_count > 0); 722 if (--fp->f_count > 0) { 723 mutex_exit(&fp->f_lock); 724 return 0; 725 } 726 KASSERT(fp->f_count == 0); 727 mutex_exit(&fp->f_lock); 728 729 /* We held the last reference - release locks, close and free. */ 730 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 731 lf.l_whence = SEEK_SET; 732 lf.l_start = 0; 733 lf.l_len = 0; 734 lf.l_type = F_UNLCK; 735 (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); 736 } 737 if (fp->f_ops != NULL) { 738 error = (*fp->f_ops->fo_close)(fp); 739 } else { 740 error = 0; 741 } 742 ffree(fp); 743 744 return error; 745 } 746 747 /* 748 * Allocate a file descriptor for the process. 749 */ 750 int 751 fd_alloc(proc_t *p, int want, int *result) 752 { 753 filedesc_t *fdp; 754 int i, lim, last, error; 755 u_int off, new; 756 fdfile_t *ff; 757 758 KASSERT(p == curproc || p == &proc0); 759 760 fdp = p->p_fd; 761 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 762 KASSERT(ff->ff_refcnt == 0); 763 KASSERT(ff->ff_file == NULL); 764 765 /* 766 * Search for a free descriptor starting at the higher 767 * of want or fd_freefile. 
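 *
 * As a worked illustration, assuming the usual NDENTRIES of 32 (an
 * assumption, not restated by this file): descriptor 70 corresponds to
 * bit 70 & NDENTRYMASK (= 6) of fd_lomap word 70 >> NDENTRYSHIFT (= 2),
 * and bit 2 of fd_himap word 0 is set only once that fd_lomap word is
 * completely full.  The loop below therefore scans fd_himap to skip
 * whole blocks of NDENTRIES allocated descriptors, then fd_lomap to
 * pick the individual free slot.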
768 */ 769 mutex_enter(&fdp->fd_lock); 770 KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); 771 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 772 last = min(fdp->fd_nfiles, lim); 773 for (;;) { 774 if ((i = want) < fdp->fd_freefile) 775 i = fdp->fd_freefile; 776 off = i >> NDENTRYSHIFT; 777 new = fd_next_zero(fdp, fdp->fd_himap, off, 778 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 779 if (new == -1) 780 break; 781 i = fd_next_zero(fdp, &fdp->fd_lomap[new], 782 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 783 if (i == -1) { 784 /* 785 * Free file descriptor in this block was 786 * below want, try again with higher want. 787 */ 788 want = (new + 1) << NDENTRYSHIFT; 789 continue; 790 } 791 i += (new << NDENTRYSHIFT); 792 if (i >= last) { 793 break; 794 } 795 if (fdp->fd_ofiles[i] == NULL) { 796 KASSERT(i >= NDFDFILE); 797 fdp->fd_ofiles[i] = ff; 798 } else { 799 pool_cache_put(fdfile_cache, ff); 800 } 801 KASSERT(fdp->fd_ofiles[i]->ff_file == NULL); 802 fd_used(fdp, i); 803 if (want <= fdp->fd_freefile) { 804 fdp->fd_freefile = i; 805 } 806 *result = i; 807 mutex_exit(&fdp->fd_lock); 808 KASSERT(i >= NDFDFILE || 809 fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); 810 return 0; 811 } 812 813 /* No space in current array. Let the caller expand and retry. */ 814 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 815 mutex_exit(&fdp->fd_lock); 816 pool_cache_put(fdfile_cache, ff); 817 return error; 818 } 819 820 /* 821 * Allocate memory for the open files array. 822 */ 823 static fdfile_t ** 824 fd_ofile_alloc(int n) 825 { 826 uintptr_t *ptr, sz; 827 828 KASSERT(n > NDFILE); 829 830 sz = (n + 2) * sizeof(uintptr_t); 831 ptr = kmem_alloc((size_t)sz, KM_SLEEP); 832 ptr[1] = sz; 833 834 return (fdfile_t **)(ptr + 2); 835 } 836 837 /* 838 * Free an open files array. 839 */ 840 static void 841 fd_ofile_free(int n, fdfile_t **of) 842 { 843 uintptr_t *ptr, sz; 844 845 KASSERT(n > NDFILE); 846 847 sz = (n + 2) * sizeof(uintptr_t); 848 ptr = (uintptr_t *)of - 2; 849 KASSERT(ptr[1] == sz); 850 kmem_free(ptr, sz); 851 } 852 853 /* 854 * Allocate descriptor bitmap. 855 */ 856 static void 857 fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) 858 { 859 uint8_t *ptr; 860 size_t szlo, szhi; 861 862 KASSERT(n > NDENTRIES); 863 864 szlo = NDLOSLOTS(n) * sizeof(uint32_t); 865 szhi = NDHISLOTS(n) * sizeof(uint32_t); 866 ptr = kmem_alloc(szlo + szhi, KM_SLEEP); 867 *lo = (uint32_t *)ptr; 868 *hi = (uint32_t *)(ptr + szlo); 869 } 870 871 /* 872 * Free descriptor bitmap. 873 */ 874 static void 875 fd_map_free(int n, uint32_t *lo, uint32_t *hi) 876 { 877 size_t szlo, szhi; 878 879 KASSERT(n > NDENTRIES); 880 881 szlo = NDLOSLOTS(n) * sizeof(uint32_t); 882 szhi = NDHISLOTS(n) * sizeof(uint32_t); 883 KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); 884 kmem_free(lo, szlo + szhi); 885 } 886 887 /* 888 * Expand a process' descriptor table. 
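 *
 * Callers are expected to retry fd_alloc() after each expansion, as
 * fd_allocfile() and fd_dup() in this file do.  A sketch of that
 * pattern, for illustration only:
 *
 *	while ((error = fd_alloc(p, 0, &fd)) != 0) {
 *		if (error != ENOSPC)
 *			return error;
 *		fd_tryexpand(p);
 *	}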
889 */
890 void
891 fd_tryexpand(proc_t *p)
892 {
893 	filedesc_t *fdp;
894 	int i, numfiles, oldnfiles;
895 	fdfile_t **newofile;
896 	uint32_t *newhimap, *newlomap;
897 
898 	KASSERT(p == curproc || p == &proc0);
899 
900 	fdp = p->p_fd;
901 	newhimap = NULL;
902 	newlomap = NULL;
903 	oldnfiles = fdp->fd_nfiles;
904 
905 	if (oldnfiles < NDEXTENT)
906 		numfiles = NDEXTENT;
907 	else
908 		numfiles = 2 * oldnfiles;
909 
910 	newofile = fd_ofile_alloc(numfiles);
911 	if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
912 		fd_map_alloc(numfiles, &newlomap, &newhimap);
913 	}
914 
915 	mutex_enter(&fdp->fd_lock);
916 	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
917 	if (fdp->fd_nfiles != oldnfiles) {
918 		/* fdp changed; caller must retry */
919 		mutex_exit(&fdp->fd_lock);
920 		fd_ofile_free(numfiles, newofile);
921 		if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
922 			fd_map_free(numfiles, newlomap, newhimap);
923 		}
924 		return;
925 	}
926 
927 	/* Copy the existing ofile array and zero the new portion. */
928 	i = sizeof(fdfile_t *) * fdp->fd_nfiles;
929 	memcpy(newofile, fdp->fd_ofiles, i);
930 	memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i);
931 
932 	/*
933 	 * Link old ofiles array into list to be discarded.  We defer
934 	 * freeing until process exit if the descriptor table is visible
935 	 * to other threads.
936 	 */
937 	if (oldnfiles > NDFILE) {
938 		if ((fdp->fd_refcnt | p->p_nlwps) > 1) {
939 			fdp->fd_ofiles[-2] = (void *)fdp->fd_discard;
940 			fdp->fd_discard = fdp->fd_ofiles - 2;
941 		} else {
942 			fd_ofile_free(oldnfiles, fdp->fd_ofiles);
943 		}
944 	}
945 
946 	if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
947 		i = NDHISLOTS(oldnfiles) * sizeof(uint32_t);
948 		memcpy(newhimap, fdp->fd_himap, i);
949 		memset((uint8_t *)newhimap + i, 0,
950 		    NDHISLOTS(numfiles) * sizeof(uint32_t) - i);
951 
952 		i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t);
953 		memcpy(newlomap, fdp->fd_lomap, i);
954 		memset((uint8_t *)newlomap + i, 0,
955 		    NDLOSLOTS(numfiles) * sizeof(uint32_t) - i);
956 
957 		if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) {
958 			fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap);
959 		}
960 		fdp->fd_himap = newhimap;
961 		fdp->fd_lomap = newlomap;
962 	}
963 
964 	/*
965 	 * All other modifications must become globally visible before
966 	 * the change to fd_nfiles.  See fd_getfile().
967 	 */
968 	fdp->fd_ofiles = newofile;
969 	membar_producer();
970 	fdp->fd_nfiles = numfiles;
971 	mutex_exit(&fdp->fd_lock);
972 
973 	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
974 }
975 
976 /*
977  * Create a new open file structure and allocate a file descriptor
978  * for the current process.
979  */
980 int
981 fd_allocfile(file_t **resultfp, int *resultfd)
982 {
983 	file_t *fp;
984 	proc_t *p;
985 	int error;
986 
987 	p = curproc;
988 
989 	while ((error = fd_alloc(p, 0, resultfd)) != 0) {
990 		if (error != ENOSPC) {
991 			return error;
992 		}
993 		fd_tryexpand(p);
994 	}
995 
996 	fp = pool_cache_get(file_cache, PR_WAITOK);
997 	KASSERT(fp->f_count == 0);
998 	fp->f_cred = kauth_cred_get();
999 	kauth_cred_hold(fp->f_cred);
1000 
1001 	if (__predict_false(atomic_inc_uint_nv(&nfiles) >= maxfiles)) {
1002 		fd_abort(p, fp, *resultfd);
1003 		tablefull("file", "increase kern.maxfiles or MAXFILES");
1004 		return ENFILE;
1005 	}
1006 
1007 	fp->f_advice = 0;
1008 	fp->f_msgcount = 0;
1009 	fp->f_offset = 0;
1010 	fp->f_iflags = 0;
1011 	*resultfp = fp;
1012 
1013 	return 0;
1014 }
1015 
1016 /*
1017  * Successful creation of a new descriptor: make visible to the process.
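 *
 * For illustration (not part of the original comment), the usual life
 * cycle of a new descriptor ties fd_allocfile(), fd_abort() and
 * fd_affix() together as below; setup_file() stands in for hypothetical
 * type-specific initialization:
 *
 *	file_t *fp;
 *	int fd, error;
 *
 *	if ((error = fd_allocfile(&fp, &fd)) != 0)
 *		return error;
 *	if ((error = setup_file(fp)) != 0) {
 *		fd_abort(curproc, fp, fd);
 *		return error;
 *	}
 *	fd_affix(curproc, fp, fd);
 *	return 0;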
1018 */ 1019 void 1020 fd_affix(proc_t *p, file_t *fp, unsigned fd) 1021 { 1022 fdfile_t *ff; 1023 filedesc_t *fdp; 1024 1025 KASSERT(p == curproc || p == &proc0); 1026 1027 /* Add a reference to the file structure. */ 1028 mutex_enter(&fp->f_lock); 1029 fp->f_count++; 1030 mutex_exit(&fp->f_lock); 1031 1032 /* 1033 * Insert the new file into the descriptor slot. 1034 * 1035 * The memory barriers provided by lock activity in this routine 1036 * ensure that any updates to the file structure become globally 1037 * visible before the file becomes visible to other LWPs in the 1038 * current process. 1039 */ 1040 fdp = p->p_fd; 1041 ff = fdp->fd_ofiles[fd]; 1042 1043 KASSERT(ff != NULL); 1044 KASSERT(ff->ff_file == NULL); 1045 KASSERT(ff->ff_allocated); 1046 KASSERT(fd_isused(fdp, fd)); 1047 KASSERT(fd >= NDFDFILE || 1048 fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); 1049 1050 /* No need to lock in order to make file initially visible. */ 1051 ff->ff_file = fp; 1052 } 1053 1054 /* 1055 * Abort creation of a new descriptor: free descriptor slot and file. 1056 */ 1057 void 1058 fd_abort(proc_t *p, file_t *fp, unsigned fd) 1059 { 1060 filedesc_t *fdp; 1061 fdfile_t *ff; 1062 1063 KASSERT(p == curproc || p == &proc0); 1064 1065 fdp = p->p_fd; 1066 ff = fdp->fd_ofiles[fd]; 1067 1068 KASSERT(fd >= NDFDFILE || 1069 fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); 1070 1071 mutex_enter(&fdp->fd_lock); 1072 KASSERT(fd_isused(fdp, fd)); 1073 fd_unused(fdp, fd); 1074 mutex_exit(&fdp->fd_lock); 1075 1076 if (fp != NULL) { 1077 ffree(fp); 1078 } 1079 } 1080 1081 /* 1082 * Free a file descriptor. 1083 */ 1084 void 1085 ffree(file_t *fp) 1086 { 1087 1088 KASSERT(fp->f_count == 0); 1089 1090 atomic_dec_uint(&nfiles); 1091 kauth_cred_free(fp->f_cred); 1092 pool_cache_put(file_cache, fp); 1093 } 1094 1095 static int 1096 file_ctor(void *arg, void *obj, int flags) 1097 { 1098 file_t *fp = obj; 1099 1100 memset(fp, 0, sizeof(*fp)); 1101 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1102 1103 mutex_enter(&filelist_lock); 1104 LIST_INSERT_HEAD(&filehead, fp, f_list); 1105 mutex_exit(&filelist_lock); 1106 1107 return 0; 1108 } 1109 1110 static void 1111 file_dtor(void *arg, void *obj) 1112 { 1113 file_t *fp = obj; 1114 1115 mutex_enter(&filelist_lock); 1116 LIST_REMOVE(fp, f_list); 1117 mutex_exit(&filelist_lock); 1118 1119 mutex_destroy(&fp->f_lock); 1120 } 1121 1122 static int 1123 fdfile_ctor(void *arg, void *obj, int flags) 1124 { 1125 fdfile_t *ff = obj; 1126 1127 memset(ff, 0, sizeof(*ff)); 1128 mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE); 1129 cv_init(&ff->ff_closing, "fdclose"); 1130 1131 return 0; 1132 } 1133 1134 static void 1135 fdfile_dtor(void *arg, void *obj) 1136 { 1137 fdfile_t *ff = obj; 1138 1139 mutex_destroy(&ff->ff_lock); 1140 cv_destroy(&ff->ff_closing); 1141 } 1142 1143 file_t * 1144 fgetdummy(void) 1145 { 1146 file_t *fp; 1147 1148 fp = kmem_alloc(sizeof(*fp), KM_SLEEP); 1149 if (fp != NULL) { 1150 memset(fp, 0, sizeof(*fp)); 1151 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1152 } 1153 return fp; 1154 } 1155 1156 void 1157 fputdummy(file_t *fp) 1158 { 1159 1160 mutex_destroy(&fp->f_lock); 1161 kmem_free(fp, sizeof(*fp)); 1162 } 1163 1164 /* 1165 * Create an initial filedesc structure. 
1166 */ 1167 filedesc_t * 1168 fd_init(filedesc_t *fdp) 1169 { 1170 unsigned fd; 1171 1172 if (fdp == NULL) { 1173 fdp = pool_cache_get(filedesc_cache, PR_WAITOK); 1174 } else { 1175 filedesc_ctor(NULL, fdp, PR_WAITOK); 1176 } 1177 1178 fdp->fd_refcnt = 1; 1179 fdp->fd_ofiles = fdp->fd_dfiles; 1180 fdp->fd_nfiles = NDFILE; 1181 fdp->fd_himap = fdp->fd_dhimap; 1182 fdp->fd_lomap = fdp->fd_dlomap; 1183 KASSERT(fdp->fd_lastfile == -1); 1184 KASSERT(fdp->fd_lastkqfile == -1); 1185 KASSERT(fdp->fd_knhash == NULL); 1186 1187 memset(&fdp->fd_startzero, 0, sizeof(*fdp) - 1188 offsetof(filedesc_t, fd_startzero)); 1189 for (fd = 0; fd < NDFDFILE; fd++) { 1190 fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd]; 1191 } 1192 1193 return fdp; 1194 } 1195 1196 /* 1197 * Initialize a file descriptor table. 1198 */ 1199 static int 1200 filedesc_ctor(void *arg, void *obj, int flag) 1201 { 1202 filedesc_t *fdp = obj; 1203 int i; 1204 1205 memset(fdp, 0, sizeof(*fdp)); 1206 mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); 1207 fdp->fd_lastfile = -1; 1208 fdp->fd_lastkqfile = -1; 1209 1210 CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); 1211 for (i = 0; i < NDFDFILE; i++) { 1212 fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); 1213 } 1214 1215 return 0; 1216 } 1217 1218 static void 1219 filedesc_dtor(void *arg, void *obj) 1220 { 1221 filedesc_t *fdp = obj; 1222 int i; 1223 1224 for (i = 0; i < NDFDFILE; i++) { 1225 fdfile_dtor(NULL, fdp->fd_dfdfile[i]); 1226 } 1227 1228 mutex_destroy(&fdp->fd_lock); 1229 } 1230 1231 /* 1232 * Make p2 share p1's filedesc structure. 1233 */ 1234 void 1235 fd_share(struct proc *p2) 1236 { 1237 filedesc_t *fdp; 1238 1239 fdp = curlwp->l_fd; 1240 p2->p_fd = fdp; 1241 atomic_inc_uint(&fdp->fd_refcnt); 1242 } 1243 1244 /* 1245 * Copy a filedesc structure. 1246 */ 1247 filedesc_t * 1248 fd_copy(void) 1249 { 1250 filedesc_t *newfdp, *fdp; 1251 fdfile_t *ff, *fflist, **ffp, **nffp, *ff2; 1252 int i, nused, numfiles, lastfile, j, newlast; 1253 file_t *fp; 1254 1255 fdp = curproc->p_fd; 1256 newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); 1257 newfdp->fd_refcnt = 1; 1258 1259 KASSERT(newfdp->fd_knhash == NULL); 1260 KASSERT(newfdp->fd_knhashmask == 0); 1261 KASSERT(newfdp->fd_discard == NULL); 1262 1263 for (;;) { 1264 numfiles = fdp->fd_nfiles; 1265 lastfile = fdp->fd_lastfile; 1266 1267 /* 1268 * If the number of open files fits in the internal arrays 1269 * of the open file structure, use them, otherwise allocate 1270 * additional memory for the number of descriptors currently 1271 * in use. 1272 */ 1273 if (lastfile < NDFILE) { 1274 i = NDFILE; 1275 newfdp->fd_ofiles = newfdp->fd_dfiles; 1276 } else { 1277 /* 1278 * Compute the smallest multiple of NDEXTENT needed 1279 * for the file descriptors currently in use, 1280 * allowing the table to shrink. 1281 */ 1282 i = numfiles; 1283 while (i >= 2 * NDEXTENT && i > lastfile * 2) { 1284 i /= 2; 1285 } 1286 newfdp->fd_ofiles = fd_ofile_alloc(i); 1287 KASSERT(i >= NDFILE); 1288 } 1289 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1290 newfdp->fd_himap = newfdp->fd_dhimap; 1291 newfdp->fd_lomap = newfdp->fd_dlomap; 1292 } else { 1293 fd_map_alloc(i, &newfdp->fd_lomap, 1294 &newfdp->fd_himap); 1295 } 1296 1297 /* 1298 * Allocate and string together fdfile structures. 1299 * We abuse fdfile_t::ff_file here, but it will be 1300 * cleared before this routine returns. 
1301 */ 1302 nused = fdp->fd_nused; 1303 fflist = NULL; 1304 for (j = nused; j != 0; j--) { 1305 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 1306 ff->ff_file = (void *)fflist; 1307 fflist = ff; 1308 } 1309 1310 mutex_enter(&fdp->fd_lock); 1311 if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused && 1312 lastfile == fdp->fd_lastfile) { 1313 break; 1314 } 1315 mutex_exit(&fdp->fd_lock); 1316 if (i >= NDFILE) { 1317 fd_ofile_free(i, newfdp->fd_ofiles); 1318 } 1319 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1320 fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap); 1321 } 1322 while (fflist != NULL) { 1323 ff = fflist; 1324 fflist = (void *)ff->ff_file; 1325 ff->ff_file = NULL; 1326 pool_cache_put(fdfile_cache, ff); 1327 } 1328 } 1329 1330 newfdp->fd_nfiles = i; 1331 newfdp->fd_freefile = fdp->fd_freefile; 1332 newfdp->fd_exclose = fdp->fd_exclose; 1333 1334 /* 1335 * Clear the entries that will not be copied over. 1336 * Avoid calling memset with 0 size. 1337 */ 1338 if (lastfile < (i-1)) { 1339 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1340 (i - lastfile - 1) * sizeof(file_t **)); 1341 } 1342 if (i < NDENTRIES * NDENTRIES) { 1343 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1344 } 1345 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1346 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1347 1348 ffp = fdp->fd_ofiles; 1349 nffp = newfdp->fd_ofiles; 1350 j = imax(lastfile, (NDFDFILE - 1)); 1351 newlast = -1; 1352 KASSERT(j < fdp->fd_nfiles); 1353 for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) { 1354 ff = *ffp; 1355 /* Install built-in fdfiles even if unused here. */ 1356 if (i < NDFDFILE) { 1357 ff2 = (fdfile_t *)newfdp->fd_dfdfile[i]; 1358 } else { 1359 ff2 = NULL; 1360 } 1361 /* Determine if descriptor is active in parent. */ 1362 if (ff == NULL || !fd_isused(fdp, i)) { 1363 KASSERT(ff != NULL || i >= NDFDFILE); 1364 continue; 1365 } 1366 mutex_enter(&ff->ff_lock); 1367 fp = ff->ff_file; 1368 if (fp == NULL) { 1369 /* Descriptor is half-open: free slot. */ 1370 fd_zap(newfdp, i); 1371 mutex_exit(&ff->ff_lock); 1372 continue; 1373 } 1374 if (fp->f_type == DTYPE_KQUEUE) { 1375 /* kqueue descriptors cannot be copied. */ 1376 fd_zap(newfdp, i); 1377 mutex_exit(&ff->ff_lock); 1378 continue; 1379 } 1380 /* It's active: add a reference to the file. */ 1381 mutex_enter(&fp->f_lock); 1382 fp->f_count++; 1383 mutex_exit(&fp->f_lock); 1384 /* Consume one fdfile_t to represent it. */ 1385 if (i >= NDFDFILE) { 1386 ff2 = fflist; 1387 fflist = (void *)ff2->ff_file; 1388 } 1389 ff2->ff_file = fp; 1390 ff2->ff_exclose = ff->ff_exclose; 1391 ff2->ff_allocated = true; 1392 mutex_exit(&ff->ff_lock); 1393 if (i > newlast) { 1394 newlast = i; 1395 } 1396 } 1397 mutex_exit(&fdp->fd_lock); 1398 1399 /* Discard unused fdfile_t structures. */ 1400 while (__predict_false(fflist != NULL)) { 1401 ff = fflist; 1402 fflist = (void *)ff->ff_file; 1403 ff->ff_file = NULL; 1404 pool_cache_put(fdfile_cache, ff); 1405 nused--; 1406 } 1407 KASSERT(nused >= 0); 1408 KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); 1409 1410 newfdp->fd_nused = nused; 1411 newfdp->fd_lastfile = newlast; 1412 1413 return (newfdp); 1414 } 1415 1416 /* 1417 * Release a filedesc structure. 
1418 */
1419 void
1420 fd_free(void)
1421 {
1422 	filedesc_t *fdp;
1423 	fdfile_t *ff;
1424 	file_t *fp;
1425 	int fd, lastfd;
1426 	void **discard;
1427 
1428 	fdp = curlwp->l_fd;
1429 
1430 	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
1431 
1432 	if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0)
1433 		return;
1434 
1435 	/*
1436 	 * Close any files that the process holds open.
1437 	 */
1438 	for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) {
1439 		ff = fdp->fd_ofiles[fd];
1440 		KASSERT(fd >= NDFDFILE ||
1441 		    ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
1442 		if ((ff = fdp->fd_ofiles[fd]) == NULL)
1443 			continue;
1444 		if ((fp = ff->ff_file) != NULL) {
1445 			/*
1446 			 * Must use fd_close() here as kqueue holds
1447 			 * long term references to descriptors.
1448 			 */
1449 			ff->ff_refcnt++;
1450 			fd_close(fd);
1451 		}
1452 		KASSERT(ff->ff_refcnt == 0);
1453 		KASSERT(ff->ff_file == NULL);
1454 		KASSERT(!ff->ff_exclose);
1455 		KASSERT(!ff->ff_allocated);
1456 		if (fd >= NDFDFILE) {
1457 			pool_cache_put(fdfile_cache, ff);
1458 		}
1459 	}
1460 
1461 	/*
1462 	 * Clean out the descriptor table for the next user and return
1463 	 * to the cache.
1464 	 */
1465 	while ((discard = fdp->fd_discard) != NULL) {
1466 		fdp->fd_discard = discard[0];
1467 		kmem_free(discard, (uintptr_t)discard[1]);
1468 	}
1469 	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
1470 		KASSERT(fdp->fd_himap != fdp->fd_dhimap);
1471 		KASSERT(fdp->fd_lomap != fdp->fd_dlomap);
1472 		fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap);
1473 	}
1474 	if (fdp->fd_nfiles > NDFILE) {
1475 		KASSERT(fdp->fd_ofiles != fdp->fd_dfiles);
1476 		fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles);
1477 	}
1478 	if (fdp->fd_knhash != NULL) {
1479 		hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask);
1480 		fdp->fd_knhash = NULL;
1481 		fdp->fd_knhashmask = 0;
1482 	} else {
1483 		KASSERT(fdp->fd_knhashmask == 0);
1484 	}
1485 	fdp->fd_lastkqfile = -1;
1486 	pool_cache_put(filedesc_cache, fdp);
1487 }
1488 
1489 /*
1490  * File Descriptor pseudo-device driver (/dev/fd/).
1491  *
1492  * Opening minor device N dup()s the file (if any) connected to file
1493  * descriptor N belonging to the calling process.  Note that this driver
1494  * consists of only the ``open()'' routine, because all subsequent
1495  * references to this file will be direct to the other driver.
1496  */
1497 static int
1498 filedescopen(dev_t dev, int mode, int type, lwp_t *l)
1499 {
1500 
1501 	/*
1502 	 * XXX Kludge: set l_dupfd to contain the value of the file
1503 	 * descriptor being sought for duplication.  The error return
1504 	 * ensures that the vnode for this device will be released by
1505 	 * vn_open.  Open will detect this special error and take the
1506 	 * actions in fd_dupopen() below.  Other callers of vn_open or
1507 	 * VOP_OPEN will simply report the error.
1508 	 */
1509 	l->l_dupfd = minor(dev);	/* XXX */
1510 	return EDUPFD;
1511 }
1512 
1513 /*
1514  * Duplicate the specified descriptor to a free descriptor.
1515  */
1516 int
1517 fd_dupopen(int old, int *new, int mode, int error)
1518 {
1519 	filedesc_t *fdp;
1520 	fdfile_t *ff;
1521 	file_t *fp;
1522 
1523 	if ((fp = fd_getfile(old)) == NULL) {
1524 		return EBADF;
1525 	}
1526 	fdp = curlwp->l_fd;
1527 	ff = fdp->fd_ofiles[old];
1528 
1529 	/*
1530 	 * There are two cases of interest here.
1531 	 *
1532 	 * For EDUPFD, simply dup the descriptor 'old' into the newly
1533 	 * allocated descriptor '*new' and return.
1534 	 *
1535 	 * For EMOVEFD, steal away the file structure from 'old' and
1536 	 * store it in '*new'.  'old' is effectively closed by
1537 	 * this operation.
1538 	 *
1539 	 * Any other error code is just returned.
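 *
 * As a concrete illustration: open("/dev/fd/3", ...) makes
 * filedescopen() above record 3 in l_dupfd and fail with EDUPFD,
 * after which the open path calls this routine with old == 3 and the
 * application receives a fresh descriptor referencing the same open
 * file.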
1540 */
1541 	switch (error) {
1542 	case EDUPFD:
1543 		/*
1544 		 * Check that the mode the file is being opened for is a
1545 		 * subset of the mode of the existing descriptor.
1546 		 */
1547 		if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
1548 			error = EACCES;
1549 			break;
1550 		}
1551 
1552 		/* Copy it. */
1553 		error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose);
1554 		break;
1555 
1556 	case EMOVEFD:
1557 		/* Copy it. */
1558 		error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose);
1559 		if (error != 0) {
1560 			break;
1561 		}
1562 
1563 		/* Steal away the file pointer from 'old'. */
1564 		(void)fd_close(old);
1565 		return 0;
1566 	}
1567 
1568 	fd_putfile(old);
1569 	return error;
1570 }
1571 
1572 /*
1573  * Set the descriptor owner.  If the owner is a process, 'pgid' is
1574  * set to the process ID (a positive value).  If the owner is a
1575  * process group, 'pgid' is set to the negated group ID (-pg_id).
1576  */
1577 int
1578 fsetown(pid_t *pgid, u_long cmd, const void *data)
1579 {
1580 	int id = *(const int *)data;
1581 	int error;
1582 
1583 	switch (cmd) {
1584 	case TIOCSPGRP:
1585 		if (id < 0)
1586 			return (EINVAL);
1587 		id = -id;
1588 		break;
1589 	default:
1590 		break;
1591 	}
1592 
1593 	if (id > 0 && !pfind(id))
1594 		return (ESRCH);
1595 	else if (id < 0 && (error = pgid_in_session(curproc, -id)))
1596 		return (error);
1597 
1598 	*pgid = id;
1599 	return (0);
1600 }
1601 
1602 /*
1603  * Return descriptor owner information.  If the value is positive,
1604  * it is a process ID.  If it is negative, it is a process group ID
1605  * and the sign must be removed before use.
1606  */
1607 int
1608 fgetown(pid_t pgid, u_long cmd, void *data)
1609 {
1610 
1611 	switch (cmd) {
1612 	case TIOCGPGRP:
1613 		*(int *)data = -pgid;
1614 		break;
1615 	default:
1616 		*(int *)data = pgid;
1617 		break;
1618 	}
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Send a signal to the descriptor owner: either a process or a process group.
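 *
 * Illustrative use only: a driver that previously recorded its owner
 * with fsetown() would typically post SIGIO on I/O readiness via
 * fownsignal(sc_pgid, SIGIO, 0, 0, NULL), where sc_pgid is a
 * hypothetical name for the pid_t it stored earlier.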
1624 */ 1625 void 1626 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1627 { 1628 struct proc *p1; 1629 struct pgrp *pgrp; 1630 ksiginfo_t ksi; 1631 1632 KASSERT(!cpu_intr_p()); 1633 1634 KSI_INIT(&ksi); 1635 ksi.ksi_signo = signo; 1636 ksi.ksi_code = code; 1637 ksi.ksi_band = band; 1638 1639 mutex_enter(proc_lock); 1640 if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED))) 1641 kpsignal(p1, &ksi, fdescdata); 1642 else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED))) 1643 kpgsignal(pgrp, &ksi, fdescdata, 0); 1644 mutex_exit(proc_lock); 1645 } 1646 1647 int 1648 fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, 1649 void *data) 1650 { 1651 1652 fp->f_flag = flag; 1653 fp->f_type = DTYPE_MISC; 1654 fp->f_ops = fops; 1655 fp->f_data = data; 1656 curlwp->l_dupfd = fd; 1657 fd_affix(curproc, fp, fd); 1658 1659 return EMOVEFD; 1660 } 1661 1662 int 1663 fnullop_fcntl(file_t *fp, u_int cmd, void *data) 1664 { 1665 1666 if (cmd == F_SETFL) 1667 return 0; 1668 1669 return EOPNOTSUPP; 1670 } 1671 1672 int 1673 fnullop_poll(file_t *fp, int which) 1674 { 1675 1676 return 0; 1677 } 1678 1679 int 1680 fnullop_kqfilter(file_t *fp, struct knote *kn) 1681 { 1682 1683 return 0; 1684 } 1685 1686 int 1687 fbadop_read(file_t *fp, off_t *offset, struct uio *uio, 1688 kauth_cred_t cred, int flags) 1689 { 1690 1691 return EOPNOTSUPP; 1692 } 1693 1694 int 1695 fbadop_write(file_t *fp, off_t *offset, struct uio *uio, 1696 kauth_cred_t cred, int flags) 1697 { 1698 1699 return EOPNOTSUPP; 1700 } 1701 1702 int 1703 fbadop_ioctl(file_t *fp, u_long com, void *data) 1704 { 1705 1706 return EOPNOTSUPP; 1707 } 1708 1709 int 1710 fbadop_stat(file_t *fp, struct stat *sb) 1711 { 1712 1713 return EOPNOTSUPP; 1714 } 1715 1716 int 1717 fbadop_close(file_t *fp) 1718 { 1719 1720 return EOPNOTSUPP; 1721 } 1722
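/*
 * Illustrative sketch, not part of the original file: a clonable
 * pseudo-device typically builds a new file in its open routine and
 * hands it over through fd_clone(), relying on the EMOVEFD handling
 * above to install the descriptor.  The "myclone" names below are
 * hypothetical.
 *
 *	static int
 *	myclone_open(dev_t dev, int flag, int mode, lwp_t *l)
 *	{
 *		file_t *fp;
 *		int fd, error;
 *
 *		if ((error = fd_allocfile(&fp, &fd)) != 0)
 *			return error;
 *		return fd_clone(fp, fd, flag, &myclone_fileops, NULL);
 *	}
 */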