/*	$NetBSD: kern_descrip.c,v 1.188 2009/03/11 06:05:29 mrg Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
 */

/*
 * File descriptor management.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.188 2009/03/11 06:05:29 mrg Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/pool.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <sys/syscallargs.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/vnode.h>

static int	file_ctor(void *, void *, int);
static void	file_dtor(void *, void *);
static int	fdfile_ctor(void *, void *, int);
static void	fdfile_dtor(void *, void *);
static int	filedesc_ctor(void *, void *, int);
static void	filedesc_dtor(void *, void *);
static int	filedescopen(dev_t, int, int, lwp_t *);

kmutex_t	filelist_lock;	/* lock on filehead */
struct filelist	filehead;	/* head of list of open files */
u_int		nfiles;		/* actual number of open files */

static pool_cache_t filedesc_cache;
static pool_cache_t file_cache;
static pool_cache_t fdfile_cache;

const struct cdevsw filedesc_cdevsw = {
	filedescopen, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE,
};

/* For ease of reading. */
__strong_alias(fd_putvnode,fd_putfile)
__strong_alias(fd_putsock,fd_putfile)
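/*
 * Usage note (an illustrative sketch, not code from this file): the
 * fd_putvnode() and fd_putsock() aliases above exist only so that code
 * using the typed lookup routines reads symmetrically.  A typical
 * vnode-based operation looks roughly like:
 *
 *	file_t *fp;
 *	vnode_t *vp;
 *	int error;
 *
 *	if ((error = fd_getvnode(fd, &fp)) != 0)
 *		return error;
 *	vp = fp->f_data;
 *	...operate on the vnode...
 *	fd_putvnode(fd);
 *
 * Both aliases resolve to fd_putfile(), i.e. they release the
 * descriptor reference taken by fd_getvnode()/fd_getsock().
 */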
/*
 * Initialize the descriptor system.
 */
void
fd_sys_init(void)
{

	mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE);

	file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0,
	    0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL);
	KASSERT(file_cache != NULL);

	fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0,
	    PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor,
	    NULL);
	KASSERT(fdfile_cache != NULL);

	filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit,
	    0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor,
	    NULL);
	KASSERT(filedesc_cache != NULL);
}

static int
fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits)
{
	int i, off, maxoff;
	uint32_t sub;

	KASSERT(mutex_owned(&fdp->fd_lock));

	if (want > bits)
		return -1;

	off = want >> NDENTRYSHIFT;
	i = want & NDENTRYMASK;
	if (i) {
		sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
		if (sub != ~0)
			goto found;
		off++;
	}

	maxoff = NDLOSLOTS(bits);
	while (off < maxoff) {
		if ((sub = bitmap[off]) != ~0)
			goto found;
		off++;
	}

	return (-1);

 found:
	return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
}

static int
fd_last_set(filedesc_t *fd, int last)
{
	int off, i;
	fdfile_t **ofiles = fd->fd_ofiles;
	uint32_t *bitmap = fd->fd_lomap;

	KASSERT(mutex_owned(&fd->fd_lock));

	off = (last - 1) >> NDENTRYSHIFT;

	while (off >= 0 && !bitmap[off])
		off--;

	if (off < 0)
		return (-1);

	i = ((off + 1) << NDENTRYSHIFT) - 1;
	if (i >= last)
		i = last - 1;

	/* XXX should use bitmap */
	/* XXXAD does not work for fd_copy() */
	while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated))
		i--;

	return (i);
}

void
fd_used(filedesc_t *fdp, unsigned fd)
{
	u_int off = fd >> NDENTRYSHIFT;
	fdfile_t *ff;

	ff = fdp->fd_ofiles[fd];

	KASSERT(mutex_owned(&fdp->fd_lock));
	KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0);
	KASSERT(ff != NULL);
	KASSERT(ff->ff_file == NULL);
	KASSERT(!ff->ff_allocated);

	ff->ff_allocated = 1;
	fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
	if (fdp->fd_lomap[off] == ~0) {
		KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
		    (1 << (off & NDENTRYMASK))) == 0);
		fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
	}

	if ((int)fd > fdp->fd_lastfile) {
		fdp->fd_lastfile = fd;
	}

	if (fd >= NDFDFILE) {
		fdp->fd_nused++;
	} else {
		KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
	}
}
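/*
 * Note on the descriptor bitmaps (an informal summary of the code in
 * this file, with one worked example): fd_lomap has one bit per
 * descriptor, NDENTRIES bits per 32-bit word, and fd_himap has one bit
 * per fd_lomap word, set only while that word is completely full.  A
 * descriptor is located in the maps as follows:
 *
 *	u_int off = fd >> NDENTRYSHIFT;		word in fd_lomap
 *	u_int bit = fd & NDENTRYMASK;		bit within that word
 *
 * For example, with NDENTRIES == 32, descriptor 70 is bit 6 of
 * fd_lomap[2], and fd_lomap[2] is in turn tracked by bit 2 of
 * fd_himap[0].  fd_alloc() scans fd_himap first to skip over fully
 * populated blocks of NDENTRIES descriptors, then uses fd_next_zero()
 * on the selected fd_lomap word.
 */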
void
fd_unused(filedesc_t *fdp, unsigned fd)
{
	u_int off = fd >> NDENTRYSHIFT;
	fdfile_t *ff;

	ff = fdp->fd_ofiles[fd];

	/*
	 * Don't assert the lock is held here, as we may be copying
	 * the table during exec() and it is not needed there.
	 * procfs and sysctl are locked out by proc::p_reflock.
	 *
	 * KASSERT(mutex_owned(&fdp->fd_lock));
	 */
	KASSERT(ff != NULL);
	KASSERT(ff->ff_file == NULL);
	KASSERT(ff->ff_allocated);

	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}

	if (fdp->fd_lomap[off] == ~0) {
		KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
		    (1 << (off & NDENTRYMASK))) != 0);
		fdp->fd_himap[off >> NDENTRYSHIFT] &=
		    ~(1 << (off & NDENTRYMASK));
	}
	KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0);
	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
	ff->ff_allocated = 0;

	KASSERT(fd <= fdp->fd_lastfile);
	if (fd == fdp->fd_lastfile) {
		fdp->fd_lastfile = fd_last_set(fdp, fd);
	}

	if (fd >= NDFDFILE) {
		KASSERT(fdp->fd_nused > 0);
		fdp->fd_nused--;
	} else {
		KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
	}
}

/*
 * Custom version of fd_unused() for fd_copy(), where the descriptor
 * table is not yet fully initialized.
 */
static inline void
fd_zap(filedesc_t *fdp, unsigned fd)
{
	u_int off = fd >> NDENTRYSHIFT;

	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}

	if (fdp->fd_lomap[off] == ~0) {
		KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
		    (1 << (off & NDENTRYMASK))) != 0);
		fdp->fd_himap[off >> NDENTRYSHIFT] &=
		    ~(1 << (off & NDENTRYMASK));
	}
	KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0);
	fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
}

bool
fd_isused(filedesc_t *fdp, unsigned fd)
{
	u_int off = fd >> NDENTRYSHIFT;

	KASSERT(fd < fdp->fd_nfiles);

	return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0;
}

/*
 * Look up the file structure corresponding to a file descriptor
 * and return the file, holding a reference on the descriptor.
 */
inline file_t *
fd_getfile(unsigned fd)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;

	fdp = curlwp->l_fd;

	/*
	 * Look up the fdfile structure representing this descriptor.
	 * Ensure that we see fd_nfiles before fd_ofiles since we
	 * are doing this unlocked.  See fd_tryexpand().
	 */
	if (__predict_false(fd >= fdp->fd_nfiles)) {
		return NULL;
	}
	membar_consumer();
	ff = fdp->fd_ofiles[fd];
	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
	if (__predict_false(ff == NULL)) {
		return NULL;
	}

	/*
	 * Now get a reference to the descriptor.  Issue a memory
	 * barrier to ensure that we acquire the file pointer _after_
	 * adding a reference.  If no memory barrier, we could fetch
	 * a stale pointer.
	 */
	atomic_inc_uint(&ff->ff_refcnt);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif

	/*
	 * If the file is not open or is being closed then put the
	 * reference back.
	 */
	fp = ff->ff_file;
	if (__predict_true(fp != NULL)) {
		return fp;
	}
	fd_putfile(fd);
	return NULL;
}
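/*
 * Example (a sketch only; example_fd_op() and the way it uses the file
 * are hypothetical): code running in the context of a system call
 * brackets each use of a descriptor with fd_getfile()/fd_putfile(),
 * holding the reference only for the duration of the operation:
 *
 *	int
 *	example_fd_op(unsigned fd)
 *	{
 *		file_t *fp;
 *		int error;
 *
 *		if ((fp = fd_getfile(fd)) == NULL)
 *			return EBADF;
 *		error = ...operate on fp, typically via fp->f_ops...;
 *		fd_putfile(fd);
 *		return error;
 *	}
 *
 * The reference pins the descriptor slot, not the file itself; if the
 * descriptor is closed concurrently, the final fd_putfile() joins the
 * close (see fd_putfile() below).
 */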
/*
 * Release a reference to a file descriptor acquired with fd_getfile().
 */
void
fd_putfile(unsigned fd)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	u_int u, v;

	fdp = curlwp->l_fd;
	ff = fdp->fd_ofiles[fd];

	KASSERT(fd < fdp->fd_nfiles);
	KASSERT(ff != NULL);
	KASSERT((ff->ff_refcnt & FR_MASK) > 0);
	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);

	/*
	 * Ensure that any use of the file is complete and globally
	 * visible before dropping the final reference.  If no membar,
	 * the current CPU could still access memory associated with
	 * the file after it has been freed or recycled by another
	 * CPU.
	 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_exit();
#endif

	/*
	 * Be optimistic and start out with the assumption that no other
	 * threads are trying to close the descriptor.  If the CAS fails,
	 * we lost a race and/or it's being closed.
	 */
	for (u = ff->ff_refcnt & FR_MASK;; u = v) {
		v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1);
		if (__predict_true(u == v)) {
			return;
		}
		if (__predict_false((v & FR_CLOSING) != 0)) {
			break;
		}
	}

	/* Another thread is waiting to close the file: join it. */
	(void)fd_close(fd);
}

/*
 * Convenience wrapper around fd_getfile() that returns reference
 * to a vnode.
 */
int
fd_getvnode(unsigned fd, file_t **fpp)
{
	vnode_t *vp;
	file_t *fp;

	fp = fd_getfile(fd);
	if (__predict_false(fp == NULL)) {
		return EBADF;
	}
	if (__predict_false(fp->f_type != DTYPE_VNODE)) {
		fd_putfile(fd);
		return EINVAL;
	}
	vp = fp->f_data;
	if (__predict_false(vp->v_type == VBAD)) {
		/* XXX Is this case really necessary? */
		fd_putfile(fd);
		return EBADF;
	}
	*fpp = fp;
	return 0;
}

/*
 * Convenience wrapper around fd_getfile() that returns reference
 * to a socket.
 */
int
fd_getsock(unsigned fd, struct socket **sop)
{
	file_t *fp;

	fp = fd_getfile(fd);
	if (__predict_false(fp == NULL)) {
		return EBADF;
	}
	if (__predict_false(fp->f_type != DTYPE_SOCKET)) {
		fd_putfile(fd);
		return ENOTSOCK;
	}
	*sop = fp->f_data;
	return 0;
}

/*
 * Look up the file structure corresponding to a file descriptor
 * and return it with a reference held on the file, not the
 * descriptor.
 *
 * This is heavyweight and only used when accessing descriptors
 * from a foreign process.  The caller must ensure that `p' does
 * not exit or fork across this call.
 *
 * To release the file (not descriptor) reference, use closef().
 */
file_t *
fd_getfile2(proc_t *p, unsigned fd)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;

	fdp = p->p_fd;
	mutex_enter(&fdp->fd_lock);
	if (fd >= fdp->fd_nfiles) {
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	if ((ff = fdp->fd_ofiles[fd]) == NULL) {
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	mutex_enter(&ff->ff_lock);
	if ((fp = ff->ff_file) == NULL) {
		mutex_exit(&ff->ff_lock);
		mutex_exit(&fdp->fd_lock);
		return NULL;
	}
	mutex_enter(&fp->f_lock);
	fp->f_count++;
	mutex_exit(&fp->f_lock);
	mutex_exit(&ff->ff_lock);
	mutex_exit(&fdp->fd_lock);

	return fp;
}
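/*
 * Example (a sketch; the surrounding synchronization is only
 * summarized): a caller inspecting a descriptor of a foreign process
 * uses fd_getfile2(), which returns a file reference rather than a
 * descriptor reference, and therefore releases it with closef()
 * rather than fd_putfile():
 *
 *	file_t *fp;
 *
 *	if ((fp = fd_getfile2(p, fd)) != NULL) {
 *		...examine fp->f_type, fp->f_data, fp->f_flag...
 *		(void)closef(fp);
 *	}
 *
 * As noted above, the caller must prevent `p' from exiting or forking
 * across this sequence (e.g. procfs and sysctl hold proc::p_reflock,
 * see fd_unused()).
 */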
/*
 * Internal form of close.  Must be called with a reference to the
 * descriptor, and will drop the reference.  When all descriptor
 * references are dropped, releases the descriptor slot and a single
 * reference to the file structure.
 */
int
fd_close(unsigned fd)
{
	struct flock lf;
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;
	proc_t *p;
	lwp_t *l;

	l = curlwp;
	p = l->l_proc;
	fdp = l->l_fd;
	ff = fdp->fd_ofiles[fd];

	KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]);

	mutex_enter(&ff->ff_lock);
	KASSERT((ff->ff_refcnt & FR_MASK) > 0);
	if (ff->ff_file == NULL) {
		/*
		 * Another user of the file is already closing, and is
		 * waiting for other users of the file to drain.  Release
		 * our reference, and wake up the closer.
		 */
		atomic_dec_uint(&ff->ff_refcnt);
		cv_broadcast(&ff->ff_closing);
		mutex_exit(&ff->ff_lock);

		/*
		 * An application error, so pretend that the descriptor
		 * was already closed.  We can't safely wait for it to
		 * be closed without potentially deadlocking.
		 */
		return (EBADF);
	}
	KASSERT((ff->ff_refcnt & FR_CLOSING) == 0);

	/*
	 * There may be multiple users of this file within the process.
	 * Notify existing and new users that the file is closing.  This
	 * will prevent them from adding additional uses to this file
	 * while we are closing it.
	 */
	fp = ff->ff_file;
	ff->ff_file = NULL;
	ff->ff_exclose = false;

	/*
	 * We expect the caller to hold a descriptor reference - drop it.
	 * The reference count may increase beyond zero at this point due
	 * to an erroneous descriptor reference by an application, but
	 * fd_getfile() will notice that the file is being closed and drop
	 * the reference again.
	 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_producer();
#endif
	if (__predict_false(atomic_dec_uint_nv(&ff->ff_refcnt) != 0)) {
		/*
		 * Wait for other references to drain.  This is typically
		 * an application error - the descriptor is being closed
		 * while still in use.
		 */
		atomic_or_uint(&ff->ff_refcnt, FR_CLOSING);
		/*
		 * Remove any knotes attached to the file.  A knote
		 * attached to the descriptor can hold references on it.
		 */
		if (!SLIST_EMPTY(&ff->ff_knlist)) {
			mutex_exit(&ff->ff_lock);
			knote_fdclose(fd);
			mutex_enter(&ff->ff_lock);
		}
		/*
		 * We need to see the count drop to zero at least once,
		 * in order to ensure that all pre-existing references
		 * have been drained.  New references past this point are
		 * of no interest.
		 */
		while ((ff->ff_refcnt & FR_MASK) != 0) {
			cv_wait(&ff->ff_closing, &ff->ff_lock);
		}
		atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING);
	} else {
		/* If no references, there must be no knotes. */
		KASSERT(SLIST_EMPTY(&ff->ff_knlist));
	}
	mutex_exit(&ff->ff_lock);

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if ((p->p_flag & PK_ADVLOCK) != 0 && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		(void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX);
	}

	/* Free descriptor slot. */
	mutex_enter(&fdp->fd_lock);
	fd_unused(fdp, fd);
	mutex_exit(&fdp->fd_lock);

	/* Now drop reference to the file itself. */
	return closef(fp);
}

/*
 * Duplicate a file descriptor.
 */
int
fd_dup(file_t *fp, int minfd, int *newp, bool exclose)
{
	proc_t *p;
	int error;

	p = curproc;

	while ((error = fd_alloc(p, minfd, newp)) != 0) {
		if (error != ENOSPC) {
			return error;
		}
		fd_tryexpand(p);
	}

	curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose;
	fd_affix(p, fp, *newp);
	return 0;
}

/*
 * dup2 operation.
 */
int
fd_dup2(file_t *fp, unsigned new)
{
	filedesc_t *fdp;
	fdfile_t *ff;

	fdp = curlwp->l_fd;

	/*
	 * Ensure there are enough slots in the descriptor table,
	 * and allocate an fdfile_t up front in case we need it.
	 */
	while (new >= fdp->fd_nfiles) {
		fd_tryexpand(curproc);
	}
	ff = pool_cache_get(fdfile_cache, PR_WAITOK);

	/*
	 * If there is already a file open, close it.  If the file is
	 * half open, wait for it to be constructed before closing it.
	 * XXX Potential for deadlock here?
	 */
	mutex_enter(&fdp->fd_lock);
	while (fd_isused(fdp, new)) {
		mutex_exit(&fdp->fd_lock);
		if (fd_getfile(new) != NULL) {
			(void)fd_close(new);
		} else {
			/* XXX Crummy, but unlikely to happen. */
			kpause("dup2", false, 1, NULL);
		}
		mutex_enter(&fdp->fd_lock);
	}
	if (fdp->fd_ofiles[new] == NULL) {
		KASSERT(new >= NDFDFILE);
		fdp->fd_ofiles[new] = ff;
		ff = NULL;
	}
	fd_used(fdp, new);
	mutex_exit(&fdp->fd_lock);

	/* Slot is now allocated.  Insert copy of the file. */
	fd_affix(curproc, fp, new);
	if (ff != NULL) {
		pool_cache_put(fdfile_cache, ff);
	}
	return 0;
}

/*
 * Drop reference to a file structure.
 */
int
closef(file_t *fp)
{
	struct flock lf;
	int error;

	/*
	 * Drop reference.  If referenced elsewhere it's still open
	 * and we have nothing more to do.
	 */
	mutex_enter(&fp->f_lock);
	KASSERT(fp->f_count > 0);
	if (--fp->f_count > 0) {
		mutex_exit(&fp->f_lock);
		return 0;
	}
	KASSERT(fp->f_count == 0);
	mutex_exit(&fp->f_lock);

	/* We held the last reference - release locks, close and free. */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		(void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK);
	}
	if (fp->f_ops != NULL) {
		error = (*fp->f_ops->fo_close)(fp);
	} else {
		error = 0;
	}
	ffree(fp);

	return error;
}

/*
 * Allocate a file descriptor for the process.
 */
int
fd_alloc(proc_t *p, int want, int *result)
{
	filedesc_t *fdp;
	int i, lim, last, error;
	u_int off, new;
	fdfile_t *ff;

	KASSERT(p == curproc || p == &proc0);

	fdp = p->p_fd;
	ff = pool_cache_get(fdfile_cache, PR_WAITOK);
	KASSERT(ff->ff_refcnt == 0);
	KASSERT(ff->ff_file == NULL);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.
	 */
	mutex_enter(&fdp->fd_lock);
	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
	last = min(fdp->fd_nfiles, lim);
	for (;;) {
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		off = i >> NDENTRYSHIFT;
		new = fd_next_zero(fdp, fdp->fd_himap, off,
		    (last + NDENTRIES - 1) >> NDENTRYSHIFT);
		if (new == -1)
			break;
		i = fd_next_zero(fdp, &fdp->fd_lomap[new],
		    new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
		if (i == -1) {
			/*
			 * Free file descriptor in this block was
			 * below want, try again with higher want.
			 */
			want = (new + 1) << NDENTRYSHIFT;
			continue;
		}
		i += (new << NDENTRYSHIFT);
		if (i >= last) {
			break;
		}
		if (fdp->fd_ofiles[i] == NULL) {
			KASSERT(i >= NDFDFILE);
			fdp->fd_ofiles[i] = ff;
		} else {
			pool_cache_put(fdfile_cache, ff);
		}
		KASSERT(fdp->fd_ofiles[i]->ff_file == NULL);
		fd_used(fdp, i);
		if (want <= fdp->fd_freefile) {
			fdp->fd_freefile = i;
		}
		*result = i;
		mutex_exit(&fdp->fd_lock);
		KASSERT(i >= NDFDFILE ||
		    fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]);
		return 0;
	}

	/* No space in current array.  Let the caller expand and retry. */
	error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC;
	mutex_exit(&fdp->fd_lock);
	pool_cache_put(fdfile_cache, ff);
	return error;
}

/*
 * Allocate memory for the open files array.
 */
static fdfile_t **
fd_ofile_alloc(int n)
{
	uintptr_t *ptr, sz;

	KASSERT(n > NDFILE);

	sz = (n + 2) * sizeof(uintptr_t);
	ptr = kmem_alloc((size_t)sz, KM_SLEEP);
	ptr[1] = sz;

	return (fdfile_t **)(ptr + 2);
}

/*
 * Free an open files array.
 */
static void
fd_ofile_free(int n, fdfile_t **of)
{
	uintptr_t *ptr, sz;

	KASSERT(n > NDFILE);

	sz = (n + 2) * sizeof(uintptr_t);
	ptr = (uintptr_t *)of - 2;
	KASSERT(ptr[1] == sz);
	kmem_free(ptr, sz);
}

/*
 * Allocate descriptor bitmap.
 */
static void
fd_map_alloc(int n, uint32_t **lo, uint32_t **hi)
{
	uint8_t *ptr;
	size_t szlo, szhi;

	KASSERT(n > NDENTRIES);

	szlo = NDLOSLOTS(n) * sizeof(uint32_t);
	szhi = NDHISLOTS(n) * sizeof(uint32_t);
	ptr = kmem_alloc(szlo + szhi, KM_SLEEP);
	*lo = (uint32_t *)ptr;
	*hi = (uint32_t *)(ptr + szlo);
}

/*
 * Free descriptor bitmap.
 */
static void
fd_map_free(int n, uint32_t *lo, uint32_t *hi)
{
	size_t szlo, szhi;

	KASSERT(n > NDENTRIES);

	szlo = NDLOSLOTS(n) * sizeof(uint32_t);
	szhi = NDHISLOTS(n) * sizeof(uint32_t);
	KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo));
	kmem_free(lo, szlo + szhi);
}
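/*
 * Layout note (a summary of fd_ofile_alloc()/fd_ofile_free() and of
 * their use in fd_tryexpand()/fd_free(), not an external interface):
 * the open files array is allocated with two hidden uintptr_t slots
 * in front of the fdfile_t pointers:
 *
 *	base[0]			link for the fd_discard list
 *	base[1]			total allocation size in bytes
 *	base[2] ... base[n+1]	fd_ofiles[0] ... fd_ofiles[n-1]
 *
 * fd_ofile_alloc() returns base + 2, so fd_tryexpand() can reach the
 * hidden slots as fd_ofiles[-2] when it defers freeing a table that
 * other LWPs may still be reading, and fd_free() can later recover
 * the allocation size from discard[1].
 */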
/*
 * Expand a process' descriptor table.
 */
void
fd_tryexpand(proc_t *p)
{
	filedesc_t *fdp;
	int i, numfiles, oldnfiles;
	fdfile_t **newofile;
	uint32_t *newhimap, *newlomap;

	KASSERT(p == curproc || p == &proc0);

	fdp = p->p_fd;
	newhimap = NULL;
	newlomap = NULL;
	oldnfiles = fdp->fd_nfiles;

	if (oldnfiles < NDEXTENT)
		numfiles = NDEXTENT;
	else
		numfiles = 2 * oldnfiles;

	newofile = fd_ofile_alloc(numfiles);
	if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
		fd_map_alloc(numfiles, &newlomap, &newhimap);
	}

	mutex_enter(&fdp->fd_lock);
	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
	if (fdp->fd_nfiles != oldnfiles) {
		/* fdp changed; caller must retry */
		mutex_exit(&fdp->fd_lock);
		fd_ofile_free(numfiles, newofile);
		if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
			fd_map_free(numfiles, newlomap, newhimap);
		}
		return;
	}

	/* Copy the existing ofile array and zero the new portion. */
	i = sizeof(fdfile_t *) * fdp->fd_nfiles;
	memcpy(newofile, fdp->fd_ofiles, i);
	memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i);

	/*
	 * Link old ofiles array into list to be discarded.  We defer
	 * freeing until process exit if the descriptor table is visible
	 * to other threads.
	 */
	if (oldnfiles > NDFILE) {
		if ((fdp->fd_refcnt | p->p_nlwps) > 1) {
			fdp->fd_ofiles[-2] = (void *)fdp->fd_discard;
			fdp->fd_discard = fdp->fd_ofiles - 2;
		} else {
			fd_ofile_free(oldnfiles, fdp->fd_ofiles);
		}
	}

	if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
		i = NDHISLOTS(oldnfiles) * sizeof(uint32_t);
		memcpy(newhimap, fdp->fd_himap, i);
		memset((uint8_t *)newhimap + i, 0,
		    NDHISLOTS(numfiles) * sizeof(uint32_t) - i);

		i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t);
		memcpy(newlomap, fdp->fd_lomap, i);
		memset((uint8_t *)newlomap + i, 0,
		    NDLOSLOTS(numfiles) * sizeof(uint32_t) - i);

		if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) {
			fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap);
		}
		fdp->fd_himap = newhimap;
		fdp->fd_lomap = newlomap;
	}

	/*
	 * All other modifications must become globally visible before
	 * the change to fd_nfiles.  See fd_getfile().
	 */
	fdp->fd_ofiles = newofile;
	membar_producer();
	fdp->fd_nfiles = numfiles;
	mutex_exit(&fdp->fd_lock);

	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);
}

/*
 * Create a new open file structure and allocate a file descriptor
 * for the current process.
 */
int
fd_allocfile(file_t **resultfp, int *resultfd)
{
	file_t *fp;
	proc_t *p;
	int error;

	p = curproc;

	while ((error = fd_alloc(p, 0, resultfd)) != 0) {
		if (error != ENOSPC) {
			return error;
		}
		fd_tryexpand(p);
	}

	fp = pool_cache_get(file_cache, PR_WAITOK);
	KASSERT(fp->f_count == 0);
	KASSERT(fp->f_msgcount == 0);
	KASSERT(fp->f_unpcount == 0);
	fp->f_cred = kauth_cred_get();
	kauth_cred_hold(fp->f_cred);

	if (__predict_false(atomic_inc_uint_nv(&nfiles) >= maxfiles)) {
		fd_abort(p, fp, *resultfd);
		tablefull("file", "increase kern.maxfiles or MAXFILES");
		return ENFILE;
	}

	/*
	 * Don't allow recycled files to be scanned.
	 */
	if ((fp->f_flag & FSCAN) != 0) {
		mutex_enter(&fp->f_lock);
		atomic_and_uint(&fp->f_flag, ~FSCAN);
		mutex_exit(&fp->f_lock);
	}

	fp->f_advice = 0;
	fp->f_msgcount = 0;
	fp->f_offset = 0;
	*resultfp = fp;

	return 0;
}

/*
 * Successful creation of a new descriptor: make visible to the process.
 */
void
fd_affix(proc_t *p, file_t *fp, unsigned fd)
{
	fdfile_t *ff;
	filedesc_t *fdp;

	KASSERT(p == curproc || p == &proc0);

	/* Add a reference to the file structure. */
	mutex_enter(&fp->f_lock);
	fp->f_count++;
	mutex_exit(&fp->f_lock);

	/*
	 * Insert the new file into the descriptor slot.
	 *
	 * The memory barriers provided by lock activity in this routine
	 * ensure that any updates to the file structure become globally
	 * visible before the file becomes visible to other LWPs in the
	 * current process.
	 */
	fdp = p->p_fd;
	ff = fdp->fd_ofiles[fd];

	KASSERT(ff != NULL);
	KASSERT(ff->ff_file == NULL);
	KASSERT(ff->ff_allocated);
	KASSERT(fd_isused(fdp, fd));
	KASSERT(fd >= NDFDFILE ||
	    fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]);

	/* No need to lock in order to make file initially visible. */
	ff->ff_file = fp;
}

/*
 * Abort creation of a new descriptor: free descriptor slot and file.
 */
void
fd_abort(proc_t *p, file_t *fp, unsigned fd)
{
	filedesc_t *fdp;
	fdfile_t *ff;

	KASSERT(p == curproc || p == &proc0);

	fdp = p->p_fd;
	ff = fdp->fd_ofiles[fd];

	KASSERT(fd >= NDFDFILE ||
	    fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]);

	mutex_enter(&fdp->fd_lock);
	KASSERT(fd_isused(fdp, fd));
	fd_unused(fdp, fd);
	mutex_exit(&fdp->fd_lock);

	if (fp != NULL) {
		ffree(fp);
	}
}
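/*
 * Example (a sketch of the usual open-style sequence; example_setup()
 * is hypothetical): a new descriptor is created in three steps -
 * allocate, initialize, publish - with fd_abort() as the error path
 * before the file has been made visible:
 *
 *	file_t *fp;
 *	int fd, error;
 *
 *	if ((error = fd_allocfile(&fp, &fd)) != 0)
 *		return error;
 *	error = example_setup(fp);		set f_type, f_ops, f_data
 *	if (error != 0) {
 *		fd_abort(curproc, fp, fd);	frees slot and file
 *		return error;
 *	}
 *	fd_affix(curproc, fp, fd);		descriptor now visible
 *	...hand fd back to the caller...
 *
 * Until fd_affix() runs, the slot is allocated but "half open"
 * (ff_file == NULL), which is why fd_getfile() can return NULL for a
 * descriptor that fd_alloc() has already handed out.
 */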
/*
 * Free a file descriptor.
 */
void
ffree(file_t *fp)
{

	KASSERT(fp->f_count == 0);

	atomic_dec_uint(&nfiles);
	kauth_cred_free(fp->f_cred);
	pool_cache_put(file_cache, fp);
}

static int
file_ctor(void *arg, void *obj, int flags)
{
	file_t *fp = obj;

	memset(fp, 0, sizeof(*fp));
	mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE);

	mutex_enter(&filelist_lock);
	LIST_INSERT_HEAD(&filehead, fp, f_list);
	mutex_exit(&filelist_lock);

	return 0;
}

static void
file_dtor(void *arg, void *obj)
{
	file_t *fp = obj;

	mutex_enter(&filelist_lock);
	LIST_REMOVE(fp, f_list);
	mutex_exit(&filelist_lock);

	mutex_destroy(&fp->f_lock);
}

static int
fdfile_ctor(void *arg, void *obj, int flags)
{
	fdfile_t *ff = obj;

	memset(ff, 0, sizeof(*ff));
	mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&ff->ff_closing, "fdclose");

	return 0;
}

static void
fdfile_dtor(void *arg, void *obj)
{
	fdfile_t *ff = obj;

	mutex_destroy(&ff->ff_lock);
	cv_destroy(&ff->ff_closing);
}

file_t *
fgetdummy(void)
{
	file_t *fp;

	fp = kmem_alloc(sizeof(*fp), KM_SLEEP);
	if (fp != NULL) {
		memset(fp, 0, sizeof(*fp));
		mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE);
	}
	return fp;
}

void
fputdummy(file_t *fp)
{

	mutex_destroy(&fp->f_lock);
	kmem_free(fp, sizeof(*fp));
}

/*
 * Create an initial filedesc structure.
 */
filedesc_t *
fd_init(filedesc_t *fdp)
{
	unsigned fd;

	if (fdp == NULL) {
		fdp = pool_cache_get(filedesc_cache, PR_WAITOK);
	} else {
		filedesc_ctor(NULL, fdp, PR_WAITOK);
	}

	fdp->fd_refcnt = 1;
	fdp->fd_ofiles = fdp->fd_dfiles;
	fdp->fd_nfiles = NDFILE;
	fdp->fd_himap = fdp->fd_dhimap;
	fdp->fd_lomap = fdp->fd_dlomap;
	KASSERT(fdp->fd_lastfile == -1);
	KASSERT(fdp->fd_lastkqfile == -1);
	KASSERT(fdp->fd_knhash == NULL);

	memset(&fdp->fd_startzero, 0, sizeof(*fdp) -
	    offsetof(filedesc_t, fd_startzero));
	for (fd = 0; fd < NDFDFILE; fd++) {
		fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd];
	}

	return fdp;
}

/*
 * Initialize a file descriptor table.
 */
static int
filedesc_ctor(void *arg, void *obj, int flag)
{
	filedesc_t *fdp = obj;
	int i;

	memset(fdp, 0, sizeof(*fdp));
	mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE);
	fdp->fd_lastfile = -1;
	fdp->fd_lastkqfile = -1;

	CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t));
	for (i = 0; i < NDFDFILE; i++) {
		fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK);
	}

	return 0;
}

static void
filedesc_dtor(void *arg, void *obj)
{
	filedesc_t *fdp = obj;
	int i;

	for (i = 0; i < NDFDFILE; i++) {
		fdfile_dtor(NULL, fdp->fd_dfdfile[i]);
	}

	mutex_destroy(&fdp->fd_lock);
}

/*
 * Make p2 share p1's filedesc structure.
 */
void
fd_share(struct proc *p2)
{
	filedesc_t *fdp;

	fdp = curlwp->l_fd;
	p2->p_fd = fdp;
	atomic_inc_uint(&fdp->fd_refcnt);
}
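/*
 * Usage note (a sketch; the fork-time policy itself lives in the fork
 * code, not in this file): a new process either shares the parent's
 * table or receives a copy of it, roughly:
 *
 *	if (shared)
 *		fd_share(p2);		p2 references the same filedesc_t
 *	else
 *		p2->p_fd = fd_copy();	p2 gets a private snapshot
 *
 * fd_share() only bumps fd_refcnt; the final fd_free() from the last
 * process using the table tears it down.  fd_copy() takes a new
 * reference on each open file, so parent and child then close their
 * descriptors independently.
 */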
/*
 * Copy a filedesc structure.
 */
filedesc_t *
fd_copy(void)
{
	filedesc_t *newfdp, *fdp;
	fdfile_t *ff, *fflist, **ffp, **nffp, *ff2;
	int i, nused, numfiles, lastfile, j, newlast;
	file_t *fp;

	fdp = curproc->p_fd;
	newfdp = pool_cache_get(filedesc_cache, PR_WAITOK);
	newfdp->fd_refcnt = 1;

	KASSERT(newfdp->fd_knhash == NULL);
	KASSERT(newfdp->fd_knhashmask == 0);
	KASSERT(newfdp->fd_discard == NULL);

	for (;;) {
		numfiles = fdp->fd_nfiles;
		lastfile = fdp->fd_lastfile;

		/*
		 * If the number of open files fits in the internal arrays
		 * of the open file structure, use them, otherwise allocate
		 * additional memory for the number of descriptors currently
		 * in use.
		 */
		if (lastfile < NDFILE) {
			i = NDFILE;
			newfdp->fd_ofiles = newfdp->fd_dfiles;
		} else {
			/*
			 * Compute the smallest multiple of NDEXTENT needed
			 * for the file descriptors currently in use,
			 * allowing the table to shrink.
			 */
			i = numfiles;
			while (i >= 2 * NDEXTENT && i > lastfile * 2) {
				i /= 2;
			}
			newfdp->fd_ofiles = fd_ofile_alloc(i);
			KASSERT(i > NDFILE);
		}
		if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
			newfdp->fd_himap = newfdp->fd_dhimap;
			newfdp->fd_lomap = newfdp->fd_dlomap;
		} else {
			fd_map_alloc(i, &newfdp->fd_lomap,
			    &newfdp->fd_himap);
		}

		/*
		 * Allocate and string together fdfile structures.
		 * We abuse fdfile_t::ff_file here, but it will be
		 * cleared before this routine returns.
		 */
		nused = fdp->fd_nused;
		fflist = NULL;
		for (j = nused; j != 0; j--) {
			ff = pool_cache_get(fdfile_cache, PR_WAITOK);
			ff->ff_file = (void *)fflist;
			fflist = ff;
		}

		mutex_enter(&fdp->fd_lock);
		if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused &&
		    lastfile == fdp->fd_lastfile) {
			break;
		}
		mutex_exit(&fdp->fd_lock);
		if (i > NDFILE) {
			fd_ofile_free(i, newfdp->fd_ofiles);
		}
		if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) {
			fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap);
		}
		while (fflist != NULL) {
			ff = fflist;
			fflist = (void *)ff->ff_file;
			ff->ff_file = NULL;
			pool_cache_put(fdfile_cache, ff);
		}
	}

	newfdp->fd_nfiles = i;
	newfdp->fd_freefile = fdp->fd_freefile;
	newfdp->fd_exclose = fdp->fd_exclose;

	/*
	 * Clear the entries that will not be copied over.
	 * Avoid calling memset with 0 size.
	 */
	if (lastfile < (i - 1)) {
		memset(newfdp->fd_ofiles + lastfile + 1, 0,
		    (i - lastfile - 1) * sizeof(file_t **));
	}
	if (i < NDENTRIES * NDENTRIES) {
		i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */
	}
	memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t));
	memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t));

	ffp = fdp->fd_ofiles;
	nffp = newfdp->fd_ofiles;
	j = imax(lastfile, (NDFDFILE - 1));
	newlast = -1;
	KASSERT(j < fdp->fd_nfiles);
	for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) {
		ff = *ffp;
		/* Install built-in fdfiles even if unused here. */
		if (i < NDFDFILE) {
			ff2 = (fdfile_t *)newfdp->fd_dfdfile[i];
		} else {
			ff2 = NULL;
		}
		/* Determine if descriptor is active in parent. */
		if (ff == NULL || !fd_isused(fdp, i)) {
			KASSERT(ff != NULL || i >= NDFDFILE);
			continue;
		}
		mutex_enter(&ff->ff_lock);
		fp = ff->ff_file;
		if (fp == NULL) {
			/* Descriptor is half-open: free slot. */
			fd_zap(newfdp, i);
			mutex_exit(&ff->ff_lock);
			continue;
		}
		if (fp->f_type == DTYPE_KQUEUE) {
			/* kqueue descriptors cannot be copied. */
			fd_zap(newfdp, i);
			mutex_exit(&ff->ff_lock);
			continue;
		}
		/* It's active: add a reference to the file. */
		mutex_enter(&fp->f_lock);
		fp->f_count++;
		mutex_exit(&fp->f_lock);
		/* Consume one fdfile_t to represent it. */
		if (i >= NDFDFILE) {
			ff2 = fflist;
			fflist = (void *)ff2->ff_file;
		}
		ff2->ff_file = fp;
		ff2->ff_exclose = ff->ff_exclose;
		ff2->ff_allocated = true;
		mutex_exit(&ff->ff_lock);
		if (i > newlast) {
			newlast = i;
		}
	}
	mutex_exit(&fdp->fd_lock);

	/* Discard unused fdfile_t structures. */
	while (__predict_false(fflist != NULL)) {
		ff = fflist;
		fflist = (void *)ff->ff_file;
		ff->ff_file = NULL;
		pool_cache_put(fdfile_cache, ff);
		nused--;
	}
	KASSERT(nused >= 0);
	KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]);

	newfdp->fd_nused = nused;
	newfdp->fd_lastfile = newlast;

	return (newfdp);
}

/*
 * Release a filedesc structure.
 */
void
fd_free(void)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;
	int fd, lastfd;
	void **discard;

	fdp = curlwp->l_fd;

	KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]);

	if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0)
		return;

	/*
	 * Close any files that the process holds open.
	 */
	for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) {
		ff = fdp->fd_ofiles[fd];
		KASSERT(fd >= NDFDFILE ||
		    ff == (fdfile_t *)fdp->fd_dfdfile[fd]);
		if ((ff = fdp->fd_ofiles[fd]) == NULL)
			continue;
		if ((fp = ff->ff_file) != NULL) {
			/*
			 * Must use fd_close() here as kqueue holds
			 * long term references to descriptors.
			 */
			ff->ff_refcnt++;
			fd_close(fd);
		}
		KASSERT(ff->ff_refcnt == 0);
		KASSERT(ff->ff_file == NULL);
		KASSERT(!ff->ff_exclose);
		KASSERT(!ff->ff_allocated);
		if (fd >= NDFDFILE) {
			pool_cache_put(fdfile_cache, ff);
		}
	}

	/*
	 * Clean out the descriptor table for the next user and return
	 * to the cache.
	 */
	while ((discard = fdp->fd_discard) != NULL) {
		fdp->fd_discard = discard[0];
		kmem_free(discard, (uintptr_t)discard[1]);
	}
	if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
		KASSERT(fdp->fd_himap != fdp->fd_dhimap);
		KASSERT(fdp->fd_lomap != fdp->fd_dlomap);
		fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap);
	}
	if (fdp->fd_nfiles > NDFILE) {
		KASSERT(fdp->fd_ofiles != fdp->fd_dfiles);
		fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles);
	}
	if (fdp->fd_knhash != NULL) {
		hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask);
		fdp->fd_knhash = NULL;
		fdp->fd_knhashmask = 0;
	} else {
		KASSERT(fdp->fd_knhashmask == 0);
	}
	fdp->fd_lastkqfile = -1;
	pool_cache_put(filedesc_cache, fdp);
}

/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 */
static int
filedescopen(dev_t dev, int mode, int type, lwp_t *l)
{

	/*
	 * XXX Kludge: set dupfd to contain the value of the file
	 * descriptor being sought for duplication.  The error return
	 * ensures that the vnode for this device will be released by
	 * vn_open.  Open will detect this special error and take the
	 * actions in fd_dupopen() below.  Other callers of vn_open or
	 * VOP_OPEN will simply report the error.
	 */
	l->l_dupfd = minor(dev);	/* XXX */
	return EDUPFD;
}

/*
 * Duplicate the specified descriptor to a free descriptor.
 */
int
fd_dupopen(int old, int *new, int mode, int error)
{
	filedesc_t *fdp;
	fdfile_t *ff;
	file_t *fp;

	if ((fp = fd_getfile(old)) == NULL) {
		return EBADF;
	}
	fdp = curlwp->l_fd;
	ff = fdp->fd_ofiles[old];

	/*
	 * There are two cases of interest here.
	 *
	 * For EDUPFD simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For EMOVEFD steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case EDUPFD:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
			error = EACCES;
			break;
		}

		/* Copy it. */
		error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose);
		break;

	case EMOVEFD:
		/* Copy it. */
		error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose);
		if (error != 0) {
			break;
		}

		/* Steal away the file pointer from 'old'. */
		(void)fd_close(old);
		return 0;
	}

	fd_putfile(old);
	return error;
}

/*
 * Set descriptor owner.  If the owner is a process, 'pgid' is set to
 * the positive process ID.  If the owner is a process group, 'pgid'
 * is set to the negated process group ID.
 */
int
fsetown(pid_t *pgid, u_long cmd, const void *data)
{
	int id = *(const int *)data;
	int error;

	switch (cmd) {
	case TIOCSPGRP:
		if (id < 0)
			return (EINVAL);
		id = -id;
		break;
	default:
		break;
	}

	if (id > 0 && !pfind(id))
		return (ESRCH);
	else if (id < 0 && (error = pgid_in_session(curproc, -id)))
		return (error);

	*pgid = id;
	return (0);
}

/*
 * Return descriptor owner information.  If the value is positive,
 * it is the process ID.  If it is negative, it is the process group ID
 * and needs the sign removed before use.
 */
int
fgetown(pid_t pgid, u_long cmd, void *data)
{

	switch (cmd) {
	case TIOCGPGRP:
		*(int *)data = -pgid;
		break;
	default:
		*(int *)data = pgid;
		break;
	}
	return (0);
}
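/*
 * Example of the sign convention (illustrative values only): after
 * fsetown(&pgid, TIOCSPGRP, &val) with val == 123, the owner is
 * process group 123 and pgid is stored as -123; a later
 * fgetown(pgid, TIOCGPGRP, &val) negates it again and hands back 123.
 * With any other command the value is stored unchanged, so a positive
 * value names a process and a negative value names a process group,
 * matching the descriptions above.
 */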
/*
 * Send signal to descriptor owner, either process or process group.
 */
void
fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata)
{
	struct proc *p1;
	struct pgrp *pgrp;
	ksiginfo_t ksi;

	KASSERT(!cpu_intr_p());

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = code;
	ksi.ksi_band = band;

	mutex_enter(proc_lock);
	if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED)))
		kpsignal(p1, &ksi, fdescdata);
	else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED)))
		kpgsignal(pgrp, &ksi, fdescdata, 0);
	mutex_exit(proc_lock);
}

int
fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops,
	 void *data)
{

	fp->f_flag = flag;
	fp->f_type = DTYPE_MISC;
	fp->f_ops = fops;
	fp->f_data = data;
	curlwp->l_dupfd = fd;
	fd_affix(curproc, fp, fd);

	return EMOVEFD;
}

int
fnullop_fcntl(file_t *fp, u_int cmd, void *data)
{

	if (cmd == F_SETFL)
		return 0;

	return EOPNOTSUPP;
}

int
fnullop_poll(file_t *fp, int which)
{

	return 0;
}

int
fnullop_kqfilter(file_t *fp, struct knote *kn)
{

	return 0;
}

int
fbadop_read(file_t *fp, off_t *offset, struct uio *uio,
	    kauth_cred_t cred, int flags)
{

	return EOPNOTSUPP;
}

int
fbadop_write(file_t *fp, off_t *offset, struct uio *uio,
	     kauth_cred_t cred, int flags)
{

	return EOPNOTSUPP;
}

int
fbadop_ioctl(file_t *fp, u_long com, void *data)
{

	return EOPNOTSUPP;
}

int
fbadop_stat(file_t *fp, struct stat *sb)
{

	return EOPNOTSUPP;
}

int
fbadop_close(file_t *fp)
{

	return EOPNOTSUPP;
}
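
/*
 * Usage note (a sketch; example_dev_open(), its fileops and its softc
 * are hypothetical): fd_clone() is intended for "cloning" devices
 * whose open routine hands back a brand new file rather than the
 * vnode being opened.  The open routine allocates a descriptor and
 * file, then lets fd_clone() fill in the file and return EMOVEFD,
 * which the open path (see fd_dupopen() above) turns into a
 * successful open of the new descriptor:
 *
 *	static int
 *	example_dev_open(dev_t dev, int flag, int mode, lwp_t *l)
 *	{
 *		struct example_softc *sc;
 *		file_t *fp;
 *		int fd, error;
 *
 *		if ((error = fd_allocfile(&fp, &fd)) != 0)
 *			return error;
 *		sc = example_attach(dev);
 *		return fd_clone(fp, fd, flag, &example_fileops, sc);
 *	}
 */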