1 /* $NetBSD: kern_descrip.c,v 1.190 2009/04/04 10:12:51 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1989, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 66 */ 67 68 /* 69 * File descriptor management. 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.190 2009/04/04 10:12:51 ad Exp $"); 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/filedesc.h> 78 #include <sys/kernel.h> 79 #include <sys/proc.h> 80 #include <sys/file.h> 81 #include <sys/socket.h> 82 #include <sys/socketvar.h> 83 #include <sys/stat.h> 84 #include <sys/ioctl.h> 85 #include <sys/fcntl.h> 86 #include <sys/pool.h> 87 #include <sys/unistd.h> 88 #include <sys/resourcevar.h> 89 #include <sys/conf.h> 90 #include <sys/event.h> 91 #include <sys/kauth.h> 92 #include <sys/atomic.h> 93 #include <sys/syscallargs.h> 94 #include <sys/cpu.h> 95 #include <sys/kmem.h> 96 #include <sys/vnode.h> 97 98 static int file_ctor(void *, void *, int); 99 static void file_dtor(void *, void *); 100 static int fdfile_ctor(void *, void *, int); 101 static void fdfile_dtor(void *, void *); 102 static int filedesc_ctor(void *, void *, int); 103 static void filedesc_dtor(void *, void *); 104 static int filedescopen(dev_t, int, int, lwp_t *); 105 106 kmutex_t filelist_lock; /* lock on filehead */ 107 struct filelist filehead; /* head of list of open files */ 108 u_int nfiles; /* actual number of open files */ 109 110 static pool_cache_t filedesc_cache; 111 static pool_cache_t file_cache; 112 static pool_cache_t fdfile_cache; 113 114 const struct cdevsw filedesc_cdevsw = { 115 filedescopen, noclose, noread, nowrite, noioctl, 116 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER | D_MPSAFE, 117 }; 118 119 /* For ease of reading. */ 120 __strong_alias(fd_putvnode,fd_putfile) 121 __strong_alias(fd_putsock,fd_putfile) 122 123 /* 124 * Initialize the descriptor system. 125 */ 126 void 127 fd_sys_init(void) 128 { 129 130 mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE); 131 132 file_cache = pool_cache_init(sizeof(file_t), coherency_unit, 0, 133 0, "file", NULL, IPL_NONE, file_ctor, file_dtor, NULL); 134 KASSERT(file_cache != NULL); 135 136 fdfile_cache = pool_cache_init(sizeof(fdfile_t), coherency_unit, 0, 137 PR_LARGECACHE, "fdfile", NULL, IPL_NONE, fdfile_ctor, fdfile_dtor, 138 NULL); 139 KASSERT(fdfile_cache != NULL); 140 141 filedesc_cache = pool_cache_init(sizeof(filedesc_t), coherency_unit, 142 0, 0, "filedesc", NULL, IPL_NONE, filedesc_ctor, filedesc_dtor, 143 NULL); 144 KASSERT(filedesc_cache != NULL); 145 } 146 147 static int 148 fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) 149 { 150 int i, off, maxoff; 151 uint32_t sub; 152 153 KASSERT(mutex_owned(&fdp->fd_lock)); 154 155 if (want > bits) 156 return -1; 157 158 off = want >> NDENTRYSHIFT; 159 i = want & NDENTRYMASK; 160 if (i) { 161 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 162 if (sub != ~0) 163 goto found; 164 off++; 165 } 166 167 maxoff = NDLOSLOTS(bits); 168 while (off < maxoff) { 169 if ((sub = bitmap[off]) != ~0) 170 goto found; 171 off++; 172 } 173 174 return (-1); 175 176 found: 177 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 178 } 179 180 static int 181 fd_last_set(filedesc_t *fd, int last) 182 { 183 int off, i; 184 fdfile_t **ofiles = fd->fd_ofiles; 185 uint32_t *bitmap = fd->fd_lomap; 186 187 KASSERT(mutex_owned(&fd->fd_lock)); 188 189 off = (last - 1) >> NDENTRYSHIFT; 190 191 while (off >= 0 && !bitmap[off]) 192 off--; 193 194 if (off < 0) 195 return (-1); 196 197 i = ((off + 1) << NDENTRYSHIFT) - 1; 198 if (i >= last) 199 i = last - 1; 200 201 /* XXX should use bitmap */ 202 /* XXXAD does not work for fd_copy() */ 203 while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated)) 204 i--; 205 206 return (i); 207 } 208 209 void 210 fd_used(filedesc_t *fdp, unsigned fd) 211 { 212 u_int off = fd >> NDENTRYSHIFT; 213 fdfile_t *ff; 214 215 ff = fdp->fd_ofiles[fd]; 216 217 KASSERT(mutex_owned(&fdp->fd_lock)); 218 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); 219 KASSERT(ff != NULL); 220 KASSERT(ff->ff_file == NULL); 221 KASSERT(!ff->ff_allocated); 222 223 ff->ff_allocated = 1; 224 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 225 if (fdp->fd_lomap[off] == ~0) { 226 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 227 (1 << (off & NDENTRYMASK))) == 0); 228 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 229 } 230 231 if ((int)fd > fdp->fd_lastfile) { 232 fdp->fd_lastfile = fd; 233 } 234 235 if (fd >= NDFDFILE) { 236 fdp->fd_nused++; 237 } else { 238 KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 239 } 240 } 241 242 void 243 fd_unused(filedesc_t *fdp, unsigned fd) 244 { 245 u_int off = fd >> NDENTRYSHIFT; 246 fdfile_t *ff; 247 248 ff = fdp->fd_ofiles[fd]; 249 250 /* 251 * Don't assert the lock is held here, as we may be copying 252 * the table during exec() and it is not needed there. 253 * procfs and sysctl are locked out by proc::p_reflock. 254 * 255 * KASSERT(mutex_owned(&fdp->fd_lock)); 256 */ 257 KASSERT(ff != NULL); 258 KASSERT(ff->ff_file == NULL); 259 KASSERT(ff->ff_allocated); 260 261 if (fd < fdp->fd_freefile) { 262 fdp->fd_freefile = fd; 263 } 264 265 if (fdp->fd_lomap[off] == ~0) { 266 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 267 (1 << (off & NDENTRYMASK))) != 0); 268 fdp->fd_himap[off >> NDENTRYSHIFT] &= 269 ~(1 << (off & NDENTRYMASK)); 270 } 271 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 272 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 273 ff->ff_allocated = 0; 274 275 KASSERT(fd <= fdp->fd_lastfile); 276 if (fd == fdp->fd_lastfile) { 277 fdp->fd_lastfile = fd_last_set(fdp, fd); 278 } 279 280 if (fd >= NDFDFILE) { 281 KASSERT(fdp->fd_nused > 0); 282 fdp->fd_nused--; 283 } else { 284 KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 285 } 286 } 287 288 /* 289 * Custom version of fd_unused() for fd_copy(), where the descriptor 290 * table is not yet fully initialized. 291 */ 292 static inline void 293 fd_zap(filedesc_t *fdp, unsigned fd) 294 { 295 u_int off = fd >> NDENTRYSHIFT; 296 297 if (fd < fdp->fd_freefile) { 298 fdp->fd_freefile = fd; 299 } 300 301 if (fdp->fd_lomap[off] == ~0) { 302 KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & 303 (1 << (off & NDENTRYMASK))) != 0); 304 fdp->fd_himap[off >> NDENTRYSHIFT] &= 305 ~(1 << (off & NDENTRYMASK)); 306 } 307 KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); 308 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 309 } 310 311 bool 312 fd_isused(filedesc_t *fdp, unsigned fd) 313 { 314 u_int off = fd >> NDENTRYSHIFT; 315 316 KASSERT(fd < fdp->fd_nfiles); 317 318 return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; 319 } 320 321 /* 322 * Look up the file structure corresponding to a file descriptor 323 * and return the file, holding a reference on the descriptor. 324 */ 325 inline file_t * 326 fd_getfile(unsigned fd) 327 { 328 filedesc_t *fdp; 329 fdfile_t *ff; 330 file_t *fp; 331 332 fdp = curlwp->l_fd; 333 334 /* 335 * Look up the fdfile structure representing this descriptor. 336 * Ensure that we see fd_nfiles before fd_ofiles since we 337 * are doing this unlocked. See fd_tryexpand(). 338 */ 339 if (__predict_false(fd >= fdp->fd_nfiles)) { 340 return NULL; 341 } 342 membar_consumer(); 343 ff = fdp->fd_ofiles[fd]; 344 KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 345 if (__predict_false(ff == NULL)) { 346 return NULL; 347 } 348 349 /* 350 * Now get a reference to the descriptor. Issue a memory 351 * barrier to ensure that we acquire the file pointer _after_ 352 * adding a reference. If no memory barrier, we could fetch 353 * a stale pointer. 354 */ 355 atomic_inc_uint(&ff->ff_refcnt); 356 #ifndef __HAVE_ATOMIC_AS_MEMBAR 357 membar_enter(); 358 #endif 359 360 /* 361 * If the file is not open or is being closed then put the 362 * reference back. 363 */ 364 fp = ff->ff_file; 365 if (__predict_true(fp != NULL)) { 366 return fp; 367 } 368 fd_putfile(fd); 369 return NULL; 370 } 371 372 /* 373 * Release a reference to a file descriptor acquired with fd_getfile(). 374 */ 375 void 376 fd_putfile(unsigned fd) 377 { 378 filedesc_t *fdp; 379 fdfile_t *ff; 380 u_int u, v; 381 382 fdp = curlwp->l_fd; 383 ff = fdp->fd_ofiles[fd]; 384 385 KASSERT(fd < fdp->fd_nfiles); 386 KASSERT(ff != NULL); 387 KASSERT((ff->ff_refcnt & FR_MASK) > 0); 388 KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 389 390 /* 391 * Ensure that any use of the file is complete and globally 392 * visible before dropping the final reference. If no membar, 393 * the current CPU could still access memory associated with 394 * the file after it has been freed or recycled by another 395 * CPU. 396 */ 397 #ifndef __HAVE_ATOMIC_AS_MEMBAR 398 membar_exit(); 399 #endif 400 401 /* 402 * Be optimistic and start out with the assumption that no other 403 * threads are trying to close the descriptor. If the CAS fails, 404 * we lost a race and/or it's being closed. 405 */ 406 for (u = ff->ff_refcnt & FR_MASK;; u = v) { 407 v = atomic_cas_uint(&ff->ff_refcnt, u, u - 1); 408 if (__predict_true(u == v)) { 409 return; 410 } 411 if (__predict_false((v & FR_CLOSING) != 0)) { 412 break; 413 } 414 } 415 416 /* Another thread is waiting to close the file: join it. */ 417 (void)fd_close(fd); 418 } 419 420 /* 421 * Convenience wrapper around fd_getfile() that returns reference 422 * to a vnode. 423 */ 424 int 425 fd_getvnode(unsigned fd, file_t **fpp) 426 { 427 vnode_t *vp; 428 file_t *fp; 429 430 fp = fd_getfile(fd); 431 if (__predict_false(fp == NULL)) { 432 return EBADF; 433 } 434 if (__predict_false(fp->f_type != DTYPE_VNODE)) { 435 fd_putfile(fd); 436 return EINVAL; 437 } 438 vp = fp->f_data; 439 if (__predict_false(vp->v_type == VBAD)) { 440 /* XXX Is this case really necessary? */ 441 fd_putfile(fd); 442 return EBADF; 443 } 444 *fpp = fp; 445 return 0; 446 } 447 448 /* 449 * Convenience wrapper around fd_getfile() that returns reference 450 * to a socket. 451 */ 452 int 453 fd_getsock(unsigned fd, struct socket **sop) 454 { 455 file_t *fp; 456 457 fp = fd_getfile(fd); 458 if (__predict_false(fp == NULL)) { 459 return EBADF; 460 } 461 if (__predict_false(fp->f_type != DTYPE_SOCKET)) { 462 fd_putfile(fd); 463 return ENOTSOCK; 464 } 465 *sop = fp->f_data; 466 return 0; 467 } 468 469 /* 470 * Look up the file structure corresponding to a file descriptor 471 * and return it with a reference held on the file, not the 472 * descriptor. 473 * 474 * This is heavyweight and only used when accessing descriptors 475 * from a foreign process. The caller must ensure that `p' does 476 * not exit or fork across this call. 477 * 478 * To release the file (not descriptor) reference, use closef(). 479 */ 480 file_t * 481 fd_getfile2(proc_t *p, unsigned fd) 482 { 483 filedesc_t *fdp; 484 fdfile_t *ff; 485 file_t *fp; 486 487 fdp = p->p_fd; 488 mutex_enter(&fdp->fd_lock); 489 if (fd > fdp->fd_nfiles) { 490 mutex_exit(&fdp->fd_lock); 491 return NULL; 492 } 493 if ((ff = fdp->fd_ofiles[fd]) == NULL) { 494 mutex_exit(&fdp->fd_lock); 495 return NULL; 496 } 497 mutex_enter(&ff->ff_lock); 498 if ((fp = ff->ff_file) == NULL) { 499 mutex_exit(&ff->ff_lock); 500 mutex_exit(&fdp->fd_lock); 501 return NULL; 502 } 503 mutex_enter(&fp->f_lock); 504 fp->f_count++; 505 mutex_exit(&fp->f_lock); 506 mutex_exit(&ff->ff_lock); 507 mutex_exit(&fdp->fd_lock); 508 509 return fp; 510 } 511 512 /* 513 * Internal form of close. Must be called with a reference to the 514 * descriptor, and will drop the reference. When all descriptor 515 * references are dropped, releases the descriptor slot and a single 516 * reference to the file structure. 517 */ 518 int 519 fd_close(unsigned fd) 520 { 521 struct flock lf; 522 filedesc_t *fdp; 523 fdfile_t *ff; 524 file_t *fp; 525 proc_t *p; 526 lwp_t *l; 527 528 l = curlwp; 529 p = l->l_proc; 530 fdp = l->l_fd; 531 ff = fdp->fd_ofiles[fd]; 532 533 KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 534 535 mutex_enter(&ff->ff_lock); 536 KASSERT((ff->ff_refcnt & FR_MASK) > 0); 537 if (ff->ff_file == NULL) { 538 /* 539 * Another user of the file is already closing, and is 540 * waiting for other users of the file to drain. Release 541 * our reference, and wake up the closer. 542 */ 543 atomic_dec_uint(&ff->ff_refcnt); 544 cv_broadcast(&ff->ff_closing); 545 mutex_exit(&ff->ff_lock); 546 547 /* 548 * An application error, so pretend that the descriptor 549 * was already closed. We can't safely wait for it to 550 * be closed without potentially deadlocking. 551 */ 552 return (EBADF); 553 } 554 KASSERT((ff->ff_refcnt & FR_CLOSING) == 0); 555 556 /* 557 * There may be multiple users of this file within the process. 558 * Notify existing and new users that the file is closing. This 559 * will prevent them from adding additional uses to this file 560 * while we are closing it. 561 */ 562 fp = ff->ff_file; 563 ff->ff_file = NULL; 564 ff->ff_exclose = false; 565 566 /* 567 * We expect the caller to hold a descriptor reference - drop it. 568 * The reference count may increase beyond zero at this point due 569 * to an erroneous descriptor reference by an application, but 570 * fd_getfile() will notice that the file is being closed and drop 571 * the reference again. 572 */ 573 #ifndef __HAVE_ATOMIC_AS_MEMBAR 574 membar_producer(); 575 #endif 576 if (__predict_false(atomic_dec_uint_nv(&ff->ff_refcnt) != 0)) { 577 /* 578 * Wait for other references to drain. This is typically 579 * an application error - the descriptor is being closed 580 * while still in use. 581 * 582 */ 583 atomic_or_uint(&ff->ff_refcnt, FR_CLOSING); 584 585 /* 586 * Remove any knotes attached to the file. A knote 587 * attached to the descriptor can hold references on it. 588 */ 589 mutex_exit(&ff->ff_lock); 590 if (!SLIST_EMPTY(&ff->ff_knlist)) { 591 knote_fdclose(fd); 592 } 593 594 /* Try to drain out descriptor references. */ 595 (*fp->f_ops->fo_drain)(fp); 596 mutex_enter(&ff->ff_lock); 597 598 /* 599 * We need to see the count drop to zero at least once, 600 * in order to ensure that all pre-existing references 601 * have been drained. New references past this point are 602 * of no interest. 603 */ 604 while ((ff->ff_refcnt & FR_MASK) != 0) { 605 cv_wait(&ff->ff_closing, &ff->ff_lock); 606 } 607 atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); 608 } else { 609 /* If no references, there must be no knotes. */ 610 KASSERT(SLIST_EMPTY(&ff->ff_knlist)); 611 } 612 mutex_exit(&ff->ff_lock); 613 614 /* 615 * POSIX record locking dictates that any close releases ALL 616 * locks owned by this process. This is handled by setting 617 * a flag in the unlock to free ONLY locks obeying POSIX 618 * semantics, and not to free BSD-style file locks. 619 * If the descriptor was in a message, POSIX-style locks 620 * aren't passed with the descriptor. 621 */ 622 if ((p->p_flag & PK_ADVLOCK) != 0 && fp->f_type == DTYPE_VNODE) { 623 lf.l_whence = SEEK_SET; 624 lf.l_start = 0; 625 lf.l_len = 0; 626 lf.l_type = F_UNLCK; 627 (void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX); 628 } 629 630 631 /* Free descriptor slot. */ 632 mutex_enter(&fdp->fd_lock); 633 fd_unused(fdp, fd); 634 mutex_exit(&fdp->fd_lock); 635 636 /* Now drop reference to the file itself. */ 637 return closef(fp); 638 } 639 640 /* 641 * Duplicate a file descriptor. 642 */ 643 int 644 fd_dup(file_t *fp, int minfd, int *newp, bool exclose) 645 { 646 proc_t *p; 647 int error; 648 649 p = curproc; 650 651 while ((error = fd_alloc(p, minfd, newp)) != 0) { 652 if (error != ENOSPC) { 653 return error; 654 } 655 fd_tryexpand(p); 656 } 657 658 curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose; 659 fd_affix(p, fp, *newp); 660 return 0; 661 } 662 663 /* 664 * dup2 operation. 665 */ 666 int 667 fd_dup2(file_t *fp, unsigned new) 668 { 669 filedesc_t *fdp; 670 fdfile_t *ff; 671 672 fdp = curlwp->l_fd; 673 674 /* 675 * Ensure there are enough slots in the descriptor table, 676 * and allocate an fdfile_t up front in case we need it. 677 */ 678 while (new >= fdp->fd_nfiles) { 679 fd_tryexpand(curproc); 680 } 681 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 682 683 /* 684 * If there is already a file open, close it. If the file is 685 * half open, wait for it to be constructed before closing it. 686 * XXX Potential for deadlock here? 687 */ 688 mutex_enter(&fdp->fd_lock); 689 while (fd_isused(fdp, new)) { 690 mutex_exit(&fdp->fd_lock); 691 if (fd_getfile(new) != NULL) { 692 (void)fd_close(new); 693 } else { 694 /* XXX Crummy, but unlikely to happen. */ 695 kpause("dup2", false, 1, NULL); 696 } 697 mutex_enter(&fdp->fd_lock); 698 } 699 if (fdp->fd_ofiles[new] == NULL) { 700 KASSERT(new >= NDFDFILE); 701 fdp->fd_ofiles[new] = ff; 702 ff = NULL; 703 } 704 fd_used(fdp, new); 705 mutex_exit(&fdp->fd_lock); 706 707 /* Slot is now allocated. Insert copy of the file. */ 708 fd_affix(curproc, fp, new); 709 if (ff != NULL) { 710 pool_cache_put(fdfile_cache, ff); 711 } 712 return 0; 713 } 714 715 /* 716 * Drop reference to a file structure. 717 */ 718 int 719 closef(file_t *fp) 720 { 721 struct flock lf; 722 int error; 723 724 /* 725 * Drop reference. If referenced elsewhere it's still open 726 * and we have nothing more to do. 727 */ 728 mutex_enter(&fp->f_lock); 729 KASSERT(fp->f_count > 0); 730 if (--fp->f_count > 0) { 731 mutex_exit(&fp->f_lock); 732 return 0; 733 } 734 KASSERT(fp->f_count == 0); 735 mutex_exit(&fp->f_lock); 736 737 /* We held the last reference - release locks, close and free. */ 738 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { 739 lf.l_whence = SEEK_SET; 740 lf.l_start = 0; 741 lf.l_len = 0; 742 lf.l_type = F_UNLCK; 743 (void)VOP_ADVLOCK(fp->f_data, fp, F_UNLCK, &lf, F_FLOCK); 744 } 745 if (fp->f_ops != NULL) { 746 error = (*fp->f_ops->fo_close)(fp); 747 } else { 748 error = 0; 749 } 750 ffree(fp); 751 752 return error; 753 } 754 755 /* 756 * Allocate a file descriptor for the process. 757 */ 758 int 759 fd_alloc(proc_t *p, int want, int *result) 760 { 761 filedesc_t *fdp; 762 int i, lim, last, error; 763 u_int off, new; 764 fdfile_t *ff; 765 766 KASSERT(p == curproc || p == &proc0); 767 768 fdp = p->p_fd; 769 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 770 KASSERT(ff->ff_refcnt == 0); 771 KASSERT(ff->ff_file == NULL); 772 773 /* 774 * Search for a free descriptor starting at the higher 775 * of want or fd_freefile. 776 */ 777 mutex_enter(&fdp->fd_lock); 778 KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); 779 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 780 last = min(fdp->fd_nfiles, lim); 781 for (;;) { 782 if ((i = want) < fdp->fd_freefile) 783 i = fdp->fd_freefile; 784 off = i >> NDENTRYSHIFT; 785 new = fd_next_zero(fdp, fdp->fd_himap, off, 786 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 787 if (new == -1) 788 break; 789 i = fd_next_zero(fdp, &fdp->fd_lomap[new], 790 new > off ? 0 : i & NDENTRYMASK, NDENTRIES); 791 if (i == -1) { 792 /* 793 * Free file descriptor in this block was 794 * below want, try again with higher want. 795 */ 796 want = (new + 1) << NDENTRYSHIFT; 797 continue; 798 } 799 i += (new << NDENTRYSHIFT); 800 if (i >= last) { 801 break; 802 } 803 if (fdp->fd_ofiles[i] == NULL) { 804 KASSERT(i >= NDFDFILE); 805 fdp->fd_ofiles[i] = ff; 806 } else { 807 pool_cache_put(fdfile_cache, ff); 808 } 809 KASSERT(fdp->fd_ofiles[i]->ff_file == NULL); 810 fd_used(fdp, i); 811 if (want <= fdp->fd_freefile) { 812 fdp->fd_freefile = i; 813 } 814 *result = i; 815 mutex_exit(&fdp->fd_lock); 816 KASSERT(i >= NDFDFILE || 817 fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); 818 return 0; 819 } 820 821 /* No space in current array. Let the caller expand and retry. */ 822 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; 823 mutex_exit(&fdp->fd_lock); 824 pool_cache_put(fdfile_cache, ff); 825 return error; 826 } 827 828 /* 829 * Allocate memory for the open files array. 830 */ 831 static fdfile_t ** 832 fd_ofile_alloc(int n) 833 { 834 uintptr_t *ptr, sz; 835 836 KASSERT(n > NDFILE); 837 838 sz = (n + 2) * sizeof(uintptr_t); 839 ptr = kmem_alloc((size_t)sz, KM_SLEEP); 840 ptr[1] = sz; 841 842 return (fdfile_t **)(ptr + 2); 843 } 844 845 /* 846 * Free an open files array. 847 */ 848 static void 849 fd_ofile_free(int n, fdfile_t **of) 850 { 851 uintptr_t *ptr, sz; 852 853 KASSERT(n > NDFILE); 854 855 sz = (n + 2) * sizeof(uintptr_t); 856 ptr = (uintptr_t *)of - 2; 857 KASSERT(ptr[1] == sz); 858 kmem_free(ptr, sz); 859 } 860 861 /* 862 * Allocate descriptor bitmap. 863 */ 864 static void 865 fd_map_alloc(int n, uint32_t **lo, uint32_t **hi) 866 { 867 uint8_t *ptr; 868 size_t szlo, szhi; 869 870 KASSERT(n > NDENTRIES); 871 872 szlo = NDLOSLOTS(n) * sizeof(uint32_t); 873 szhi = NDHISLOTS(n) * sizeof(uint32_t); 874 ptr = kmem_alloc(szlo + szhi, KM_SLEEP); 875 *lo = (uint32_t *)ptr; 876 *hi = (uint32_t *)(ptr + szlo); 877 } 878 879 /* 880 * Free descriptor bitmap. 881 */ 882 static void 883 fd_map_free(int n, uint32_t *lo, uint32_t *hi) 884 { 885 size_t szlo, szhi; 886 887 KASSERT(n > NDENTRIES); 888 889 szlo = NDLOSLOTS(n) * sizeof(uint32_t); 890 szhi = NDHISLOTS(n) * sizeof(uint32_t); 891 KASSERT(hi == (uint32_t *)((uint8_t *)lo + szlo)); 892 kmem_free(lo, szlo + szhi); 893 } 894 895 /* 896 * Expand a process' descriptor table. 897 */ 898 void 899 fd_tryexpand(proc_t *p) 900 { 901 filedesc_t *fdp; 902 int i, numfiles, oldnfiles; 903 fdfile_t **newofile; 904 uint32_t *newhimap, *newlomap; 905 906 KASSERT(p == curproc || p == &proc0); 907 908 fdp = p->p_fd; 909 newhimap = NULL; 910 newlomap = NULL; 911 oldnfiles = fdp->fd_nfiles; 912 913 if (oldnfiles < NDEXTENT) 914 numfiles = NDEXTENT; 915 else 916 numfiles = 2 * oldnfiles; 917 918 newofile = fd_ofile_alloc(numfiles); 919 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 920 fd_map_alloc(numfiles, &newlomap, &newhimap); 921 } 922 923 mutex_enter(&fdp->fd_lock); 924 KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); 925 if (fdp->fd_nfiles != oldnfiles) { 926 /* fdp changed; caller must retry */ 927 mutex_exit(&fdp->fd_lock); 928 fd_ofile_free(numfiles, newofile); 929 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 930 fd_map_free(numfiles, newlomap, newhimap); 931 } 932 return; 933 } 934 935 /* Copy the existing ofile array and zero the new portion. */ 936 i = sizeof(fdfile_t *) * fdp->fd_nfiles; 937 memcpy(newofile, fdp->fd_ofiles, i); 938 memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i); 939 940 /* 941 * Link old ofiles array into list to be discarded. We defer 942 * freeing until process exit if the descriptor table is visble 943 * to other threads. 944 */ 945 if (oldnfiles > NDFILE) { 946 if ((fdp->fd_refcnt | p->p_nlwps) > 1) { 947 fdp->fd_ofiles[-2] = (void *)fdp->fd_discard; 948 fdp->fd_discard = fdp->fd_ofiles - 2; 949 } else { 950 fd_ofile_free(oldnfiles, fdp->fd_ofiles); 951 } 952 } 953 954 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { 955 i = NDHISLOTS(oldnfiles) * sizeof(uint32_t); 956 memcpy(newhimap, fdp->fd_himap, i); 957 memset((uint8_t *)newhimap + i, 0, 958 NDHISLOTS(numfiles) * sizeof(uint32_t) - i); 959 960 i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t); 961 memcpy(newlomap, fdp->fd_lomap, i); 962 memset((uint8_t *)newlomap + i, 0, 963 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i); 964 965 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) { 966 fd_map_free(oldnfiles, fdp->fd_lomap, fdp->fd_himap); 967 } 968 fdp->fd_himap = newhimap; 969 fdp->fd_lomap = newlomap; 970 } 971 972 /* 973 * All other modifications must become globally visible before 974 * the change to fd_nfiles. See fd_getfile(). 975 */ 976 fdp->fd_ofiles = newofile; 977 membar_producer(); 978 fdp->fd_nfiles = numfiles; 979 mutex_exit(&fdp->fd_lock); 980 981 KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); 982 } 983 984 /* 985 * Create a new open file structure and allocate a file descriptor 986 * for the current process. 987 */ 988 int 989 fd_allocfile(file_t **resultfp, int *resultfd) 990 { 991 file_t *fp; 992 proc_t *p; 993 int error; 994 995 p = curproc; 996 997 while ((error = fd_alloc(p, 0, resultfd)) != 0) { 998 if (error != ENOSPC) { 999 return error; 1000 } 1001 fd_tryexpand(p); 1002 } 1003 1004 fp = pool_cache_get(file_cache, PR_WAITOK); 1005 KASSERT(fp->f_count == 0); 1006 KASSERT(fp->f_msgcount == 0); 1007 KASSERT(fp->f_unpcount == 0); 1008 fp->f_cred = kauth_cred_get(); 1009 kauth_cred_hold(fp->f_cred); 1010 1011 if (__predict_false(atomic_inc_uint_nv(&nfiles) >= maxfiles)) { 1012 fd_abort(p, fp, *resultfd); 1013 tablefull("file", "increase kern.maxfiles or MAXFILES"); 1014 return ENFILE; 1015 } 1016 1017 /* 1018 * Don't allow recycled files to be scanned. 1019 */ 1020 if ((fp->f_flag & FSCAN) != 0) { 1021 mutex_enter(&fp->f_lock); 1022 atomic_and_uint(&fp->f_flag, ~FSCAN); 1023 mutex_exit(&fp->f_lock); 1024 } 1025 1026 fp->f_advice = 0; 1027 fp->f_msgcount = 0; 1028 fp->f_offset = 0; 1029 *resultfp = fp; 1030 1031 return 0; 1032 } 1033 1034 /* 1035 * Successful creation of a new descriptor: make visible to the process. 1036 */ 1037 void 1038 fd_affix(proc_t *p, file_t *fp, unsigned fd) 1039 { 1040 fdfile_t *ff; 1041 filedesc_t *fdp; 1042 1043 KASSERT(p == curproc || p == &proc0); 1044 1045 /* Add a reference to the file structure. */ 1046 mutex_enter(&fp->f_lock); 1047 fp->f_count++; 1048 mutex_exit(&fp->f_lock); 1049 1050 /* 1051 * Insert the new file into the descriptor slot. 1052 * 1053 * The memory barriers provided by lock activity in this routine 1054 * ensure that any updates to the file structure become globally 1055 * visible before the file becomes visible to other LWPs in the 1056 * current process. 1057 */ 1058 fdp = p->p_fd; 1059 ff = fdp->fd_ofiles[fd]; 1060 1061 KASSERT(ff != NULL); 1062 KASSERT(ff->ff_file == NULL); 1063 KASSERT(ff->ff_allocated); 1064 KASSERT(fd_isused(fdp, fd)); 1065 KASSERT(fd >= NDFDFILE || 1066 fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); 1067 1068 /* No need to lock in order to make file initially visible. */ 1069 ff->ff_file = fp; 1070 } 1071 1072 /* 1073 * Abort creation of a new descriptor: free descriptor slot and file. 1074 */ 1075 void 1076 fd_abort(proc_t *p, file_t *fp, unsigned fd) 1077 { 1078 filedesc_t *fdp; 1079 fdfile_t *ff; 1080 1081 KASSERT(p == curproc || p == &proc0); 1082 1083 fdp = p->p_fd; 1084 ff = fdp->fd_ofiles[fd]; 1085 1086 KASSERT(fd >= NDFDFILE || 1087 fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); 1088 1089 mutex_enter(&fdp->fd_lock); 1090 KASSERT(fd_isused(fdp, fd)); 1091 fd_unused(fdp, fd); 1092 mutex_exit(&fdp->fd_lock); 1093 1094 if (fp != NULL) { 1095 ffree(fp); 1096 } 1097 } 1098 1099 /* 1100 * Free a file descriptor. 1101 */ 1102 void 1103 ffree(file_t *fp) 1104 { 1105 1106 KASSERT(fp->f_count == 0); 1107 1108 atomic_dec_uint(&nfiles); 1109 kauth_cred_free(fp->f_cred); 1110 pool_cache_put(file_cache, fp); 1111 } 1112 1113 static int 1114 file_ctor(void *arg, void *obj, int flags) 1115 { 1116 file_t *fp = obj; 1117 1118 memset(fp, 0, sizeof(*fp)); 1119 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1120 1121 mutex_enter(&filelist_lock); 1122 LIST_INSERT_HEAD(&filehead, fp, f_list); 1123 mutex_exit(&filelist_lock); 1124 1125 return 0; 1126 } 1127 1128 static void 1129 file_dtor(void *arg, void *obj) 1130 { 1131 file_t *fp = obj; 1132 1133 mutex_enter(&filelist_lock); 1134 LIST_REMOVE(fp, f_list); 1135 mutex_exit(&filelist_lock); 1136 1137 mutex_destroy(&fp->f_lock); 1138 } 1139 1140 static int 1141 fdfile_ctor(void *arg, void *obj, int flags) 1142 { 1143 fdfile_t *ff = obj; 1144 1145 memset(ff, 0, sizeof(*ff)); 1146 mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE); 1147 cv_init(&ff->ff_closing, "fdclose"); 1148 1149 return 0; 1150 } 1151 1152 static void 1153 fdfile_dtor(void *arg, void *obj) 1154 { 1155 fdfile_t *ff = obj; 1156 1157 mutex_destroy(&ff->ff_lock); 1158 cv_destroy(&ff->ff_closing); 1159 } 1160 1161 file_t * 1162 fgetdummy(void) 1163 { 1164 file_t *fp; 1165 1166 fp = kmem_alloc(sizeof(*fp), KM_SLEEP); 1167 if (fp != NULL) { 1168 memset(fp, 0, sizeof(*fp)); 1169 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE); 1170 } 1171 return fp; 1172 } 1173 1174 void 1175 fputdummy(file_t *fp) 1176 { 1177 1178 mutex_destroy(&fp->f_lock); 1179 kmem_free(fp, sizeof(*fp)); 1180 } 1181 1182 /* 1183 * Create an initial filedesc structure. 1184 */ 1185 filedesc_t * 1186 fd_init(filedesc_t *fdp) 1187 { 1188 unsigned fd; 1189 1190 if (fdp == NULL) { 1191 fdp = pool_cache_get(filedesc_cache, PR_WAITOK); 1192 } else { 1193 filedesc_ctor(NULL, fdp, PR_WAITOK); 1194 } 1195 1196 fdp->fd_refcnt = 1; 1197 fdp->fd_ofiles = fdp->fd_dfiles; 1198 fdp->fd_nfiles = NDFILE; 1199 fdp->fd_himap = fdp->fd_dhimap; 1200 fdp->fd_lomap = fdp->fd_dlomap; 1201 KASSERT(fdp->fd_lastfile == -1); 1202 KASSERT(fdp->fd_lastkqfile == -1); 1203 KASSERT(fdp->fd_knhash == NULL); 1204 1205 memset(&fdp->fd_startzero, 0, sizeof(*fdp) - 1206 offsetof(filedesc_t, fd_startzero)); 1207 for (fd = 0; fd < NDFDFILE; fd++) { 1208 fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd]; 1209 } 1210 1211 return fdp; 1212 } 1213 1214 /* 1215 * Initialize a file descriptor table. 1216 */ 1217 static int 1218 filedesc_ctor(void *arg, void *obj, int flag) 1219 { 1220 filedesc_t *fdp = obj; 1221 int i; 1222 1223 memset(fdp, 0, sizeof(*fdp)); 1224 mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); 1225 fdp->fd_lastfile = -1; 1226 fdp->fd_lastkqfile = -1; 1227 1228 CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); 1229 for (i = 0; i < NDFDFILE; i++) { 1230 fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); 1231 } 1232 1233 return 0; 1234 } 1235 1236 static void 1237 filedesc_dtor(void *arg, void *obj) 1238 { 1239 filedesc_t *fdp = obj; 1240 int i; 1241 1242 for (i = 0; i < NDFDFILE; i++) { 1243 fdfile_dtor(NULL, fdp->fd_dfdfile[i]); 1244 } 1245 1246 mutex_destroy(&fdp->fd_lock); 1247 } 1248 1249 /* 1250 * Make p2 share p1's filedesc structure. 1251 */ 1252 void 1253 fd_share(struct proc *p2) 1254 { 1255 filedesc_t *fdp; 1256 1257 fdp = curlwp->l_fd; 1258 p2->p_fd = fdp; 1259 atomic_inc_uint(&fdp->fd_refcnt); 1260 } 1261 1262 /* 1263 * Copy a filedesc structure. 1264 */ 1265 filedesc_t * 1266 fd_copy(void) 1267 { 1268 filedesc_t *newfdp, *fdp; 1269 fdfile_t *ff, *fflist, **ffp, **nffp, *ff2; 1270 int i, nused, numfiles, lastfile, j, newlast; 1271 file_t *fp; 1272 1273 fdp = curproc->p_fd; 1274 newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); 1275 newfdp->fd_refcnt = 1; 1276 1277 KASSERT(newfdp->fd_knhash == NULL); 1278 KASSERT(newfdp->fd_knhashmask == 0); 1279 KASSERT(newfdp->fd_discard == NULL); 1280 1281 for (;;) { 1282 numfiles = fdp->fd_nfiles; 1283 lastfile = fdp->fd_lastfile; 1284 1285 /* 1286 * If the number of open files fits in the internal arrays 1287 * of the open file structure, use them, otherwise allocate 1288 * additional memory for the number of descriptors currently 1289 * in use. 1290 */ 1291 if (lastfile < NDFILE) { 1292 i = NDFILE; 1293 newfdp->fd_ofiles = newfdp->fd_dfiles; 1294 } else { 1295 /* 1296 * Compute the smallest multiple of NDEXTENT needed 1297 * for the file descriptors currently in use, 1298 * allowing the table to shrink. 1299 */ 1300 i = numfiles; 1301 while (i >= 2 * NDEXTENT && i > lastfile * 2) { 1302 i /= 2; 1303 } 1304 newfdp->fd_ofiles = fd_ofile_alloc(i); 1305 KASSERT(i > NDFILE); 1306 } 1307 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1308 newfdp->fd_himap = newfdp->fd_dhimap; 1309 newfdp->fd_lomap = newfdp->fd_dlomap; 1310 } else { 1311 fd_map_alloc(i, &newfdp->fd_lomap, 1312 &newfdp->fd_himap); 1313 } 1314 1315 /* 1316 * Allocate and string together fdfile structures. 1317 * We abuse fdfile_t::ff_file here, but it will be 1318 * cleared before this routine returns. 1319 */ 1320 nused = fdp->fd_nused; 1321 fflist = NULL; 1322 for (j = nused; j != 0; j--) { 1323 ff = pool_cache_get(fdfile_cache, PR_WAITOK); 1324 ff->ff_file = (void *)fflist; 1325 fflist = ff; 1326 } 1327 1328 mutex_enter(&fdp->fd_lock); 1329 if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused && 1330 lastfile == fdp->fd_lastfile) { 1331 break; 1332 } 1333 mutex_exit(&fdp->fd_lock); 1334 if (i > NDFILE) { 1335 fd_ofile_free(i, newfdp->fd_ofiles); 1336 } 1337 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { 1338 fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap); 1339 } 1340 while (fflist != NULL) { 1341 ff = fflist; 1342 fflist = (void *)ff->ff_file; 1343 ff->ff_file = NULL; 1344 pool_cache_put(fdfile_cache, ff); 1345 } 1346 } 1347 1348 newfdp->fd_nfiles = i; 1349 newfdp->fd_freefile = fdp->fd_freefile; 1350 newfdp->fd_exclose = fdp->fd_exclose; 1351 1352 /* 1353 * Clear the entries that will not be copied over. 1354 * Avoid calling memset with 0 size. 1355 */ 1356 if (lastfile < (i-1)) { 1357 memset(newfdp->fd_ofiles + lastfile + 1, 0, 1358 (i - lastfile - 1) * sizeof(file_t **)); 1359 } 1360 if (i < NDENTRIES * NDENTRIES) { 1361 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ 1362 } 1363 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); 1364 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); 1365 1366 ffp = fdp->fd_ofiles; 1367 nffp = newfdp->fd_ofiles; 1368 j = imax(lastfile, (NDFDFILE - 1)); 1369 newlast = -1; 1370 KASSERT(j < fdp->fd_nfiles); 1371 for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) { 1372 ff = *ffp; 1373 /* Install built-in fdfiles even if unused here. */ 1374 if (i < NDFDFILE) { 1375 ff2 = (fdfile_t *)newfdp->fd_dfdfile[i]; 1376 } else { 1377 ff2 = NULL; 1378 } 1379 /* Determine if descriptor is active in parent. */ 1380 if (ff == NULL || !fd_isused(fdp, i)) { 1381 KASSERT(ff != NULL || i >= NDFDFILE); 1382 continue; 1383 } 1384 mutex_enter(&ff->ff_lock); 1385 fp = ff->ff_file; 1386 if (fp == NULL) { 1387 /* Descriptor is half-open: free slot. */ 1388 fd_zap(newfdp, i); 1389 mutex_exit(&ff->ff_lock); 1390 continue; 1391 } 1392 if (fp->f_type == DTYPE_KQUEUE) { 1393 /* kqueue descriptors cannot be copied. */ 1394 fd_zap(newfdp, i); 1395 mutex_exit(&ff->ff_lock); 1396 continue; 1397 } 1398 /* It's active: add a reference to the file. */ 1399 mutex_enter(&fp->f_lock); 1400 fp->f_count++; 1401 mutex_exit(&fp->f_lock); 1402 /* Consume one fdfile_t to represent it. */ 1403 if (i >= NDFDFILE) { 1404 ff2 = fflist; 1405 fflist = (void *)ff2->ff_file; 1406 } 1407 ff2->ff_file = fp; 1408 ff2->ff_exclose = ff->ff_exclose; 1409 ff2->ff_allocated = true; 1410 mutex_exit(&ff->ff_lock); 1411 if (i > newlast) { 1412 newlast = i; 1413 } 1414 } 1415 mutex_exit(&fdp->fd_lock); 1416 1417 /* Discard unused fdfile_t structures. */ 1418 while (__predict_false(fflist != NULL)) { 1419 ff = fflist; 1420 fflist = (void *)ff->ff_file; 1421 ff->ff_file = NULL; 1422 pool_cache_put(fdfile_cache, ff); 1423 nused--; 1424 } 1425 KASSERT(nused >= 0); 1426 KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); 1427 1428 newfdp->fd_nused = nused; 1429 newfdp->fd_lastfile = newlast; 1430 1431 return (newfdp); 1432 } 1433 1434 /* 1435 * Release a filedesc structure. 1436 */ 1437 void 1438 fd_free(void) 1439 { 1440 filedesc_t *fdp; 1441 fdfile_t *ff; 1442 file_t *fp; 1443 int fd, lastfd; 1444 void **discard; 1445 1446 fdp = curlwp->l_fd; 1447 1448 KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); 1449 1450 if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) 1451 return; 1452 1453 /* 1454 * Close any files that the process holds open. 1455 */ 1456 for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) { 1457 ff = fdp->fd_ofiles[fd]; 1458 KASSERT(fd >= NDFDFILE || 1459 ff == (fdfile_t *)fdp->fd_dfdfile[fd]); 1460 if ((ff = fdp->fd_ofiles[fd]) == NULL) 1461 continue; 1462 if ((fp = ff->ff_file) != NULL) { 1463 /* 1464 * Must use fd_close() here as kqueue holds 1465 * long term references to descriptors. 1466 */ 1467 ff->ff_refcnt++; 1468 fd_close(fd); 1469 } 1470 KASSERT(ff->ff_refcnt == 0); 1471 KASSERT(ff->ff_file == NULL); 1472 KASSERT(!ff->ff_exclose); 1473 KASSERT(!ff->ff_allocated); 1474 if (fd >= NDFDFILE) { 1475 pool_cache_put(fdfile_cache, ff); 1476 } 1477 } 1478 1479 /* 1480 * Clean out the descriptor table for the next user and return 1481 * to the cache. 1482 */ 1483 while ((discard = fdp->fd_discard) != NULL) { 1484 fdp->fd_discard = discard[0]; 1485 kmem_free(discard, (uintptr_t)discard[1]); 1486 } 1487 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1488 KASSERT(fdp->fd_himap != fdp->fd_dhimap); 1489 KASSERT(fdp->fd_lomap != fdp->fd_dlomap); 1490 fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap); 1491 } 1492 if (fdp->fd_nfiles > NDFILE) { 1493 KASSERT(fdp->fd_ofiles != fdp->fd_dfiles); 1494 fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles); 1495 } 1496 if (fdp->fd_knhash != NULL) { 1497 hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); 1498 fdp->fd_knhash = NULL; 1499 fdp->fd_knhashmask = 0; 1500 } else { 1501 KASSERT(fdp->fd_knhashmask == 0); 1502 } 1503 fdp->fd_lastkqfile = -1; 1504 pool_cache_put(filedesc_cache, fdp); 1505 } 1506 1507 /* 1508 * File Descriptor pseudo-device driver (/dev/fd/). 1509 * 1510 * Opening minor device N dup()s the file (if any) connected to file 1511 * descriptor N belonging to the calling process. Note that this driver 1512 * consists of only the ``open()'' routine, because all subsequent 1513 * references to this file will be direct to the other driver. 1514 */ 1515 static int 1516 filedescopen(dev_t dev, int mode, int type, lwp_t *l) 1517 { 1518 1519 /* 1520 * XXX Kludge: set dupfd to contain the value of the 1521 * the file descriptor being sought for duplication. The error 1522 * return ensures that the vnode for this device will be released 1523 * by vn_open. Open will detect this special error and take the 1524 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1525 * will simply report the error. 1526 */ 1527 l->l_dupfd = minor(dev); /* XXX */ 1528 return EDUPFD; 1529 } 1530 1531 /* 1532 * Duplicate the specified descriptor to a free descriptor. 1533 */ 1534 int 1535 fd_dupopen(int old, int *new, int mode, int error) 1536 { 1537 filedesc_t *fdp; 1538 fdfile_t *ff; 1539 file_t *fp; 1540 1541 if ((fp = fd_getfile(old)) == NULL) { 1542 return EBADF; 1543 } 1544 fdp = curlwp->l_fd; 1545 ff = fdp->fd_ofiles[old]; 1546 1547 /* 1548 * There are two cases of interest here. 1549 * 1550 * For EDUPFD simply dup (dfd) to file descriptor 1551 * (indx) and return. 1552 * 1553 * For EMOVEFD steal away the file structure from (dfd) and 1554 * store it in (indx). (dfd) is effectively closed by 1555 * this operation. 1556 * 1557 * Any other error code is just returned. 1558 */ 1559 switch (error) { 1560 case EDUPFD: 1561 /* 1562 * Check that the mode the file is being opened for is a 1563 * subset of the mode of the existing descriptor. 1564 */ 1565 if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { 1566 error = EACCES; 1567 break; 1568 } 1569 1570 /* Copy it. */ 1571 error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); 1572 break; 1573 1574 case EMOVEFD: 1575 /* Copy it. */ 1576 error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); 1577 if (error != 0) { 1578 break; 1579 } 1580 1581 /* Steal away the file pointer from 'old'. */ 1582 (void)fd_close(old); 1583 return 0; 1584 } 1585 1586 fd_putfile(old); 1587 return error; 1588 } 1589 1590 /* 1591 * Sets descriptor owner. If the owner is a process, 'pgid' 1592 * is set to positive value, process ID. If the owner is process group, 1593 * 'pgid' is set to -pg_id. 1594 */ 1595 int 1596 fsetown(pid_t *pgid, u_long cmd, const void *data) 1597 { 1598 int id = *(const int *)data; 1599 int error; 1600 1601 switch (cmd) { 1602 case TIOCSPGRP: 1603 if (id < 0) 1604 return (EINVAL); 1605 id = -id; 1606 break; 1607 default: 1608 break; 1609 } 1610 1611 if (id > 0 && !pfind(id)) 1612 return (ESRCH); 1613 else if (id < 0 && (error = pgid_in_session(curproc, -id))) 1614 return (error); 1615 1616 *pgid = id; 1617 return (0); 1618 } 1619 1620 /* 1621 * Return descriptor owner information. If the value is positive, 1622 * it's process ID. If it's negative, it's process group ID and 1623 * needs the sign removed before use. 1624 */ 1625 int 1626 fgetown(pid_t pgid, u_long cmd, void *data) 1627 { 1628 1629 switch (cmd) { 1630 case TIOCGPGRP: 1631 *(int *)data = -pgid; 1632 break; 1633 default: 1634 *(int *)data = pgid; 1635 break; 1636 } 1637 return (0); 1638 } 1639 1640 /* 1641 * Send signal to descriptor owner, either process or process group. 1642 */ 1643 void 1644 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata) 1645 { 1646 ksiginfo_t ksi; 1647 1648 KASSERT(!cpu_intr_p()); 1649 1650 if (pgid == 0) { 1651 return; 1652 } 1653 1654 KSI_INIT(&ksi); 1655 ksi.ksi_signo = signo; 1656 ksi.ksi_code = code; 1657 ksi.ksi_band = band; 1658 1659 mutex_enter(proc_lock); 1660 if (pgid > 0) { 1661 struct proc *p1; 1662 1663 p1 = p_find(pgid, PFIND_LOCKED); 1664 if (p1 != NULL) { 1665 kpsignal(p1, &ksi, fdescdata); 1666 } 1667 } else { 1668 struct pgrp *pgrp; 1669 1670 KASSERT(pgid < 0); 1671 pgrp = pg_find(-pgid, PFIND_LOCKED); 1672 if (pgrp != NULL) { 1673 kpgsignal(pgrp, &ksi, fdescdata, 0); 1674 } 1675 } 1676 mutex_exit(proc_lock); 1677 } 1678 1679 int 1680 fd_clone(file_t *fp, unsigned fd, int flag, const struct fileops *fops, 1681 void *data) 1682 { 1683 1684 fp->f_flag = flag; 1685 fp->f_type = DTYPE_MISC; 1686 fp->f_ops = fops; 1687 fp->f_data = data; 1688 curlwp->l_dupfd = fd; 1689 fd_affix(curproc, fp, fd); 1690 1691 return EMOVEFD; 1692 } 1693 1694 int 1695 fnullop_fcntl(file_t *fp, u_int cmd, void *data) 1696 { 1697 1698 if (cmd == F_SETFL) 1699 return 0; 1700 1701 return EOPNOTSUPP; 1702 } 1703 1704 int 1705 fnullop_poll(file_t *fp, int which) 1706 { 1707 1708 return 0; 1709 } 1710 1711 int 1712 fnullop_kqfilter(file_t *fp, struct knote *kn) 1713 { 1714 1715 return 0; 1716 } 1717 1718 void 1719 fnullop_drain(file_t *fp) 1720 { 1721 1722 } 1723 1724 int 1725 fbadop_read(file_t *fp, off_t *offset, struct uio *uio, 1726 kauth_cred_t cred, int flags) 1727 { 1728 1729 return EOPNOTSUPP; 1730 } 1731 1732 int 1733 fbadop_write(file_t *fp, off_t *offset, struct uio *uio, 1734 kauth_cred_t cred, int flags) 1735 { 1736 1737 return EOPNOTSUPP; 1738 } 1739 1740 int 1741 fbadop_ioctl(file_t *fp, u_long com, void *data) 1742 { 1743 1744 return EOPNOTSUPP; 1745 } 1746 1747 int 1748 fbadop_stat(file_t *fp, struct stat *sb) 1749 { 1750 1751 return EOPNOTSUPP; 1752 } 1753 1754 int 1755 fbadop_close(file_t *fp) 1756 { 1757 1758 return EOPNOTSUPP; 1759 } 1760