1 /* $NetBSD: vfs_trans.c,v 1.61 2019/06/17 08:07:27 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.61 2019/06/17 08:07:27 hannken Exp $"); 34 35 /* 36 * File system transaction operations. 37 */ 38 39 #ifdef _KERNEL_OPT 40 #include "opt_ddb.h" 41 #endif 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/atomic.h> 46 #include <sys/buf.h> 47 #include <sys/kmem.h> 48 #include <sys/mount.h> 49 #include <sys/pserialize.h> 50 #include <sys/vnode.h> 51 #include <sys/fstrans.h> 52 #include <sys/proc.h> 53 54 #include <miscfs/specfs/specdev.h> 55 56 enum fstrans_lock_type { 57 FSTRANS_LAZY, /* Granted while not suspended */ 58 FSTRANS_SHARED /* Granted while not suspending */ 59 }; 60 61 struct fscow_handler { 62 LIST_ENTRY(fscow_handler) ch_list; 63 int (*ch_func)(void *, struct buf *, bool); 64 void *ch_arg; 65 }; 66 struct fstrans_lwp_info { 67 struct fstrans_lwp_info *fli_succ; 68 struct lwp *fli_self; 69 struct mount *fli_mount; 70 struct fstrans_lwp_info *fli_alias; 71 struct fstrans_mount_info *fli_mountinfo; 72 int fli_trans_cnt; 73 int fli_alias_cnt; 74 int fli_cow_cnt; 75 enum fstrans_lock_type fli_lock_type; 76 LIST_ENTRY(fstrans_lwp_info) fli_list; 77 }; 78 struct fstrans_mount_info { 79 enum fstrans_state fmi_state; 80 unsigned int fmi_ref_cnt; 81 bool fmi_gone; 82 bool fmi_cow_change; 83 LIST_HEAD(, fscow_handler) fmi_cow_handler; 84 struct mount *fmi_mount; 85 struct lwp *fmi_owner; 86 }; 87 88 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 89 static kmutex_t fstrans_lock; /* Fstrans big lock. */ 90 static kmutex_t fstrans_mount_lock; /* Fstrans mount big lock. */ 91 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 92 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 93 static pserialize_t fstrans_psz; /* Pserialize state. */ 94 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 95 /* List of all fstrans_lwp_info. */ 96 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 97 98 static void fstrans_mount_dtor(struct fstrans_mount_info *); 99 static void fstrans_clear_lwp_info(void); 100 static inline struct fstrans_lwp_info * 101 fstrans_get_lwp_info(struct mount *, bool); 102 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 103 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 104 static bool grant_lock(const struct fstrans_mount_info *, 105 const enum fstrans_lock_type); 106 static bool state_change_done(const struct fstrans_mount_info *); 107 static bool cow_state_change_done(const struct fstrans_mount_info *); 108 static void cow_change_enter(struct fstrans_mount_info *); 109 static void cow_change_done(struct fstrans_mount_info *); 110 111 extern struct mount *dead_rootmount; 112 113 #if defined(DIAGNOSTIC) 114 115 struct fstrans_debug_mount { 116 struct mount *fdm_mount; 117 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 118 }; 119 120 static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 121 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 122 123 static void 124 fstrans_debug_mount(struct mount *mp) 125 { 126 struct fstrans_debug_mount *fdm, *new; 127 128 KASSERT(mutex_owned(&fstrans_mount_lock)); 129 130 mutex_exit(&fstrans_mount_lock); 131 new = kmem_alloc(sizeof(*new), KM_SLEEP); 132 new->fdm_mount = mp; 133 mutex_enter(&fstrans_mount_lock); 134 135 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 136 KASSERT(fdm->fdm_mount != mp); 137 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 138 } 139 140 static void 141 fstrans_debug_unmount(struct mount *mp) 142 { 143 struct fstrans_debug_mount *fdm; 144 145 KASSERT(mutex_owned(&fstrans_mount_lock)); 146 147 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 148 if (fdm->fdm_mount == mp) 149 break; 150 KASSERT(fdm != NULL); 151 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 152 fstrans_debug_mount, fdm_list); 153 kmem_free(fdm, sizeof(*fdm)); 154 } 155 156 static void 157 fstrans_debug_validate_mount(struct mount *mp) 158 { 159 struct fstrans_debug_mount *fdm; 160 161 KASSERT(mutex_owned(&fstrans_mount_lock)); 162 163 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 164 if (fdm->fdm_mount == mp) 165 break; 166 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 167 } 168 169 #else /* defined(DIAGNOSTIC) */ 170 171 #define fstrans_debug_mount(mp) 172 #define fstrans_debug_unmount(mp) 173 #define fstrans_debug_validate_mount(mp) 174 175 #endif /* defined(DIAGNOSTIC) */ 176 177 /* 178 * Initialize. 179 */ 180 void 181 fstrans_init(void) 182 { 183 184 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 185 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 186 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE); 187 cv_init(&fstrans_state_cv, "fstchg"); 188 cv_init(&fstrans_count_cv, "fstcnt"); 189 fstrans_psz = pserialize_create(); 190 LIST_INIT(&fstrans_fli_head); 191 } 192 193 /* 194 * Deallocate lwp state. 195 */ 196 void 197 fstrans_lwp_dtor(lwp_t *l) 198 { 199 struct fstrans_lwp_info *fli, *fli_next; 200 201 for (fli = l->l_fstrans; fli; fli = fli_next) { 202 KASSERT(fli->fli_trans_cnt == 0); 203 KASSERT(fli->fli_cow_cnt == 0); 204 KASSERT(fli->fli_self == l); 205 if (fli->fli_mount != NULL) 206 fstrans_mount_dtor(fli->fli_mountinfo); 207 fli_next = fli->fli_succ; 208 fli->fli_alias_cnt = 0; 209 fli->fli_mount = NULL; 210 fli->fli_alias = NULL; 211 fli->fli_mountinfo = NULL; 212 membar_sync(); 213 fli->fli_self = NULL; 214 } 215 216 l->l_fstrans = NULL; 217 } 218 219 /* 220 * Dereference mount state. 221 */ 222 static void 223 fstrans_mount_dtor(struct fstrans_mount_info *fmi) 224 { 225 226 mutex_enter(&fstrans_mount_lock); 227 228 KASSERT(fmi != NULL); 229 fmi->fmi_ref_cnt -= 1; 230 if (fmi->fmi_ref_cnt > 0) { 231 mutex_exit(&fstrans_mount_lock); 232 return; 233 } 234 235 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 236 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 237 KASSERT(fmi->fmi_owner == NULL); 238 239 KASSERT(fstrans_gone_count > 0); 240 fstrans_gone_count -= 1; 241 242 mutex_exit(&fstrans_mount_lock); 243 244 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 245 kmem_free(fmi, sizeof(*fmi)); 246 } 247 248 /* 249 * Allocate mount state. 250 */ 251 int 252 fstrans_mount(struct mount *mp) 253 { 254 struct fstrans_mount_info *newfmi; 255 256 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 257 newfmi->fmi_state = FSTRANS_NORMAL; 258 newfmi->fmi_ref_cnt = 1; 259 newfmi->fmi_gone = false; 260 LIST_INIT(&newfmi->fmi_cow_handler); 261 newfmi->fmi_cow_change = false; 262 newfmi->fmi_mount = mp; 263 newfmi->fmi_owner = NULL; 264 265 mutex_enter(&fstrans_mount_lock); 266 mp->mnt_transinfo = newfmi; 267 fstrans_debug_mount(mp); 268 mutex_exit(&fstrans_mount_lock); 269 270 return 0; 271 } 272 273 /* 274 * Deallocate mount state. 275 */ 276 void 277 fstrans_unmount(struct mount *mp) 278 { 279 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 280 281 KASSERT(fmi != NULL); 282 283 mutex_enter(&fstrans_mount_lock); 284 fstrans_debug_unmount(mp); 285 fmi->fmi_gone = true; 286 mp->mnt_transinfo = NULL; 287 fstrans_gone_count += 1; 288 mutex_exit(&fstrans_mount_lock); 289 290 fstrans_mount_dtor(fmi); 291 } 292 293 /* 294 * Clear mount entries whose mount is gone. 295 */ 296 static void 297 fstrans_clear_lwp_info(void) 298 { 299 struct fstrans_lwp_info **p, *fli; 300 301 /* 302 * Scan our list clearing entries whose mount is gone. 303 */ 304 for (p = &curlwp->l_fstrans; *p; ) { 305 fli = *p; 306 if (fli->fli_mount != NULL && 307 fli->fli_mountinfo->fmi_gone && 308 fli->fli_trans_cnt == 0 && 309 fli->fli_cow_cnt == 0 && 310 fli->fli_alias_cnt == 0) { 311 *p = (*p)->fli_succ; 312 fstrans_mount_dtor(fli->fli_mountinfo); 313 if (fli->fli_alias) { 314 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 315 fli->fli_alias->fli_alias_cnt--; 316 } 317 fli->fli_mount = NULL; 318 fli->fli_alias = NULL; 319 fli->fli_mountinfo = NULL; 320 membar_sync(); 321 fli->fli_self = NULL; 322 p = &curlwp->l_fstrans; 323 } else { 324 p = &(*p)->fli_succ; 325 } 326 } 327 #ifdef DIAGNOSTIC 328 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 329 if (fli->fli_alias != NULL) 330 KASSERT(fli->fli_alias->fli_self == curlwp); 331 #endif /* DIAGNOSTIC */ 332 } 333 334 /* 335 * Allocate and return per lwp info for this mount. 336 */ 337 static struct fstrans_lwp_info * 338 fstrans_alloc_lwp_info(struct mount *mp) 339 { 340 struct fstrans_lwp_info *fli; 341 struct fstrans_mount_info *fmi; 342 343 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 344 if (fli->fli_mount == mp) 345 return fli; 346 } 347 348 /* 349 * Try to reuse a cleared entry or allocate a new one. 350 */ 351 mutex_enter(&fstrans_lock); 352 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 353 membar_sync(); 354 if (fli->fli_self == NULL) { 355 KASSERT(fli->fli_mount == NULL); 356 KASSERT(fli->fli_trans_cnt == 0); 357 KASSERT(fli->fli_cow_cnt == 0); 358 KASSERT(fli->fli_alias_cnt == 0); 359 fli->fli_self = curlwp; 360 fli->fli_succ = curlwp->l_fstrans; 361 curlwp->l_fstrans = fli; 362 break; 363 } 364 } 365 mutex_exit(&fstrans_lock); 366 367 if (fli == NULL) { 368 fli = kmem_alloc(sizeof(*fli), KM_SLEEP); 369 mutex_enter(&fstrans_lock); 370 memset(fli, 0, sizeof(*fli)); 371 fli->fli_self = curlwp; 372 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 373 mutex_exit(&fstrans_lock); 374 fli->fli_succ = curlwp->l_fstrans; 375 curlwp->l_fstrans = fli; 376 } 377 378 /* 379 * Attach the entry to the mount if its mnt_transinfo is valid. 380 */ 381 382 mutex_enter(&fstrans_mount_lock); 383 fstrans_debug_validate_mount(mp); 384 fmi = mp->mnt_transinfo; 385 KASSERT(fmi != NULL); 386 fli->fli_mount = mp; 387 fli->fli_mountinfo = fmi; 388 fmi->fmi_ref_cnt += 1; 389 do { 390 mp = mp->mnt_lower; 391 } while (mp && mp->mnt_lower); 392 mutex_exit(&fstrans_mount_lock); 393 394 if (mp) { 395 fli->fli_alias = fstrans_alloc_lwp_info(mp); 396 fli->fli_alias->fli_alias_cnt++; 397 fli = fli->fli_alias; 398 } 399 400 return fli; 401 } 402 403 /* 404 * Retrieve the per lwp info for this mount allocating if necessary. 405 */ 406 static inline struct fstrans_lwp_info * 407 fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 408 { 409 struct fstrans_lwp_info *fli; 410 411 /* 412 * Scan our list for a match. 413 */ 414 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 415 if (fli->fli_mount == mp) { 416 KASSERT((mp->mnt_lower == NULL) == 417 (fli->fli_alias == NULL)); 418 if (fli->fli_alias != NULL) 419 fli = fli->fli_alias; 420 break; 421 } 422 } 423 424 if (do_alloc) { 425 if (__predict_false(fli == NULL)) 426 fli = fstrans_alloc_lwp_info(mp); 427 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone); 428 } else { 429 KASSERT(fli != NULL); 430 } 431 432 return fli; 433 } 434 435 /* 436 * Check if this lock type is granted at this state. 437 */ 438 static bool 439 grant_lock(const struct fstrans_mount_info *fmi, 440 const enum fstrans_lock_type type) 441 { 442 443 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 444 return true; 445 if (fmi->fmi_owner == curlwp) 446 return true; 447 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 448 return true; 449 450 return false; 451 } 452 453 /* 454 * Start a transaction. If this thread already has a transaction on this 455 * file system increment the reference counter. 456 */ 457 static inline int 458 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 459 { 460 int s; 461 struct fstrans_lwp_info *fli; 462 struct fstrans_mount_info *fmi; 463 464 #ifndef FSTRANS_DEAD_ENABLED 465 if (mp == dead_rootmount) 466 return 0; 467 #endif 468 469 ASSERT_SLEEPABLE(); 470 471 fli = fstrans_get_lwp_info(mp, true); 472 fmi = fli->fli_mountinfo; 473 474 if (fli->fli_trans_cnt > 0) { 475 fli->fli_trans_cnt += 1; 476 477 return 0; 478 } 479 480 s = pserialize_read_enter(); 481 if (__predict_true(grant_lock(fmi, lock_type))) { 482 fli->fli_trans_cnt = 1; 483 fli->fli_lock_type = lock_type; 484 pserialize_read_exit(s); 485 486 return 0; 487 } 488 pserialize_read_exit(s); 489 490 if (! wait) 491 return EBUSY; 492 493 mutex_enter(&fstrans_lock); 494 while (! grant_lock(fmi, lock_type)) 495 cv_wait(&fstrans_state_cv, &fstrans_lock); 496 fli->fli_trans_cnt = 1; 497 fli->fli_lock_type = lock_type; 498 mutex_exit(&fstrans_lock); 499 500 return 0; 501 } 502 503 void 504 fstrans_start(struct mount *mp) 505 { 506 int error __diagused; 507 508 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 509 KASSERT(error == 0); 510 } 511 512 int 513 fstrans_start_nowait(struct mount *mp) 514 { 515 516 return _fstrans_start(mp, FSTRANS_SHARED, 0); 517 } 518 519 void 520 fstrans_start_lazy(struct mount *mp) 521 { 522 int error __diagused; 523 524 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 525 KASSERT(error == 0); 526 } 527 528 /* 529 * Finish a transaction. 530 */ 531 void 532 fstrans_done(struct mount *mp) 533 { 534 int s; 535 struct fstrans_lwp_info *fli; 536 struct fstrans_mount_info *fmi; 537 538 #ifndef FSTRANS_DEAD_ENABLED 539 if (mp == dead_rootmount) 540 return; 541 #endif 542 543 fli = fstrans_get_lwp_info(mp, false); 544 fmi = fli->fli_mountinfo; 545 KASSERT(fli->fli_trans_cnt > 0); 546 547 if (fli->fli_trans_cnt > 1) { 548 fli->fli_trans_cnt -= 1; 549 550 return; 551 } 552 553 if (__predict_false(fstrans_gone_count > 0)) 554 fstrans_clear_lwp_info(); 555 556 s = pserialize_read_enter(); 557 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 558 fli->fli_trans_cnt = 0; 559 pserialize_read_exit(s); 560 561 return; 562 } 563 pserialize_read_exit(s); 564 565 mutex_enter(&fstrans_lock); 566 fli->fli_trans_cnt = 0; 567 cv_signal(&fstrans_count_cv); 568 mutex_exit(&fstrans_lock); 569 } 570 571 /* 572 * Check if this thread has an exclusive lock. 573 */ 574 int 575 fstrans_is_owner(struct mount *mp) 576 { 577 struct fstrans_lwp_info *fli; 578 struct fstrans_mount_info *fmi; 579 580 KASSERT(mp != dead_rootmount); 581 582 fli = fstrans_get_lwp_info(mp, true); 583 fmi = fli->fli_mountinfo; 584 585 return (fmi->fmi_owner == curlwp); 586 } 587 588 /* 589 * True, if no thread is in a transaction not granted at the current state. 590 */ 591 static bool 592 state_change_done(const struct fstrans_mount_info *fmi) 593 { 594 struct fstrans_lwp_info *fli; 595 596 KASSERT(mutex_owned(&fstrans_lock)); 597 598 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 599 if (fli->fli_mountinfo != fmi) 600 continue; 601 if (fli->fli_trans_cnt == 0) 602 continue; 603 if (fli->fli_self == curlwp) 604 continue; 605 if (grant_lock(fmi, fli->fli_lock_type)) 606 continue; 607 608 return false; 609 } 610 611 return true; 612 } 613 614 /* 615 * Set new file system state. 616 */ 617 int 618 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 619 { 620 int error; 621 enum fstrans_state old_state; 622 struct fstrans_lwp_info *fli; 623 struct fstrans_mount_info *fmi; 624 625 KASSERT(mp != dead_rootmount); 626 627 fli = fstrans_get_lwp_info(mp, true); 628 fmi = fli->fli_mountinfo; 629 old_state = fmi->fmi_state; 630 if (old_state == new_state) 631 return 0; 632 633 mutex_enter(&fstrans_lock); 634 fmi->fmi_state = new_state; 635 pserialize_perform(fstrans_psz); 636 637 /* 638 * All threads see the new state now. 639 * Wait for transactions invalid at this state to leave. 640 */ 641 error = 0; 642 while (! state_change_done(fmi)) { 643 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 644 if (error) { 645 new_state = fmi->fmi_state = FSTRANS_NORMAL; 646 break; 647 } 648 } 649 if (old_state != new_state) { 650 if (old_state == FSTRANS_NORMAL) { 651 KASSERT(fmi->fmi_owner == NULL); 652 fmi->fmi_owner = curlwp; 653 } 654 if (new_state == FSTRANS_NORMAL) { 655 KASSERT(fmi->fmi_owner == curlwp); 656 fmi->fmi_owner = NULL; 657 } 658 } 659 cv_broadcast(&fstrans_state_cv); 660 mutex_exit(&fstrans_lock); 661 662 return error; 663 } 664 665 /* 666 * Get current file system state. 667 */ 668 enum fstrans_state 669 fstrans_getstate(struct mount *mp) 670 { 671 struct fstrans_lwp_info *fli; 672 struct fstrans_mount_info *fmi; 673 674 KASSERT(mp != dead_rootmount); 675 676 fli = fstrans_get_lwp_info(mp, true); 677 fmi = fli->fli_mountinfo; 678 679 return fmi->fmi_state; 680 } 681 682 /* 683 * Request a filesystem to suspend all operations. 684 */ 685 int 686 vfs_suspend(struct mount *mp, int nowait) 687 { 688 struct fstrans_lwp_info *fli; 689 int error; 690 691 if (mp == dead_rootmount) 692 return EOPNOTSUPP; 693 694 fli = fstrans_get_lwp_info(mp, true); 695 mp = fli->fli_mount; 696 697 if (nowait) { 698 if (!mutex_tryenter(&vfs_suspend_lock)) 699 return EWOULDBLOCK; 700 } else 701 mutex_enter(&vfs_suspend_lock); 702 703 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 704 mutex_exit(&vfs_suspend_lock); 705 706 return error; 707 } 708 709 /* 710 * Request a filesystem to resume all operations. 711 */ 712 void 713 vfs_resume(struct mount *mp) 714 { 715 struct fstrans_lwp_info *fli; 716 717 KASSERT(mp != dead_rootmount); 718 719 fli = fstrans_get_lwp_info(mp, false); 720 mp = fli->fli_mount; 721 722 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 723 mutex_exit(&vfs_suspend_lock); 724 } 725 726 727 /* 728 * True, if no thread is running a cow handler. 729 */ 730 static bool 731 cow_state_change_done(const struct fstrans_mount_info *fmi) 732 { 733 struct fstrans_lwp_info *fli; 734 735 KASSERT(mutex_owned(&fstrans_lock)); 736 KASSERT(fmi->fmi_cow_change); 737 738 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 739 if (fli->fli_mount != fmi->fmi_mount) 740 continue; 741 if (fli->fli_cow_cnt == 0) 742 continue; 743 744 return false; 745 } 746 747 return true; 748 } 749 750 /* 751 * Prepare for changing this mounts cow list. 752 * Returns with fstrans_lock locked. 753 */ 754 static void 755 cow_change_enter(struct fstrans_mount_info *fmi) 756 { 757 758 mutex_enter(&fstrans_lock); 759 760 /* 761 * Wait for other threads changing the list. 762 */ 763 while (fmi->fmi_cow_change) 764 cv_wait(&fstrans_state_cv, &fstrans_lock); 765 766 /* 767 * Wait until all threads are aware of a state change. 768 */ 769 fmi->fmi_cow_change = true; 770 pserialize_perform(fstrans_psz); 771 772 while (! cow_state_change_done(fmi)) 773 cv_wait(&fstrans_count_cv, &fstrans_lock); 774 } 775 776 /* 777 * Done changing this mounts cow list. 778 */ 779 static void 780 cow_change_done(struct fstrans_mount_info *fmi) 781 { 782 783 KASSERT(mutex_owned(&fstrans_lock)); 784 785 fmi->fmi_cow_change = false; 786 pserialize_perform(fstrans_psz); 787 788 cv_broadcast(&fstrans_state_cv); 789 790 mutex_exit(&fstrans_lock); 791 } 792 793 /* 794 * Add a handler to this mount. 795 */ 796 int 797 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 798 void *arg) 799 { 800 struct fstrans_mount_info *fmi; 801 struct fscow_handler *newch; 802 803 KASSERT(mp != dead_rootmount); 804 805 mutex_enter(&fstrans_mount_lock); 806 fmi = mp->mnt_transinfo; 807 KASSERT(fmi != NULL); 808 fmi->fmi_ref_cnt += 1; 809 mutex_exit(&fstrans_mount_lock); 810 811 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 812 newch->ch_func = func; 813 newch->ch_arg = arg; 814 815 cow_change_enter(fmi); 816 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 817 cow_change_done(fmi); 818 819 return 0; 820 } 821 822 /* 823 * Remove a handler from this mount. 824 */ 825 int 826 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 827 void *arg) 828 { 829 struct fstrans_mount_info *fmi; 830 struct fscow_handler *hp = NULL; 831 832 KASSERT(mp != dead_rootmount); 833 834 fmi = mp->mnt_transinfo; 835 KASSERT(fmi != NULL); 836 837 cow_change_enter(fmi); 838 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 839 if (hp->ch_func == func && hp->ch_arg == arg) 840 break; 841 if (hp != NULL) { 842 LIST_REMOVE(hp, ch_list); 843 kmem_free(hp, sizeof(*hp)); 844 } 845 cow_change_done(fmi); 846 847 fstrans_mount_dtor(fmi); 848 849 return hp ? 0 : EINVAL; 850 } 851 852 /* 853 * Check for need to copy block that is about to be written. 854 */ 855 int 856 fscow_run(struct buf *bp, bool data_valid) 857 { 858 int error, s; 859 struct mount *mp; 860 struct fstrans_lwp_info *fli; 861 struct fstrans_mount_info *fmi; 862 struct fscow_handler *hp; 863 864 /* 865 * First check if we need run the copy-on-write handler. 866 */ 867 if ((bp->b_flags & B_COWDONE)) 868 return 0; 869 if (bp->b_vp == NULL) { 870 bp->b_flags |= B_COWDONE; 871 return 0; 872 } 873 if (bp->b_vp->v_type == VBLK) 874 mp = spec_node_getmountedfs(bp->b_vp); 875 else 876 mp = bp->b_vp->v_mount; 877 if (mp == NULL || mp == dead_rootmount) { 878 bp->b_flags |= B_COWDONE; 879 return 0; 880 } 881 882 fli = fstrans_get_lwp_info(mp, true); 883 fmi = fli->fli_mountinfo; 884 885 /* 886 * On non-recursed run check if other threads 887 * want to change the list. 888 */ 889 if (fli->fli_cow_cnt == 0) { 890 s = pserialize_read_enter(); 891 if (__predict_false(fmi->fmi_cow_change)) { 892 pserialize_read_exit(s); 893 mutex_enter(&fstrans_lock); 894 while (fmi->fmi_cow_change) 895 cv_wait(&fstrans_state_cv, &fstrans_lock); 896 fli->fli_cow_cnt = 1; 897 mutex_exit(&fstrans_lock); 898 } else { 899 fli->fli_cow_cnt = 1; 900 pserialize_read_exit(s); 901 } 902 } else 903 fli->fli_cow_cnt += 1; 904 905 /* 906 * Run all copy-on-write handlers, stop on error. 907 */ 908 error = 0; 909 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 910 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 911 break; 912 if (error == 0) 913 bp->b_flags |= B_COWDONE; 914 915 /* 916 * Check if other threads want to change the list. 917 */ 918 if (fli->fli_cow_cnt > 1) { 919 fli->fli_cow_cnt -= 1; 920 } else { 921 s = pserialize_read_enter(); 922 if (__predict_false(fmi->fmi_cow_change)) { 923 pserialize_read_exit(s); 924 mutex_enter(&fstrans_lock); 925 fli->fli_cow_cnt = 0; 926 cv_signal(&fstrans_count_cv); 927 mutex_exit(&fstrans_lock); 928 } else { 929 fli->fli_cow_cnt = 0; 930 pserialize_read_exit(s); 931 } 932 } 933 934 return error; 935 } 936 937 #if defined(DDB) 938 void fstrans_dump(int); 939 940 static void 941 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 942 { 943 char prefix[9]; 944 struct fstrans_lwp_info *fli; 945 946 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 947 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 948 if (fli->fli_self != l) 949 continue; 950 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 951 if (! verbose) 952 continue; 953 } 954 printf("%-8s", prefix); 955 if (verbose) 956 printf(" @%p", fli); 957 if (fli->fli_mount == dead_rootmount) 958 printf(" <dead>"); 959 else if (fli->fli_mount != NULL) 960 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 961 else 962 printf(" NULL"); 963 if (fli->fli_alias != NULL) { 964 struct mount *amp = fli->fli_alias->fli_mount; 965 966 printf(" alias"); 967 if (verbose) 968 printf(" @%p", fli->fli_alias); 969 if (amp == NULL) 970 printf(" NULL"); 971 else 972 printf(" (%s)", amp->mnt_stat.f_mntonname); 973 } 974 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 975 printf(" gone"); 976 if (fli->fli_trans_cnt == 0) { 977 printf(" -"); 978 } else { 979 switch (fli->fli_lock_type) { 980 case FSTRANS_LAZY: 981 printf(" lazy"); 982 break; 983 case FSTRANS_SHARED: 984 printf(" shared"); 985 break; 986 default: 987 printf(" %#x", fli->fli_lock_type); 988 break; 989 } 990 } 991 printf(" %d cow %d alias %d\n", 992 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 993 prefix[0] = '\0'; 994 } 995 } 996 997 static void 998 fstrans_print_mount(struct mount *mp, int verbose) 999 { 1000 struct fstrans_mount_info *fmi; 1001 1002 fmi = mp->mnt_transinfo; 1003 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1004 return; 1005 1006 printf("%-16s ", mp->mnt_stat.f_mntonname); 1007 if (fmi == NULL) { 1008 printf("(null)\n"); 1009 return; 1010 } 1011 printf("owner %p ", fmi->fmi_owner); 1012 switch (fmi->fmi_state) { 1013 case FSTRANS_NORMAL: 1014 printf("state normal\n"); 1015 break; 1016 case FSTRANS_SUSPENDING: 1017 printf("state suspending\n"); 1018 break; 1019 case FSTRANS_SUSPENDED: 1020 printf("state suspended\n"); 1021 break; 1022 default: 1023 printf("state %#x\n", fmi->fmi_state); 1024 break; 1025 } 1026 } 1027 1028 void 1029 fstrans_dump(int full) 1030 { 1031 const struct proclist_desc *pd; 1032 struct proc *p; 1033 struct lwp *l; 1034 struct mount *mp; 1035 1036 printf("Fstrans locks by lwp:\n"); 1037 for (pd = proclists; pd->pd_list != NULL; pd++) 1038 PROCLIST_FOREACH(p, pd->pd_list) 1039 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1040 fstrans_print_lwp(p, l, full == 1); 1041 1042 printf("Fstrans state by mount:\n"); 1043 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1044 fstrans_print_mount(mp, full == 1); 1045 } 1046 #endif /* defined(DDB) */ 1047