1 /* $NetBSD: vfs_trans.c,v 1.63 2020/05/17 19:34:07 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.63 2020/05/17 19:34:07 ad Exp $"); 34 35 /* 36 * File system transaction operations. 37 */ 38 39 #ifdef _KERNEL_OPT 40 #include "opt_ddb.h" 41 #endif 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/atomic.h> 46 #include <sys/buf.h> 47 #include <sys/kmem.h> 48 #include <sys/mount.h> 49 #include <sys/pserialize.h> 50 #include <sys/vnode.h> 51 #include <sys/fstrans.h> 52 #include <sys/proc.h> 53 #include <sys/pool.h> 54 55 #include <miscfs/specfs/specdev.h> 56 57 enum fstrans_lock_type { 58 FSTRANS_LAZY, /* Granted while not suspended */ 59 FSTRANS_SHARED /* Granted while not suspending */ 60 }; 61 62 struct fscow_handler { 63 LIST_ENTRY(fscow_handler) ch_list; 64 int (*ch_func)(void *, struct buf *, bool); 65 void *ch_arg; 66 }; 67 struct fstrans_lwp_info { 68 struct fstrans_lwp_info *fli_succ; 69 struct lwp *fli_self; 70 struct mount *fli_mount; 71 struct fstrans_lwp_info *fli_alias; 72 struct fstrans_mount_info *fli_mountinfo; 73 int fli_trans_cnt; 74 int fli_alias_cnt; 75 int fli_cow_cnt; 76 enum fstrans_lock_type fli_lock_type; 77 LIST_ENTRY(fstrans_lwp_info) fli_list; 78 }; 79 struct fstrans_mount_info { 80 enum fstrans_state fmi_state; 81 unsigned int fmi_ref_cnt; 82 bool fmi_gone; 83 bool fmi_cow_change; 84 LIST_HEAD(, fscow_handler) fmi_cow_handler; 85 struct mount *fmi_mount; 86 struct lwp *fmi_owner; 87 }; 88 89 static kmutex_t vfs_suspend_lock /* Serialize suspensions. */ 90 __cacheline_aligned; 91 static kmutex_t fstrans_lock /* Fstrans big lock. */ 92 __cacheline_aligned; 93 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 94 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 95 static pserialize_t fstrans_psz; /* Pserialize state. */ 96 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 97 /* List of all fstrans_lwp_info. */ 98 static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */ 99 100 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 101 102 static void fstrans_mount_dtor(struct fstrans_mount_info *); 103 static void fstrans_clear_lwp_info(void); 104 static inline struct fstrans_lwp_info * 105 fstrans_get_lwp_info(struct mount *, bool); 106 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 107 static int fstrans_lwp_pcc(void *, void *, int); 108 static void fstrans_lwp_pcd(void *, void *); 109 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 110 static bool grant_lock(const struct fstrans_mount_info *, 111 const enum fstrans_lock_type); 112 static bool state_change_done(const struct fstrans_mount_info *); 113 static bool cow_state_change_done(const struct fstrans_mount_info *); 114 static void cow_change_enter(struct fstrans_mount_info *); 115 static void cow_change_done(struct fstrans_mount_info *); 116 117 extern struct mount *dead_rootmount; 118 119 #if defined(DIAGNOSTIC) 120 121 struct fstrans_debug_mount { 122 struct mount *fdm_mount; 123 SLIST_ENTRY(fstrans_debug_mount) fdm_list; 124 }; 125 126 static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head = 127 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head); 128 129 static void 130 fstrans_debug_mount(struct mount *mp) 131 { 132 struct fstrans_debug_mount *fdm, *new; 133 134 KASSERT(mutex_owned(&fstrans_lock)); 135 136 mutex_exit(&fstrans_lock); 137 new = kmem_alloc(sizeof(*new), KM_SLEEP); 138 new->fdm_mount = mp; 139 mutex_enter(&fstrans_lock); 140 141 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 142 KASSERT(fdm->fdm_mount != mp); 143 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list); 144 } 145 146 static void 147 fstrans_debug_unmount(struct mount *mp) 148 { 149 struct fstrans_debug_mount *fdm; 150 151 KASSERT(mutex_owned(&fstrans_lock)); 152 153 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 154 if (fdm->fdm_mount == mp) 155 break; 156 KASSERT(fdm != NULL); 157 SLIST_REMOVE(&fstrans_debug_mount_head, fdm, 158 fstrans_debug_mount, fdm_list); 159 kmem_free(fdm, sizeof(*fdm)); 160 } 161 162 static void 163 fstrans_debug_validate_mount(struct mount *mp) 164 { 165 struct fstrans_debug_mount *fdm; 166 167 KASSERT(mutex_owned(&fstrans_lock)); 168 169 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list) 170 if (fdm->fdm_mount == mp) 171 break; 172 KASSERTMSG(fdm != NULL, "mount %p invalid", mp); 173 } 174 175 #else /* defined(DIAGNOSTIC) */ 176 177 #define fstrans_debug_mount(mp) 178 #define fstrans_debug_unmount(mp) 179 #define fstrans_debug_validate_mount(mp) 180 181 #endif /* defined(DIAGNOSTIC) */ 182 183 /* 184 * Initialize. 185 */ 186 void 187 fstrans_init(void) 188 { 189 190 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 191 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 192 cv_init(&fstrans_state_cv, "fstchg"); 193 cv_init(&fstrans_count_cv, "fstcnt"); 194 fstrans_psz = pserialize_create(); 195 LIST_INIT(&fstrans_fli_head); 196 fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 197 coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE, 198 fstrans_lwp_pcc, fstrans_lwp_pcd, NULL); 199 KASSERT(fstrans_lwp_cache != NULL); 200 } 201 202 /* 203 * pool_cache constructor for fstrans_lwp_info. Updating the global list 204 * produces cache misses on MP. Minimise by keeping free entries on list. 205 */ 206 int 207 fstrans_lwp_pcc(void *arg, void *obj, int flags) 208 { 209 struct fstrans_lwp_info *fli = obj; 210 211 memset(fli, 0, sizeof(*fli)); 212 213 mutex_enter(&fstrans_lock); 214 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 215 mutex_exit(&fstrans_lock); 216 217 return 0; 218 } 219 220 /* 221 * pool_cache destructor 222 */ 223 void 224 fstrans_lwp_pcd(void *arg, void *obj) 225 { 226 struct fstrans_lwp_info *fli = obj; 227 228 mutex_enter(&fstrans_lock); 229 LIST_REMOVE(fli, fli_list); 230 mutex_exit(&fstrans_lock); 231 } 232 233 /* 234 * Deallocate lwp state. 235 */ 236 void 237 fstrans_lwp_dtor(lwp_t *l) 238 { 239 struct fstrans_lwp_info *fli, *fli_next; 240 241 if (l->l_fstrans == NULL) 242 return; 243 244 mutex_enter(&fstrans_lock); 245 for (fli = l->l_fstrans; fli; fli = fli_next) { 246 KASSERT(fli->fli_trans_cnt == 0); 247 KASSERT(fli->fli_cow_cnt == 0); 248 KASSERT(fli->fli_self == l); 249 if (fli->fli_mount != NULL) 250 fstrans_mount_dtor(fli->fli_mountinfo); 251 fli_next = fli->fli_succ; 252 fli->fli_alias_cnt = 0; 253 fli->fli_mount = NULL; 254 fli->fli_alias = NULL; 255 fli->fli_mountinfo = NULL; 256 fli->fli_self = NULL; 257 } 258 mutex_exit(&fstrans_lock); 259 260 for (fli = l->l_fstrans; fli; fli = fli_next) { 261 fli_next = fli->fli_succ; 262 pool_cache_put(fstrans_lwp_cache, fli); 263 } 264 l->l_fstrans = NULL; 265 } 266 267 /* 268 * Dereference mount state. 269 */ 270 static void 271 fstrans_mount_dtor(struct fstrans_mount_info *fmi) 272 { 273 274 KASSERT(mutex_owned(&fstrans_lock)); 275 276 KASSERT(fmi != NULL); 277 fmi->fmi_ref_cnt -= 1; 278 if (__predict_true(fmi->fmi_ref_cnt > 0)) { 279 return; 280 } 281 282 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 283 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 284 KASSERT(fmi->fmi_owner == NULL); 285 286 KASSERT(fstrans_gone_count > 0); 287 fstrans_gone_count -= 1; 288 289 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 290 kmem_free(fmi, sizeof(*fmi)); 291 } 292 293 /* 294 * Allocate mount state. 295 */ 296 int 297 fstrans_mount(struct mount *mp) 298 { 299 struct fstrans_mount_info *newfmi; 300 301 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 302 newfmi->fmi_state = FSTRANS_NORMAL; 303 newfmi->fmi_ref_cnt = 1; 304 newfmi->fmi_gone = false; 305 LIST_INIT(&newfmi->fmi_cow_handler); 306 newfmi->fmi_cow_change = false; 307 newfmi->fmi_mount = mp; 308 newfmi->fmi_owner = NULL; 309 310 mutex_enter(&fstrans_lock); 311 mp->mnt_transinfo = newfmi; 312 fstrans_debug_mount(mp); 313 mutex_exit(&fstrans_lock); 314 315 return 0; 316 } 317 318 /* 319 * Deallocate mount state. 320 */ 321 void 322 fstrans_unmount(struct mount *mp) 323 { 324 struct fstrans_mount_info *fmi = mp->mnt_transinfo; 325 326 KASSERT(fmi != NULL); 327 328 mutex_enter(&fstrans_lock); 329 fstrans_debug_unmount(mp); 330 fmi->fmi_gone = true; 331 mp->mnt_transinfo = NULL; 332 fstrans_gone_count += 1; 333 fstrans_mount_dtor(fmi); 334 mutex_exit(&fstrans_lock); 335 } 336 337 /* 338 * Clear mount entries whose mount is gone. 339 */ 340 static void 341 fstrans_clear_lwp_info(void) 342 { 343 struct fstrans_lwp_info **p, *fli, *tofree = NULL; 344 345 /* 346 * Scan our list clearing entries whose mount is gone. 347 */ 348 mutex_enter(&fstrans_lock); 349 for (p = &curlwp->l_fstrans; *p; ) { 350 fli = *p; 351 if (fli->fli_mount != NULL && 352 fli->fli_mountinfo->fmi_gone && 353 fli->fli_trans_cnt == 0 && 354 fli->fli_cow_cnt == 0 && 355 fli->fli_alias_cnt == 0) { 356 *p = (*p)->fli_succ; 357 fstrans_mount_dtor(fli->fli_mountinfo); 358 if (fli->fli_alias) { 359 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 360 fli->fli_alias->fli_alias_cnt--; 361 } 362 fli->fli_mount = NULL; 363 fli->fli_alias = NULL; 364 fli->fli_mountinfo = NULL; 365 fli->fli_self = NULL; 366 p = &curlwp->l_fstrans; 367 fli->fli_succ = tofree; 368 tofree = fli; 369 } else { 370 p = &(*p)->fli_succ; 371 } 372 } 373 #ifdef DIAGNOSTIC 374 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 375 if (fli->fli_alias != NULL) 376 KASSERT(fli->fli_alias->fli_self == curlwp); 377 #endif /* DIAGNOSTIC */ 378 mutex_exit(&fstrans_lock); 379 380 while (tofree != NULL) { 381 fli = tofree; 382 tofree = fli->fli_succ; 383 pool_cache_put(fstrans_lwp_cache, fli); 384 } 385 } 386 387 /* 388 * Allocate and return per lwp info for this mount. 389 */ 390 static struct fstrans_lwp_info * 391 fstrans_alloc_lwp_info(struct mount *mp) 392 { 393 struct fstrans_lwp_info *fli; 394 struct fstrans_mount_info *fmi; 395 396 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 397 if (fli->fli_mount == mp) 398 return fli; 399 } 400 401 /* 402 * Allocate a new entry. 403 */ 404 fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK); 405 KASSERT(fli->fli_trans_cnt == 0); 406 KASSERT(fli->fli_cow_cnt == 0); 407 KASSERT(fli->fli_alias_cnt == 0); 408 KASSERT(fli->fli_mount == NULL); 409 KASSERT(fli->fli_alias == NULL); 410 KASSERT(fli->fli_mountinfo == NULL); 411 KASSERT(fli->fli_self == NULL); 412 fli->fli_succ = curlwp->l_fstrans; 413 curlwp->l_fstrans = fli; 414 415 /* 416 * Attach the entry to the mount if its mnt_transinfo is valid. 417 */ 418 419 mutex_enter(&fstrans_lock); 420 fli->fli_self = curlwp; 421 fstrans_debug_validate_mount(mp); 422 fmi = mp->mnt_transinfo; 423 KASSERT(fmi != NULL); 424 fli->fli_mount = mp; 425 fli->fli_mountinfo = fmi; 426 fmi->fmi_ref_cnt += 1; 427 do { 428 mp = mp->mnt_lower; 429 } while (mp && mp->mnt_lower); 430 mutex_exit(&fstrans_lock); 431 432 if (mp) { 433 fli->fli_alias = fstrans_alloc_lwp_info(mp); 434 fli->fli_alias->fli_alias_cnt++; 435 fli = fli->fli_alias; 436 } 437 438 return fli; 439 } 440 441 /* 442 * Retrieve the per lwp info for this mount allocating if necessary. 443 */ 444 static inline struct fstrans_lwp_info * 445 fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 446 { 447 struct fstrans_lwp_info *fli; 448 449 /* 450 * Scan our list for a match. 451 */ 452 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 453 if (fli->fli_mount == mp) { 454 KASSERT((mp->mnt_lower == NULL) == 455 (fli->fli_alias == NULL)); 456 if (fli->fli_alias != NULL) 457 fli = fli->fli_alias; 458 break; 459 } 460 } 461 462 if (do_alloc) { 463 if (__predict_false(fli == NULL)) 464 fli = fstrans_alloc_lwp_info(mp); 465 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone); 466 } else { 467 KASSERT(fli != NULL); 468 } 469 470 return fli; 471 } 472 473 /* 474 * Check if this lock type is granted at this state. 475 */ 476 static bool 477 grant_lock(const struct fstrans_mount_info *fmi, 478 const enum fstrans_lock_type type) 479 { 480 481 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 482 return true; 483 if (fmi->fmi_owner == curlwp) 484 return true; 485 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 486 return true; 487 488 return false; 489 } 490 491 /* 492 * Start a transaction. If this thread already has a transaction on this 493 * file system increment the reference counter. 494 */ 495 static inline int 496 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 497 { 498 int s; 499 struct fstrans_lwp_info *fli; 500 struct fstrans_mount_info *fmi; 501 502 #ifndef FSTRANS_DEAD_ENABLED 503 if (mp == dead_rootmount) 504 return 0; 505 #endif 506 507 ASSERT_SLEEPABLE(); 508 509 fli = fstrans_get_lwp_info(mp, true); 510 fmi = fli->fli_mountinfo; 511 512 if (fli->fli_trans_cnt > 0) { 513 fli->fli_trans_cnt += 1; 514 515 return 0; 516 } 517 518 s = pserialize_read_enter(); 519 if (__predict_true(grant_lock(fmi, lock_type))) { 520 fli->fli_trans_cnt = 1; 521 fli->fli_lock_type = lock_type; 522 pserialize_read_exit(s); 523 524 return 0; 525 } 526 pserialize_read_exit(s); 527 528 if (! wait) 529 return EBUSY; 530 531 mutex_enter(&fstrans_lock); 532 while (! grant_lock(fmi, lock_type)) 533 cv_wait(&fstrans_state_cv, &fstrans_lock); 534 fli->fli_trans_cnt = 1; 535 fli->fli_lock_type = lock_type; 536 mutex_exit(&fstrans_lock); 537 538 return 0; 539 } 540 541 void 542 fstrans_start(struct mount *mp) 543 { 544 int error __diagused; 545 546 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 547 KASSERT(error == 0); 548 } 549 550 int 551 fstrans_start_nowait(struct mount *mp) 552 { 553 554 return _fstrans_start(mp, FSTRANS_SHARED, 0); 555 } 556 557 void 558 fstrans_start_lazy(struct mount *mp) 559 { 560 int error __diagused; 561 562 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 563 KASSERT(error == 0); 564 } 565 566 /* 567 * Finish a transaction. 568 */ 569 void 570 fstrans_done(struct mount *mp) 571 { 572 int s; 573 struct fstrans_lwp_info *fli; 574 struct fstrans_mount_info *fmi; 575 576 #ifndef FSTRANS_DEAD_ENABLED 577 if (mp == dead_rootmount) 578 return; 579 #endif 580 581 fli = fstrans_get_lwp_info(mp, false); 582 fmi = fli->fli_mountinfo; 583 KASSERT(fli->fli_trans_cnt > 0); 584 585 if (fli->fli_trans_cnt > 1) { 586 fli->fli_trans_cnt -= 1; 587 588 return; 589 } 590 591 if (__predict_false(fstrans_gone_count > 0)) 592 fstrans_clear_lwp_info(); 593 594 s = pserialize_read_enter(); 595 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 596 fli->fli_trans_cnt = 0; 597 pserialize_read_exit(s); 598 599 return; 600 } 601 pserialize_read_exit(s); 602 603 mutex_enter(&fstrans_lock); 604 fli->fli_trans_cnt = 0; 605 cv_signal(&fstrans_count_cv); 606 mutex_exit(&fstrans_lock); 607 } 608 609 /* 610 * Check if we hold an lock. 611 */ 612 int 613 fstrans_held(struct mount *mp) 614 { 615 struct fstrans_lwp_info *fli; 616 struct fstrans_mount_info *fmi; 617 618 KASSERT(mp != dead_rootmount); 619 620 fli = fstrans_get_lwp_info(mp, true); 621 fmi = fli->fli_mountinfo; 622 623 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); 624 } 625 626 /* 627 * Check if this thread has an exclusive lock. 628 */ 629 int 630 fstrans_is_owner(struct mount *mp) 631 { 632 struct fstrans_lwp_info *fli; 633 struct fstrans_mount_info *fmi; 634 635 KASSERT(mp != dead_rootmount); 636 637 fli = fstrans_get_lwp_info(mp, true); 638 fmi = fli->fli_mountinfo; 639 640 return (fmi->fmi_owner == curlwp); 641 } 642 643 /* 644 * True, if no thread is in a transaction not granted at the current state. 645 */ 646 static bool 647 state_change_done(const struct fstrans_mount_info *fmi) 648 { 649 struct fstrans_lwp_info *fli; 650 651 KASSERT(mutex_owned(&fstrans_lock)); 652 653 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 654 if (fli->fli_mountinfo != fmi) 655 continue; 656 if (fli->fli_trans_cnt == 0) 657 continue; 658 if (fli->fli_self == curlwp) 659 continue; 660 if (grant_lock(fmi, fli->fli_lock_type)) 661 continue; 662 663 return false; 664 } 665 666 return true; 667 } 668 669 /* 670 * Set new file system state. 671 */ 672 int 673 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 674 { 675 int error; 676 enum fstrans_state old_state; 677 struct fstrans_lwp_info *fli; 678 struct fstrans_mount_info *fmi; 679 680 KASSERT(mp != dead_rootmount); 681 682 fli = fstrans_get_lwp_info(mp, true); 683 fmi = fli->fli_mountinfo; 684 old_state = fmi->fmi_state; 685 if (old_state == new_state) 686 return 0; 687 688 mutex_enter(&fstrans_lock); 689 fmi->fmi_state = new_state; 690 pserialize_perform(fstrans_psz); 691 692 /* 693 * All threads see the new state now. 694 * Wait for transactions invalid at this state to leave. 695 */ 696 error = 0; 697 while (! state_change_done(fmi)) { 698 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 699 if (error) { 700 new_state = fmi->fmi_state = FSTRANS_NORMAL; 701 break; 702 } 703 } 704 if (old_state != new_state) { 705 if (old_state == FSTRANS_NORMAL) { 706 KASSERT(fmi->fmi_owner == NULL); 707 fmi->fmi_owner = curlwp; 708 } 709 if (new_state == FSTRANS_NORMAL) { 710 KASSERT(fmi->fmi_owner == curlwp); 711 fmi->fmi_owner = NULL; 712 } 713 } 714 cv_broadcast(&fstrans_state_cv); 715 mutex_exit(&fstrans_lock); 716 717 return error; 718 } 719 720 /* 721 * Get current file system state. 722 */ 723 enum fstrans_state 724 fstrans_getstate(struct mount *mp) 725 { 726 struct fstrans_lwp_info *fli; 727 struct fstrans_mount_info *fmi; 728 729 KASSERT(mp != dead_rootmount); 730 731 fli = fstrans_get_lwp_info(mp, true); 732 fmi = fli->fli_mountinfo; 733 734 return fmi->fmi_state; 735 } 736 737 /* 738 * Request a filesystem to suspend all operations. 739 */ 740 int 741 vfs_suspend(struct mount *mp, int nowait) 742 { 743 struct fstrans_lwp_info *fli; 744 int error; 745 746 if (mp == dead_rootmount) 747 return EOPNOTSUPP; 748 749 fli = fstrans_get_lwp_info(mp, true); 750 mp = fli->fli_mount; 751 752 if (nowait) { 753 if (!mutex_tryenter(&vfs_suspend_lock)) 754 return EWOULDBLOCK; 755 } else 756 mutex_enter(&vfs_suspend_lock); 757 758 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) 759 mutex_exit(&vfs_suspend_lock); 760 761 return error; 762 } 763 764 /* 765 * Request a filesystem to resume all operations. 766 */ 767 void 768 vfs_resume(struct mount *mp) 769 { 770 struct fstrans_lwp_info *fli; 771 772 KASSERT(mp != dead_rootmount); 773 774 fli = fstrans_get_lwp_info(mp, false); 775 mp = fli->fli_mount; 776 777 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 778 mutex_exit(&vfs_suspend_lock); 779 } 780 781 782 /* 783 * True, if no thread is running a cow handler. 784 */ 785 static bool 786 cow_state_change_done(const struct fstrans_mount_info *fmi) 787 { 788 struct fstrans_lwp_info *fli; 789 790 KASSERT(mutex_owned(&fstrans_lock)); 791 KASSERT(fmi->fmi_cow_change); 792 793 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 794 if (fli->fli_mount != fmi->fmi_mount) 795 continue; 796 if (fli->fli_cow_cnt == 0) 797 continue; 798 799 return false; 800 } 801 802 return true; 803 } 804 805 /* 806 * Prepare for changing this mounts cow list. 807 * Returns with fstrans_lock locked. 808 */ 809 static void 810 cow_change_enter(struct fstrans_mount_info *fmi) 811 { 812 813 mutex_enter(&fstrans_lock); 814 815 /* 816 * Wait for other threads changing the list. 817 */ 818 while (fmi->fmi_cow_change) 819 cv_wait(&fstrans_state_cv, &fstrans_lock); 820 821 /* 822 * Wait until all threads are aware of a state change. 823 */ 824 fmi->fmi_cow_change = true; 825 pserialize_perform(fstrans_psz); 826 827 while (! cow_state_change_done(fmi)) 828 cv_wait(&fstrans_count_cv, &fstrans_lock); 829 } 830 831 /* 832 * Done changing this mounts cow list. 833 */ 834 static void 835 cow_change_done(struct fstrans_mount_info *fmi) 836 { 837 838 KASSERT(mutex_owned(&fstrans_lock)); 839 840 fmi->fmi_cow_change = false; 841 pserialize_perform(fstrans_psz); 842 843 cv_broadcast(&fstrans_state_cv); 844 845 mutex_exit(&fstrans_lock); 846 } 847 848 /* 849 * Add a handler to this mount. 850 */ 851 int 852 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 853 void *arg) 854 { 855 struct fstrans_mount_info *fmi; 856 struct fscow_handler *newch; 857 858 KASSERT(mp != dead_rootmount); 859 860 mutex_enter(&fstrans_lock); 861 fmi = mp->mnt_transinfo; 862 KASSERT(fmi != NULL); 863 fmi->fmi_ref_cnt += 1; 864 mutex_exit(&fstrans_lock); 865 866 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 867 newch->ch_func = func; 868 newch->ch_arg = arg; 869 870 cow_change_enter(fmi); 871 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 872 cow_change_done(fmi); 873 874 return 0; 875 } 876 877 /* 878 * Remove a handler from this mount. 879 */ 880 int 881 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 882 void *arg) 883 { 884 struct fstrans_mount_info *fmi; 885 struct fscow_handler *hp = NULL; 886 887 KASSERT(mp != dead_rootmount); 888 889 fmi = mp->mnt_transinfo; 890 KASSERT(fmi != NULL); 891 892 cow_change_enter(fmi); 893 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 894 if (hp->ch_func == func && hp->ch_arg == arg) 895 break; 896 if (hp != NULL) { 897 LIST_REMOVE(hp, ch_list); 898 kmem_free(hp, sizeof(*hp)); 899 } 900 fstrans_mount_dtor(fmi); 901 cow_change_done(fmi); 902 903 return hp ? 0 : EINVAL; 904 } 905 906 /* 907 * Check for need to copy block that is about to be written. 908 */ 909 int 910 fscow_run(struct buf *bp, bool data_valid) 911 { 912 int error, s; 913 struct mount *mp; 914 struct fstrans_lwp_info *fli; 915 struct fstrans_mount_info *fmi; 916 struct fscow_handler *hp; 917 918 /* 919 * First check if we need run the copy-on-write handler. 920 */ 921 if ((bp->b_flags & B_COWDONE)) 922 return 0; 923 if (bp->b_vp == NULL) { 924 bp->b_flags |= B_COWDONE; 925 return 0; 926 } 927 if (bp->b_vp->v_type == VBLK) 928 mp = spec_node_getmountedfs(bp->b_vp); 929 else 930 mp = bp->b_vp->v_mount; 931 if (mp == NULL || mp == dead_rootmount) { 932 bp->b_flags |= B_COWDONE; 933 return 0; 934 } 935 936 fli = fstrans_get_lwp_info(mp, true); 937 fmi = fli->fli_mountinfo; 938 939 /* 940 * On non-recursed run check if other threads 941 * want to change the list. 942 */ 943 if (fli->fli_cow_cnt == 0) { 944 s = pserialize_read_enter(); 945 if (__predict_false(fmi->fmi_cow_change)) { 946 pserialize_read_exit(s); 947 mutex_enter(&fstrans_lock); 948 while (fmi->fmi_cow_change) 949 cv_wait(&fstrans_state_cv, &fstrans_lock); 950 fli->fli_cow_cnt = 1; 951 mutex_exit(&fstrans_lock); 952 } else { 953 fli->fli_cow_cnt = 1; 954 pserialize_read_exit(s); 955 } 956 } else 957 fli->fli_cow_cnt += 1; 958 959 /* 960 * Run all copy-on-write handlers, stop on error. 961 */ 962 error = 0; 963 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 964 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 965 break; 966 if (error == 0) 967 bp->b_flags |= B_COWDONE; 968 969 /* 970 * Check if other threads want to change the list. 971 */ 972 if (fli->fli_cow_cnt > 1) { 973 fli->fli_cow_cnt -= 1; 974 } else { 975 s = pserialize_read_enter(); 976 if (__predict_false(fmi->fmi_cow_change)) { 977 pserialize_read_exit(s); 978 mutex_enter(&fstrans_lock); 979 fli->fli_cow_cnt = 0; 980 cv_signal(&fstrans_count_cv); 981 mutex_exit(&fstrans_lock); 982 } else { 983 fli->fli_cow_cnt = 0; 984 pserialize_read_exit(s); 985 } 986 } 987 988 return error; 989 } 990 991 #if defined(DDB) 992 void fstrans_dump(int); 993 994 static void 995 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 996 { 997 char prefix[9]; 998 struct fstrans_lwp_info *fli; 999 1000 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 1001 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 1002 if (fli->fli_self != l) 1003 continue; 1004 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 1005 if (! verbose) 1006 continue; 1007 } 1008 printf("%-8s", prefix); 1009 if (verbose) 1010 printf(" @%p", fli); 1011 if (fli->fli_mount == dead_rootmount) 1012 printf(" <dead>"); 1013 else if (fli->fli_mount != NULL) 1014 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 1015 else 1016 printf(" NULL"); 1017 if (fli->fli_alias != NULL) { 1018 struct mount *amp = fli->fli_alias->fli_mount; 1019 1020 printf(" alias"); 1021 if (verbose) 1022 printf(" @%p", fli->fli_alias); 1023 if (amp == NULL) 1024 printf(" NULL"); 1025 else 1026 printf(" (%s)", amp->mnt_stat.f_mntonname); 1027 } 1028 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 1029 printf(" gone"); 1030 if (fli->fli_trans_cnt == 0) { 1031 printf(" -"); 1032 } else { 1033 switch (fli->fli_lock_type) { 1034 case FSTRANS_LAZY: 1035 printf(" lazy"); 1036 break; 1037 case FSTRANS_SHARED: 1038 printf(" shared"); 1039 break; 1040 default: 1041 printf(" %#x", fli->fli_lock_type); 1042 break; 1043 } 1044 } 1045 printf(" %d cow %d alias %d\n", 1046 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 1047 prefix[0] = '\0'; 1048 } 1049 } 1050 1051 static void 1052 fstrans_print_mount(struct mount *mp, int verbose) 1053 { 1054 struct fstrans_mount_info *fmi; 1055 1056 fmi = mp->mnt_transinfo; 1057 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1058 return; 1059 1060 printf("%-16s ", mp->mnt_stat.f_mntonname); 1061 if (fmi == NULL) { 1062 printf("(null)\n"); 1063 return; 1064 } 1065 printf("owner %p ", fmi->fmi_owner); 1066 switch (fmi->fmi_state) { 1067 case FSTRANS_NORMAL: 1068 printf("state normal\n"); 1069 break; 1070 case FSTRANS_SUSPENDING: 1071 printf("state suspending\n"); 1072 break; 1073 case FSTRANS_SUSPENDED: 1074 printf("state suspended\n"); 1075 break; 1076 default: 1077 printf("state %#x\n", fmi->fmi_state); 1078 break; 1079 } 1080 } 1081 1082 void 1083 fstrans_dump(int full) 1084 { 1085 const struct proclist_desc *pd; 1086 struct proc *p; 1087 struct lwp *l; 1088 struct mount *mp; 1089 1090 printf("Fstrans locks by lwp:\n"); 1091 for (pd = proclists; pd->pd_list != NULL; pd++) 1092 PROCLIST_FOREACH(p, pd->pd_list) 1093 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1094 fstrans_print_lwp(p, l, full == 1); 1095 1096 printf("Fstrans state by mount:\n"); 1097 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1098 fstrans_print_mount(mp, full == 1); 1099 } 1100 #endif /* defined(DDB) */ 1101