/*	$NetBSD: vfs_trans.c,v 1.68 2022/08/22 09:13:08 hannken Exp $	*/

/*-
 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Juergen Hannken-Illjes.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.68 2022/08/22 09:13:08 hannken Exp $");

/*
 * File system transaction operations.
 */

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/pool.h>

#include <miscfs/specfs/specdev.h>

#define FSTRANS_MOUNT_HASHSIZE	32

enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED			/* Granted while not suspending */
};

struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;
	int (*ch_func)(void *, struct buf *, bool);
	void *ch_arg;
};
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;
	struct lwp *fli_self;
	struct mount *fli_mount;
	struct fstrans_lwp_info *fli_alias;
	struct fstrans_mount_info *fli_mountinfo;
	int fli_trans_cnt;
	int fli_alias_cnt;
	int fli_cow_cnt;
	enum fstrans_lock_type fli_lock_type;
	LIST_ENTRY(fstrans_lwp_info) fli_list;
};
struct fstrans_mount_info {
	enum fstrans_state fmi_state;
	unsigned int fmi_ref_cnt;
	bool fmi_gone;
	bool fmi_cow_change;
	SLIST_ENTRY(fstrans_mount_info) fmi_hash;
	LIST_HEAD(, fscow_handler) fmi_cow_handler;
	struct mount *fmi_mount;
	struct fstrans_mount_info *fmi_lower_info;
	struct lwp *fmi_owner;
};
SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info);

static kmutex_t vfs_suspend_lock	/* Serialize suspensions. */
    __cacheline_aligned;
static kmutex_t fstrans_lock		/* Fstrans big lock. */
    __cacheline_aligned;
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_cache;	/* Cache of fstrans_lwp_info. */

static u_long fstrans_mount_hashmask;
static struct fstrans_mount_hashhead *fstrans_mount_hashtab;
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */

static inline uint32_t fstrans_mount_hash(struct mount *);
static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *);
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

extern struct mount *dead_rootmount;
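
/*
 * Locking overview (a summary of the code below, not a contract):
 * the transaction fast path in _fstrans_start()/fstrans_done() runs
 * inside a pserialize read section and only touches the per-lwp info,
 * so taking and releasing a transaction in the FSTRANS_NORMAL state
 * needs no interprocessor synchronization.  State changes in
 * fstrans_setstate() and cow list changes publish the new state with
 * pserialize_perform() and then wait on fstrans_count_cv, under
 * fstrans_lock, for threads that entered before the change to drain.
 */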

/*
 * Initialize.
 */
void
fstrans_init(void)
{

	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&fstrans_state_cv, "fstchg");
	cv_init(&fstrans_count_cv, "fstcnt");
	fstrans_psz = pserialize_create();
	LIST_INIT(&fstrans_fli_head);
	fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
	    coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
	    fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
	KASSERT(fstrans_lwp_cache != NULL);
	fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST,
	    true, &fstrans_mount_hashmask);
}

/*
 * pool_cache constructor for fstrans_lwp_info.  Updating the global list
 * produces cache misses on MP.  Minimise by keeping free entries on list.
 */
int
fstrans_lwp_pcc(void *arg, void *obj, int flags)
{
	struct fstrans_lwp_info *fli = obj;

	memset(fli, 0, sizeof(*fli));

	mutex_enter(&fstrans_lock);
	LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
	mutex_exit(&fstrans_lock);

	return 0;
}

/*
 * pool_cache destructor
 */
void
fstrans_lwp_pcd(void *arg, void *obj)
{
	struct fstrans_lwp_info *fli = obj;

	mutex_enter(&fstrans_lock);
	LIST_REMOVE(fli, fli_list);
	mutex_exit(&fstrans_lock);
}
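
/*
 * Note: entries stay on fstrans_fli_head for as long as they sit in the
 * pool cache.  state_change_done() and cow_state_change_done() below walk
 * this list under fstrans_lock, so a cached but unused entry is harmless:
 * its counters are zero and its mount pointers are NULL, and it is skipped.
 */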

/*
 * Deallocate lwp state.
 */
void
fstrans_lwp_dtor(lwp_t *l)
{
	struct fstrans_lwp_info *fli, *fli_next;

	if (l->l_fstrans == NULL)
		return;

	mutex_enter(&fstrans_lock);
	for (fli = l->l_fstrans; fli; fli = fli_next) {
		KASSERT(fli->fli_trans_cnt == 0);
		KASSERT(fli->fli_cow_cnt == 0);
		KASSERT(fli->fli_self == l);
		if (fli->fli_mount != NULL)
			fstrans_mount_dtor(fli->fli_mountinfo);
		fli_next = fli->fli_succ;
		fli->fli_alias_cnt = 0;
		fli->fli_mount = NULL;
		fli->fli_alias = NULL;
		fli->fli_mountinfo = NULL;
		fli->fli_self = NULL;
	}
	mutex_exit(&fstrans_lock);

	for (fli = l->l_fstrans; fli; fli = fli_next) {
		fli_next = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
	l->l_fstrans = NULL;
}

/*
 * mount pointer to hash
 */
static inline uint32_t
fstrans_mount_hash(struct mount *mp)
{

	return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) &
	    fstrans_mount_hashmask;
}

/*
 * retrieve fstrans_mount_info by mount or NULL
 */
static inline struct fstrans_mount_info *
fstrans_mount_get(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi, *fmi_lower;

	KASSERT(mutex_owned(&fstrans_lock));

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
		if (fmi->fmi_mount == mp) {
			if (__predict_false(mp->mnt_lower != NULL &&
			    fmi->fmi_lower_info == NULL)) {
				/*
				 * Intern the lower/lowest mount into
				 * this mount info on first lookup.
				 */
				KASSERT(fmi->fmi_ref_cnt == 1);

				fmi_lower = fstrans_mount_get(mp->mnt_lower);
				if (fmi_lower && fmi_lower->fmi_lower_info)
					fmi_lower = fmi_lower->fmi_lower_info;
				if (fmi_lower == NULL)
					return NULL;
				fmi->fmi_lower_info = fmi_lower;
				fmi->fmi_lower_info->fmi_ref_cnt += 1;
			}
			return fmi;
		}
	}

	return NULL;
}

/*
 * Dereference mount state.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt -= 1;
	if (__predict_true(fmi->fmi_ref_cnt > 0)) {
		return;
	}

	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
	KASSERT(fmi->fmi_owner == NULL);

	if (fmi->fmi_lower_info)
		fstrans_mount_dtor(fmi->fmi_lower_info);

	KASSERT(fstrans_gone_count > 0);
	fstrans_gone_count -= 1;

	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
	kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 */
int
fstrans_mount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *newfmi;

	indx = fstrans_mount_hash(mp);

	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
	newfmi->fmi_state = FSTRANS_NORMAL;
	newfmi->fmi_ref_cnt = 1;
	newfmi->fmi_gone = false;
	LIST_INIT(&newfmi->fmi_cow_handler);
	newfmi->fmi_cow_change = false;
	newfmi->fmi_mount = mp;
	newfmi->fmi_lower_info = NULL;
	newfmi->fmi_owner = NULL;

	mutex_enter(&fstrans_lock);
	SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash);
	mutex_exit(&fstrans_lock);

	return 0;
}
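
/*
 * Mount info lifecycle, a sketch (the callers live outside this file):
 *
 *	fstrans_mount(mp);		// presumably at mount setup
 *	...
 *	fstrans_unmount(mp);		// marks the info gone at unmount
 *
 * The last reference is dropped by fstrans_mount_dtor(), which also frees
 * the struct mount itself, so lingering per-lwp references keep both alive
 * until the owning threads pass through fstrans_clear_lwp_info() or
 * fstrans_lwp_dtor().
 */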

/*
 * Deallocate mount state.
 */
void
fstrans_unmount(struct mount *mp)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_gone = true;
	SLIST_REMOVE(&fstrans_mount_hashtab[indx],
	    fmi, fstrans_mount_info, fmi_hash);
	fstrans_gone_count += 1;
	fstrans_mount_dtor(fmi);
	mutex_exit(&fstrans_lock);
}

/*
 * Clear mount entries whose mount is gone.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli, *tofree = NULL;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	mutex_enter(&fstrans_lock);
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			fli->fli_self = NULL;
			p = &curlwp->l_fstrans;
			fli->fli_succ = tofree;
			tofree = fli;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
	mutex_exit(&fstrans_lock);

	while (tofree != NULL) {
		fli = tofree;
		tofree = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
}

/*
 * Allocate and return per lwp info for this mount.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli, *fli_lower;
	struct fstrans_mount_info *fmi;

	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Lookup mount info and get lower mount per lwp info.
	 */
	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	if (fmi == NULL) {
		mutex_exit(&fstrans_lock);
		return NULL;
	}
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_lock);

	if (fmi->fmi_lower_info) {
		fli_lower =
		    fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
		if (fli_lower == NULL) {
			mutex_enter(&fstrans_lock);
			fstrans_mount_dtor(fmi);
			mutex_exit(&fstrans_lock);

			return NULL;
		}
	} else {
		fli_lower = NULL;
	}

	/*
	 * Allocate a new entry.
	 */
	fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
	KASSERT(fli->fli_trans_cnt == 0);
	KASSERT(fli->fli_cow_cnt == 0);
	KASSERT(fli->fli_alias_cnt == 0);
	KASSERT(fli->fli_mount == NULL);
	KASSERT(fli->fli_alias == NULL);
	KASSERT(fli->fli_mountinfo == NULL);
	KASSERT(fli->fli_self == NULL);

	/*
	 * Attach the mount info and alias.
	 */

	fli->fli_self = curlwp;
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;

	fli->fli_succ = curlwp->l_fstrans;
	curlwp->l_fstrans = fli;

	if (fli_lower) {
		fli->fli_alias = fli_lower;
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}
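
/*
 * Illustration (assuming a layered file system, e.g. a null mount, whose
 * struct mount has mnt_lower set): the upper mount's lwp info gets
 * fli_alias pointing at the lwp info of the lowest mount, and
 * fstrans_get_lwp_info() hands out that lower info.  Transactions entered
 * through the upper layer are therefore counted on the lowest file system,
 * so suspending the lowest file system also blocks transactions entered
 * through the layers above it.
 */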

/*
 * Retrieve the per lwp info for this mount allocating if necessary.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Scan our list for a match.
	 */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp) {
			KASSERT((mp->mnt_lower == NULL) ==
			    (fli->fli_alias == NULL));
			if (fli->fli_alias != NULL)
				fli = fli->fli_alias;
			break;
		}
	}

	if (do_alloc) {
		if (__predict_false(fli == NULL))
			fli = fstrans_alloc_lwp_info(mp);
	}

	return fli;
}

/*
 * Check if this lock type is granted at this state.
 */
static bool
grant_lock(const struct fstrans_mount_info *fmi,
    const enum fstrans_lock_type type)
{

	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
		return true;
	if (fmi->fmi_owner == curlwp)
		return true;
	if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
		return true;

	return false;
}

/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	if (fli->fli_trans_cnt > 0) {
		fli->fli_trans_cnt += 1;

		return 0;
	}

	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}

void
fstrans_start(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
	KASSERT(error == 0);
}

int
fstrans_start_nowait(struct mount *mp)
{

	return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

void
fstrans_start_lazy(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
	KASSERT(error == 0);
}

/*
 * Finish a transaction.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return;
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}
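
/*
 * Typical use, a minimal sketch (not a caller from this file; the real
 * callers sit in the VFS/vnode layer):
 *
 *	fstrans_start(mp);
 *	// ... perform an operation on the file system mp ...
 *	fstrans_done(mp);
 *
 * or, where blocking on a suspended file system is not acceptable:
 *
 *	if (fstrans_start_nowait(mp) == 0) {
 *		// ... operation ...
 *		fstrans_done(mp);
 *	}
 */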

/*
 * Check if we hold a lock.
 */
int
fstrans_held(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
}

/*
 * Check if this thread has an exclusive lock.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	if (fli == NULL)
		return 0;
	fmi = fli->fli_mountinfo;

	return (fmi->fmi_owner == curlwp);
}

/*
 * True, if no thread is in a transaction not granted at the current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mountinfo != fmi)
			continue;
		if (fli->fli_trans_cnt == 0)
			continue;
		if (fli->fli_self == curlwp)
			continue;
		if (grant_lock(fmi, fli->fli_lock_type))
			continue;

		return false;
	}

	return true;
}

/*
 * Set new file system state.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == NULL);
			fmi->fmi_owner = curlwp;
		}
		if (new_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == curlwp);
			fmi->fmi_owner = NULL;
		}
	}
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	return error;
}

/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	return fmi->fmi_state;
}

/*
 * Request a filesystem to suspend all operations.
 */
int
vfs_suspend(struct mount *mp, int nowait)
{
	struct fstrans_lwp_info *fli;
	int error;

	if (mp == dead_rootmount)
		return EOPNOTSUPP;

	fli = fstrans_get_lwp_info(mp, true);
	if (fli == NULL)
		return ENOENT;

	if (nowait) {
		if (!mutex_tryenter(&vfs_suspend_lock))
			return EWOULDBLOCK;
	} else
		mutex_enter(&vfs_suspend_lock);

	if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
		mutex_exit(&vfs_suspend_lock);
		return error;
	}

	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		vfs_resume(mp);
		return ENOENT;
	}

	return 0;
}

/*
 * Request a filesystem to resume all operations.
 */
void
vfs_resume(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	mp = fli->fli_mount;

	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
	mutex_exit(&vfs_suspend_lock);
}
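
/*
 * Suspend/resume usage, a sketch (real callers, e.g. the fss(4) snapshot
 * code, live elsewhere; this is illustrative only):
 *
 *	error = vfs_suspend(mp, 0);	// may block on vfs_suspend_lock
 *	if (error == 0) {
 *		// ... file system is quiescent, e.g. take a snapshot ...
 *		vfs_resume(mp);
 *	}
 *
 * vfs_suspend() returns with vfs_suspend_lock held on success and
 * vfs_resume() releases it, so at most one suspension is in progress
 * system wide.
 */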

/*
 * True, if no thread is running a cow handler.
 */
static bool
cow_state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));
	KASSERT(fmi->fmi_cow_change);

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mount != fmi->fmi_mount)
			continue;
		if (fli->fli_cow_cnt == 0)
			continue;

		return false;
	}

	return true;
}

/*
 * Prepare for changing this mount's cow list.
 * Returns with fstrans_lock locked.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}

/*
 * Done changing this mount's cow list.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}

/*
 * Add a handler to this mount.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_lock);

	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
	newch->ch_func = func;
	newch->ch_arg = arg;

	cow_change_enter(fmi);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
	cow_change_done(fmi);

	return 0;
}
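
/*
 * A copy-on-write handler, a sketch (the handler and its argument are
 * hypothetical; file systems with snapshot support install the real one):
 *
 *	static int
 *	example_cow(void *arg, struct buf *bp, bool data_valid)
 *	{
 *		// copy the block about to be overwritten; return 0 on
 *		// success or an error to abort the write
 *		return 0;
 *	}
 *
 *	fscow_establish(mp, example_cow, example_arg);
 *	...
 *	fscow_disestablish(mp, example_cow, example_arg);
 *
 * fscow_run() below invokes every established handler before a block is
 * written and sets B_COWDONE once all of them succeed.
 */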

/*
 * Remove a handler from this mount.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp = NULL;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_lock);
	fmi = fstrans_mount_get(mp);
	KASSERT(fmi != NULL);
	mutex_exit(&fstrans_lock);

	cow_change_enter(fmi);
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if (hp->ch_func == func && hp->ch_arg == arg)
			break;
	if (hp != NULL) {
		LIST_REMOVE(hp, ch_list);
		kmem_free(hp, sizeof(*hp));
	}
	fstrans_mount_dtor(fmi);
	cow_change_done(fmi);

	return hp ? 0 : EINVAL;
}

/*
 * Check for need to copy block that is about to be written.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need to run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	KASSERT(fli != NULL);
	fmi = fli->fli_mountinfo;

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}

#if defined(DDB)
void fstrans_dump(int);

static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
	char prefix[9];
	struct fstrans_lwp_info *fli;

	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_self != l)
			continue;
		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
			if (!verbose)
				continue;
		}
		printf("%-8s", prefix);
		if (verbose)
			printf(" @%p", fli);
		if (fli->fli_mount == dead_rootmount)
			printf(" <dead>");
		else if (fli->fli_mount != NULL)
			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
		else
			printf(" NULL");
		if (fli->fli_alias != NULL) {
			struct mount *amp = fli->fli_alias->fli_mount;

			printf(" alias");
			if (verbose)
				printf(" @%p", fli->fli_alias);
			if (amp == NULL)
				printf(" NULL");
			else
				printf(" (%s)", amp->mnt_stat.f_mntonname);
		}
		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
			printf(" gone");
		if (fli->fli_trans_cnt == 0) {
			printf(" -");
		} else {
			switch (fli->fli_lock_type) {
			case FSTRANS_LAZY:
				printf(" lazy");
				break;
			case FSTRANS_SHARED:
				printf(" shared");
				break;
			default:
				printf(" %#x", fli->fli_lock_type);
				break;
			}
		}
		printf(" %d cow %d alias %d\n",
		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
		prefix[0] = '\0';
	}
}

static void
fstrans_print_mount(struct mount *mp, int verbose)
{
	uint32_t indx;
	struct fstrans_mount_info *fmi;

	indx = fstrans_mount_hash(mp);
	SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash)
		if (fmi->fmi_mount == mp)
			break;

	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
		return;

	printf("%-16s ", mp->mnt_stat.f_mntonname);
	if (fmi == NULL) {
		printf("(null)\n");
		return;
	}
	printf("owner %p ", fmi->fmi_owner);
	switch (fmi->fmi_state) {
	case FSTRANS_NORMAL:
		printf("state normal\n");
		break;
	case FSTRANS_SUSPENDING:
		printf("state suspending\n");
		break;
	case FSTRANS_SUSPENDED:
		printf("state suspended\n");
		break;
	default:
		printf("state %#x\n", fmi->fmi_state);
		break;
	}
}

void
fstrans_dump(int full)
{
	const struct proclist_desc *pd;
	struct proc *p;
	struct lwp *l;
	struct mount *mp;

	printf("Fstrans locks by lwp:\n");
	for (pd = proclists; pd->pd_list != NULL; pd++)
		PROCLIST_FOREACH(p, pd->pd_list)
			LIST_FOREACH(l, &p->p_lwps, l_sibling)
				fstrans_print_lwp(p, l, full == 1);

	printf("Fstrans state by mount:\n");
	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
		fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */