1 /* $NetBSD: vfs_trans.c,v 1.73 2024/12/07 02:27:38 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.73 2024/12/07 02:27:38 riastradh Exp $"); 34 35 /* 36 * File system transaction operations. 
37 */ 38 39 #ifdef _KERNEL_OPT 40 #include "opt_ddb.h" 41 #endif 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 46 #include <sys/atomic.h> 47 #include <sys/buf.h> 48 #include <sys/fstrans.h> 49 #include <sys/hash.h> 50 #include <sys/kmem.h> 51 #include <sys/mount.h> 52 #include <sys/pool.h> 53 #include <sys/proc.h> 54 #include <sys/pserialize.h> 55 #include <sys/sdt.h> 56 #include <sys/systm.h> 57 #include <sys/vnode.h> 58 59 #include <miscfs/deadfs/deadfs.h> 60 #include <miscfs/specfs/specdev.h> 61 62 /* Size argument handed to hashinit() for the mount hash table. */ #define FSTRANS_MOUNT_HASHSIZE 32 63 64 /* Lock type requested when a transaction starts; grant_lock() checks it against the mount state. */ enum fstrans_lock_type { 65 FSTRANS_LAZY, /* Granted while not suspended */ 66 FSTRANS_SHARED /* Granted while not suspending */ 67 }; 68 69 /* One copy-on-write handler registered on a mount. */ struct fscow_handler { 70 LIST_ENTRY(fscow_handler) ch_list; /* Link on fmi_cow_handler. */ 71 int (*ch_func)(void *, struct buf *, bool); /* Handler; fscow_run() calls it with (ch_arg, bp, data_valid). */ 72 void *ch_arg; /* First argument passed to ch_func. */ 73 }; 74 /* Per-lwp, per-mount bookkeeping entry. */ struct fstrans_lwp_info { 75 struct fstrans_lwp_info *fli_succ; /* Next entry on the owning lwp's l_fstrans chain. */ 76 struct lwp *fli_self; /* Owning lwp; NULL while the entry is unused. */ 77 struct mount *fli_mount; /* Mount this entry was created for; NULL while unused. */ 78 struct fstrans_lwp_info *fli_alias; /* Entry of the lower mount that lookups resolve to, or NULL. */ 79 struct fstrans_mount_info *fli_mountinfo; /* Mount info referenced by this entry. */ 80 int fli_trans_cnt; /* Transaction nesting depth. */ 81 int fli_alias_cnt; /* Number of entries whose fli_alias points here. */ 82 int fli_cow_cnt; /* fscow_run() nesting depth. */ 83 enum fstrans_lock_type fli_lock_type; /* Lock type recorded when fli_trans_cnt was set to 1. */ 84 LIST_ENTRY(fstrans_lwp_info) fli_list; /* Link on fstrans_fli_head. */ 85 }; 86 /* Per-mount fstrans state, reference counted. */ struct fstrans_mount_info { 87 enum fstrans_state fmi_state; /* Current fstrans state of the mount. */ 88 unsigned int fmi_ref_cnt; /* Reference count; fstrans_mount_dtor() frees at 0. */ 89 bool fmi_gone; /* Set by fstrans_unmount(). */ 90 bool fmi_cow_change; /* True while the cow handler list is being changed. */ 91 SLIST_ENTRY(fstrans_mount_info) fmi_hash; /* Link in the mount hash table. */ 92 LIST_HEAD(, fscow_handler) fmi_cow_handler; /* Registered copy-on-write handlers. */ 93 struct mount *fmi_mount; /* Mount this info belongs to. */ 94 struct fstrans_mount_info *fmi_lower_info; /* Info of the lower mount, filled in by fstrans_mount_get(), or NULL. */ 95 struct lwp *fmi_owner; /* lwp recorded by fstrans_setstate() when the state left FSTRANS_NORMAL, else NULL. */ 96 }; 97 SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info); 98 99 static kmutex_t vfs_suspend_lock /* Serialize suspensions. */ 100 __cacheline_aligned; 101 static kmutex_t fstrans_lock /* Fstrans big lock. */ 102 __cacheline_aligned; 103 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */ 104 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */ 105 static pserialize_t fstrans_psz; /* Pserialize state. 
*/ 106 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head; 107 /* List of all fstrans_lwp_info. */ 108 static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */ 109 110 static u_long fstrans_mount_hashmask; /* Mask applied by fstrans_mount_hash(); filled in by hashinit(). */ 111 static struct fstrans_mount_hashhead *fstrans_mount_hashtab; /* Mount hash table, indexed by fstrans_mount_hash(). */ 112 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */ 113 /* Forward declarations of the file-local helpers defined below. */ 114 static inline uint32_t fstrans_mount_hash(struct mount *); 115 static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *); 116 static void fstrans_mount_dtor(struct fstrans_mount_info *); 117 static void fstrans_clear_lwp_info(void); 118 static inline struct fstrans_lwp_info * 119 fstrans_get_lwp_info(struct mount *, bool); 120 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *); 121 static int fstrans_lwp_pcc(void *, void *, int); 122 static void fstrans_lwp_pcd(void *, void *); 123 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int); 124 static bool grant_lock(const struct fstrans_mount_info *, 125 const enum fstrans_lock_type); 126 static bool state_change_done(const struct fstrans_mount_info *); 127 static bool cow_state_change_done(const struct fstrans_mount_info *); 128 static void cow_change_enter(struct fstrans_mount_info *); 129 static void cow_change_done(struct fstrans_mount_info *); 130 131 /* 132 * Initialize. 
133 */ 134 void 135 fstrans_init(void) 136 { 137 138 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 139 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE); 140 cv_init(&fstrans_state_cv, "fstchg"); 141 cv_init(&fstrans_count_cv, "fstcnt"); 142 fstrans_psz = pserialize_create(); 143 LIST_INIT(&fstrans_fli_head); 144 fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 145 coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE, 146 fstrans_lwp_pcc, fstrans_lwp_pcd, NULL); 147 KASSERT(fstrans_lwp_cache != NULL); 148 fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST, 149 true, &fstrans_mount_hashmask); 150 } 151 152 /* 153 * pool_cache constructor for fstrans_lwp_info. Updating the global list 154 * produces cache misses on MP. Minimise by keeping free entries on list. 155 */ 156 int 157 fstrans_lwp_pcc(void *arg, void *obj, int flags) 158 { 159 struct fstrans_lwp_info *fli = obj; 160 161 memset(fli, 0, sizeof(*fli)); 162 163 mutex_enter(&fstrans_lock); 164 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list); 165 mutex_exit(&fstrans_lock); 166 167 return 0; 168 } 169 170 /* 171 * pool_cache destructor 172 */ 173 void 174 fstrans_lwp_pcd(void *arg, void *obj) 175 { 176 struct fstrans_lwp_info *fli = obj; 177 178 mutex_enter(&fstrans_lock); 179 LIST_REMOVE(fli, fli_list); 180 mutex_exit(&fstrans_lock); 181 } 182 183 /* 184 * Deallocate lwp state. 
185 */ 186 void 187 fstrans_lwp_dtor(lwp_t *l) 188 { 189 struct fstrans_lwp_info *fli, *fli_next; 190 191 if (l->l_fstrans == NULL) 192 return; 193 194 mutex_enter(&fstrans_lock); 195 for (fli = l->l_fstrans; fli; fli = fli_next) { 196 KASSERT(fli->fli_trans_cnt == 0); 197 KASSERT(fli->fli_cow_cnt == 0); 198 KASSERT(fli->fli_self == l); 199 if (fli->fli_mount != NULL) 200 fstrans_mount_dtor(fli->fli_mountinfo); 201 fli_next = fli->fli_succ; 202 fli->fli_alias_cnt = 0; 203 fli->fli_mount = NULL; 204 fli->fli_alias = NULL; 205 fli->fli_mountinfo = NULL; 206 fli->fli_self = NULL; 207 } 208 mutex_exit(&fstrans_lock); 209 210 for (fli = l->l_fstrans; fli; fli = fli_next) { 211 fli_next = fli->fli_succ; 212 pool_cache_put(fstrans_lwp_cache, fli); 213 } 214 l->l_fstrans = NULL; 215 } 216 217 /* 218 * mount pointer to hash 219 */ 220 static inline uint32_t 221 fstrans_mount_hash(struct mount *mp) 222 { 223 224 return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) & 225 fstrans_mount_hashmask; 226 } 227 228 /* 229 * retrieve fstrans_mount_info by mount or NULL 230 */ 231 static inline struct fstrans_mount_info * 232 fstrans_mount_get(struct mount *mp) 233 { 234 uint32_t indx; 235 struct fstrans_mount_info *fmi, *fmi_lower; 236 237 KASSERT(mutex_owned(&fstrans_lock)); 238 239 indx = fstrans_mount_hash(mp); 240 SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) { 241 if (fmi->fmi_mount == mp) { 242 if (__predict_false(mp->mnt_lower != NULL && 243 fmi->fmi_lower_info == NULL)) { 244 /* 245 * Intern the lower/lowest mount into 246 * this mount info on first lookup. 247 */ 248 KASSERT(fmi->fmi_ref_cnt == 1); 249 250 fmi_lower = fstrans_mount_get(mp->mnt_lower); 251 if (fmi_lower && fmi_lower->fmi_lower_info) 252 fmi_lower = fmi_lower->fmi_lower_info; 253 if (fmi_lower == NULL) 254 return NULL; 255 fmi->fmi_lower_info = fmi_lower; 256 fmi->fmi_lower_info->fmi_ref_cnt += 1; 257 } 258 return fmi; 259 } 260 } 261 262 return NULL; 263 } 264 265 /* 266 * Dereference mount state. 
267 */ 268 static void 269 fstrans_mount_dtor(struct fstrans_mount_info *fmi) 270 { 271 272 KASSERT(mutex_owned(&fstrans_lock)); 273 274 KASSERT(fmi != NULL); 275 fmi->fmi_ref_cnt -= 1; 276 if (__predict_true(fmi->fmi_ref_cnt > 0)) { 277 return; 278 } 279 280 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 281 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL); 282 KASSERT(fmi->fmi_owner == NULL); 283 284 if (fmi->fmi_lower_info) 285 fstrans_mount_dtor(fmi->fmi_lower_info); 286 287 KASSERT(fstrans_gone_count > 0); 288 fstrans_gone_count -= 1; 289 290 KASSERT(fmi->fmi_mount->mnt_lower == NULL); 291 292 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount)); 293 kmem_free(fmi, sizeof(*fmi)); 294 } 295 296 /* 297 * Allocate mount state. 298 */ 299 int 300 fstrans_mount(struct mount *mp) 301 { 302 uint32_t indx; 303 struct fstrans_mount_info *newfmi; 304 305 indx = fstrans_mount_hash(mp); 306 307 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP); 308 newfmi->fmi_state = FSTRANS_NORMAL; 309 newfmi->fmi_ref_cnt = 1; 310 newfmi->fmi_gone = false; 311 LIST_INIT(&newfmi->fmi_cow_handler); 312 newfmi->fmi_cow_change = false; 313 newfmi->fmi_mount = mp; 314 newfmi->fmi_lower_info = NULL; 315 newfmi->fmi_owner = NULL; 316 317 mutex_enter(&fstrans_lock); 318 SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash); 319 mutex_exit(&fstrans_lock); 320 321 return 0; 322 } 323 324 /* 325 * Deallocate mount state. 326 */ 327 void 328 fstrans_unmount(struct mount *mp) 329 { 330 uint32_t indx; 331 struct fstrans_mount_info *fmi; 332 333 indx = fstrans_mount_hash(mp); 334 335 mutex_enter(&fstrans_lock); 336 fmi = fstrans_mount_get(mp); 337 KASSERT(fmi != NULL); 338 fmi->fmi_gone = true; 339 SLIST_REMOVE(&fstrans_mount_hashtab[indx], 340 fmi, fstrans_mount_info, fmi_hash); 341 fstrans_gone_count += 1; 342 fstrans_mount_dtor(fmi); 343 mutex_exit(&fstrans_lock); 344 } 345 346 /* 347 * Clear mount entries whose mount is gone. 
348 */ 349 static void 350 fstrans_clear_lwp_info(void) 351 { 352 struct fstrans_lwp_info **p, *fli, *tofree = NULL; 353 354 /* 355 * Scan our list clearing entries whose mount is gone. 356 */ 357 mutex_enter(&fstrans_lock); 358 for (p = &curlwp->l_fstrans; *p; ) { 359 fli = *p; 360 if (fli->fli_mount != NULL && 361 fli->fli_mountinfo->fmi_gone && 362 fli->fli_trans_cnt == 0 && 363 fli->fli_cow_cnt == 0 && 364 fli->fli_alias_cnt == 0) { 365 *p = (*p)->fli_succ; 366 fstrans_mount_dtor(fli->fli_mountinfo); 367 if (fli->fli_alias) { 368 KASSERT(fli->fli_alias->fli_alias_cnt > 0); 369 fli->fli_alias->fli_alias_cnt--; 370 } 371 fli->fli_mount = NULL; 372 fli->fli_alias = NULL; 373 fli->fli_mountinfo = NULL; 374 fli->fli_self = NULL; 375 p = &curlwp->l_fstrans; 376 fli->fli_succ = tofree; 377 tofree = fli; 378 } else { 379 p = &(*p)->fli_succ; 380 } 381 } 382 #ifdef DIAGNOSTIC 383 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) 384 if (fli->fli_alias != NULL) 385 KASSERT(fli->fli_alias->fli_self == curlwp); 386 #endif /* DIAGNOSTIC */ 387 mutex_exit(&fstrans_lock); 388 389 while (tofree != NULL) { 390 fli = tofree; 391 tofree = fli->fli_succ; 392 pool_cache_put(fstrans_lwp_cache, fli); 393 } 394 } 395 396 /* 397 * Allocate and return per lwp info for this mount. 398 */ 399 static struct fstrans_lwp_info * 400 fstrans_alloc_lwp_info(struct mount *mp) 401 { 402 struct fstrans_lwp_info *fli, *fli_lower; 403 struct fstrans_mount_info *fmi; 404 405 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 406 if (fli->fli_mount == mp) 407 return fli; 408 } 409 410 /* 411 * Lookup mount info and get lower mount per lwp info. 
412 */ 413 mutex_enter(&fstrans_lock); 414 fmi = fstrans_mount_get(mp); 415 if (fmi == NULL) { 416 mutex_exit(&fstrans_lock); 417 return NULL; 418 } 419 fmi->fmi_ref_cnt += 1; 420 mutex_exit(&fstrans_lock); 421 422 if (fmi->fmi_lower_info) { 423 fli_lower = 424 fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount); 425 if (fli_lower == NULL) { 426 mutex_enter(&fstrans_lock); 427 fstrans_mount_dtor(fmi); 428 mutex_exit(&fstrans_lock); 429 430 return NULL; 431 } 432 } else { 433 fli_lower = NULL; 434 } 435 436 /* 437 * Allocate a new entry. 438 */ 439 fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK); 440 KASSERT(fli->fli_trans_cnt == 0); 441 KASSERT(fli->fli_cow_cnt == 0); 442 KASSERT(fli->fli_alias_cnt == 0); 443 KASSERT(fli->fli_mount == NULL); 444 KASSERT(fli->fli_alias == NULL); 445 KASSERT(fli->fli_mountinfo == NULL); 446 KASSERT(fli->fli_self == NULL); 447 448 /* 449 * Attach the mount info and alias. 450 */ 451 452 fli->fli_self = curlwp; 453 fli->fli_mount = mp; 454 fli->fli_mountinfo = fmi; 455 456 fli->fli_succ = curlwp->l_fstrans; 457 curlwp->l_fstrans = fli; 458 459 if (fli_lower) { 460 fli->fli_alias = fli_lower; 461 fli->fli_alias->fli_alias_cnt++; 462 fli = fli->fli_alias; 463 } 464 465 return fli; 466 } 467 468 /* 469 * Retrieve the per lwp info for this mount allocating if necessary. 470 */ 471 static inline struct fstrans_lwp_info * 472 fstrans_get_lwp_info(struct mount *mp, bool do_alloc) 473 { 474 struct fstrans_lwp_info *fli; 475 476 /* 477 * Scan our list for a match. 478 */ 479 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) { 480 if (fli->fli_mount == mp) { 481 KASSERT(mp->mnt_lower == NULL || 482 fli->fli_alias != NULL); 483 if (fli->fli_alias != NULL) 484 fli = fli->fli_alias; 485 break; 486 } 487 } 488 489 if (do_alloc) { 490 if (__predict_false(fli == NULL)) 491 fli = fstrans_alloc_lwp_info(mp); 492 } 493 494 return fli; 495 } 496 497 /* 498 * Check if this lock type is granted at this state. 
499 */ 500 static bool 501 grant_lock(const struct fstrans_mount_info *fmi, 502 const enum fstrans_lock_type type) 503 { 504 505 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) 506 return true; 507 if (fmi->fmi_owner == curlwp) 508 return true; 509 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY) 510 return true; 511 512 return false; 513 } 514 515 /* 516 * Start a transaction. If this thread already has a transaction on this 517 * file system increment the reference counter. 518 */ 519 static inline int 520 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 521 { 522 int s; 523 struct fstrans_lwp_info *fli; 524 struct fstrans_mount_info *fmi; 525 526 ASSERT_SLEEPABLE(); 527 528 fli = fstrans_get_lwp_info(mp, true); 529 if (fli == NULL) 530 return 0; 531 fmi = fli->fli_mountinfo; 532 533 if (fli->fli_trans_cnt > 0) { 534 fli->fli_trans_cnt += 1; 535 536 return 0; 537 } 538 539 s = pserialize_read_enter(); 540 if (__predict_true(grant_lock(fmi, lock_type))) { 541 fli->fli_trans_cnt = 1; 542 fli->fli_lock_type = lock_type; 543 pserialize_read_exit(s); 544 545 return 0; 546 } 547 pserialize_read_exit(s); 548 549 if (! wait) 550 return SET_ERROR(EBUSY); 551 552 mutex_enter(&fstrans_lock); 553 while (! grant_lock(fmi, lock_type)) 554 cv_wait(&fstrans_state_cv, &fstrans_lock); 555 fli->fli_trans_cnt = 1; 556 fli->fli_lock_type = lock_type; 557 mutex_exit(&fstrans_lock); 558 559 return 0; 560 } 561 562 void 563 fstrans_start(struct mount *mp) 564 { 565 int error __diagused; 566 567 error = _fstrans_start(mp, FSTRANS_SHARED, 1); 568 KASSERT(error == 0); 569 } 570 571 int 572 fstrans_start_nowait(struct mount *mp) 573 { 574 575 return _fstrans_start(mp, FSTRANS_SHARED, 0); 576 } 577 578 void 579 fstrans_start_lazy(struct mount *mp) 580 { 581 int error __diagused; 582 583 error = _fstrans_start(mp, FSTRANS_LAZY, 1); 584 KASSERT(error == 0); 585 } 586 587 /* 588 * Finish a transaction. 
589 */ 590 void 591 fstrans_done(struct mount *mp) 592 { 593 int s; 594 struct fstrans_lwp_info *fli; 595 struct fstrans_mount_info *fmi; 596 597 fli = fstrans_get_lwp_info(mp, false); 598 if (fli == NULL) 599 return; 600 fmi = fli->fli_mountinfo; 601 KASSERT(fli->fli_trans_cnt > 0); 602 603 if (fli->fli_trans_cnt > 1) { 604 fli->fli_trans_cnt -= 1; 605 606 return; 607 } 608 609 if (__predict_false(fstrans_gone_count > 0)) 610 fstrans_clear_lwp_info(); 611 612 s = pserialize_read_enter(); 613 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) { 614 fli->fli_trans_cnt = 0; 615 pserialize_read_exit(s); 616 617 return; 618 } 619 pserialize_read_exit(s); 620 621 mutex_enter(&fstrans_lock); 622 fli->fli_trans_cnt = 0; 623 cv_signal(&fstrans_count_cv); 624 mutex_exit(&fstrans_lock); 625 } 626 627 /* 628 * Check if we hold an lock. 629 */ 630 int 631 fstrans_held(struct mount *mp) 632 { 633 struct fstrans_lwp_info *fli; 634 struct fstrans_mount_info *fmi; 635 636 KASSERT(mp != dead_rootmount); 637 638 fli = fstrans_get_lwp_info(mp, false); 639 if (fli == NULL) 640 return 0; 641 fmi = fli->fli_mountinfo; 642 643 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp); 644 } 645 646 /* 647 * Check if this thread has an exclusive lock. 648 */ 649 int 650 fstrans_is_owner(struct mount *mp) 651 { 652 struct fstrans_lwp_info *fli; 653 struct fstrans_mount_info *fmi; 654 655 KASSERT(mp != dead_rootmount); 656 657 fli = fstrans_get_lwp_info(mp, false); 658 if (fli == NULL) 659 return 0; 660 fmi = fli->fli_mountinfo; 661 662 return (fmi->fmi_owner == curlwp); 663 } 664 665 /* 666 * True, if no thread is in a transaction not granted at the current state. 
667 */ 668 static bool 669 state_change_done(const struct fstrans_mount_info *fmi) 670 { 671 struct fstrans_lwp_info *fli; 672 673 KASSERT(mutex_owned(&fstrans_lock)); 674 675 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 676 if (fli->fli_mountinfo != fmi) 677 continue; 678 if (fli->fli_trans_cnt == 0) 679 continue; 680 if (fli->fli_self == curlwp) 681 continue; 682 if (grant_lock(fmi, fli->fli_lock_type)) 683 continue; 684 685 return false; 686 } 687 688 return true; 689 } 690 691 /* 692 * Set new file system state. 693 */ 694 int 695 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 696 { 697 int error; 698 enum fstrans_state old_state; 699 struct fstrans_lwp_info *fli; 700 struct fstrans_mount_info *fmi; 701 702 KASSERT(mp != dead_rootmount); 703 704 fli = fstrans_get_lwp_info(mp, true); 705 if (fli == NULL) 706 return SET_ERROR(ENOENT); 707 fmi = fli->fli_mountinfo; 708 old_state = fmi->fmi_state; 709 if (old_state == new_state) 710 return 0; 711 712 mutex_enter(&fstrans_lock); 713 fmi->fmi_state = new_state; 714 pserialize_perform(fstrans_psz); 715 716 /* 717 * All threads see the new state now. 718 * Wait for transactions invalid at this state to leave. 719 */ 720 error = 0; 721 while (! state_change_done(fmi)) { 722 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock); 723 if (error) { 724 new_state = fmi->fmi_state = FSTRANS_NORMAL; 725 break; 726 } 727 } 728 if (old_state != new_state) { 729 if (old_state == FSTRANS_NORMAL) { 730 KASSERT(fmi->fmi_owner == NULL); 731 fmi->fmi_owner = curlwp; 732 } 733 if (new_state == FSTRANS_NORMAL) { 734 KASSERT(fmi->fmi_owner == curlwp); 735 fmi->fmi_owner = NULL; 736 } 737 } 738 cv_broadcast(&fstrans_state_cv); 739 mutex_exit(&fstrans_lock); 740 741 return error; 742 } 743 744 /* 745 * Get current file system state. 
746 */ 747 enum fstrans_state 748 fstrans_getstate(struct mount *mp) 749 { 750 struct fstrans_lwp_info *fli; 751 struct fstrans_mount_info *fmi; 752 753 KASSERT(mp != dead_rootmount); 754 755 fli = fstrans_get_lwp_info(mp, true); 756 KASSERT(fli != NULL); 757 fmi = fli->fli_mountinfo; 758 759 return fmi->fmi_state; 760 } 761 762 /* 763 * Request a filesystem to suspend all operations. 764 */ 765 int 766 vfs_suspend(struct mount *mp, int nowait) 767 { 768 struct fstrans_lwp_info *fli; 769 int error; 770 771 if (mp == dead_rootmount) 772 return SET_ERROR(EOPNOTSUPP); 773 774 fli = fstrans_get_lwp_info(mp, true); 775 if (fli == NULL) 776 return SET_ERROR(ENOENT); 777 778 if (nowait) { 779 if (!mutex_tryenter(&vfs_suspend_lock)) 780 return SET_ERROR(EWOULDBLOCK); 781 } else 782 mutex_enter(&vfs_suspend_lock); 783 784 if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) { 785 mutex_exit(&vfs_suspend_lock); 786 return error; 787 } 788 789 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 790 vfs_resume(mp); 791 return SET_ERROR(ENOENT); 792 } 793 794 return 0; 795 } 796 797 /* 798 * Request a filesystem to resume all operations. 799 */ 800 void 801 vfs_resume(struct mount *mp) 802 { 803 struct fstrans_lwp_info *fli; 804 805 KASSERT(mp != dead_rootmount); 806 807 fli = fstrans_get_lwp_info(mp, false); 808 mp = fli->fli_mount; 809 810 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 811 mutex_exit(&vfs_suspend_lock); 812 } 813 814 /* 815 * True, if no thread is running a cow handler. 816 */ 817 static bool 818 cow_state_change_done(const struct fstrans_mount_info *fmi) 819 { 820 struct fstrans_lwp_info *fli; 821 822 KASSERT(mutex_owned(&fstrans_lock)); 823 KASSERT(fmi->fmi_cow_change); 824 825 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 826 if (fli->fli_mount != fmi->fmi_mount) 827 continue; 828 if (fli->fli_cow_cnt == 0) 829 continue; 830 831 return false; 832 } 833 834 return true; 835 } 836 837 /* 838 * Prepare for changing this mounts cow list. 
839 * Returns with fstrans_lock locked. 840 */ 841 static void 842 cow_change_enter(struct fstrans_mount_info *fmi) 843 { 844 845 mutex_enter(&fstrans_lock); 846 847 /* 848 * Wait for other threads changing the list. 849 */ 850 while (fmi->fmi_cow_change) 851 cv_wait(&fstrans_state_cv, &fstrans_lock); 852 853 /* 854 * Wait until all threads are aware of a state change. 855 */ 856 fmi->fmi_cow_change = true; 857 pserialize_perform(fstrans_psz); 858 859 while (! cow_state_change_done(fmi)) 860 cv_wait(&fstrans_count_cv, &fstrans_lock); 861 } 862 863 /* 864 * Done changing this mounts cow list. 865 */ 866 static void 867 cow_change_done(struct fstrans_mount_info *fmi) 868 { 869 870 KASSERT(mutex_owned(&fstrans_lock)); 871 872 fmi->fmi_cow_change = false; 873 pserialize_perform(fstrans_psz); 874 875 cv_broadcast(&fstrans_state_cv); 876 877 mutex_exit(&fstrans_lock); 878 } 879 880 /* 881 * Add a handler to this mount. 882 */ 883 int 884 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 885 void *arg) 886 { 887 struct fstrans_mount_info *fmi; 888 struct fscow_handler *newch; 889 890 KASSERT(mp != dead_rootmount); 891 892 mutex_enter(&fstrans_lock); 893 fmi = fstrans_mount_get(mp); 894 KASSERT(fmi != NULL); 895 fmi->fmi_ref_cnt += 1; 896 mutex_exit(&fstrans_lock); 897 898 newch = kmem_alloc(sizeof(*newch), KM_SLEEP); 899 newch->ch_func = func; 900 newch->ch_arg = arg; 901 902 cow_change_enter(fmi); 903 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list); 904 cow_change_done(fmi); 905 906 return 0; 907 } 908 909 /* 910 * Remove a handler from this mount. 
911 */ 912 int 913 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 914 void *arg) 915 { 916 struct fstrans_mount_info *fmi; 917 struct fscow_handler *hp = NULL; 918 919 KASSERT(mp != dead_rootmount); 920 921 mutex_enter(&fstrans_lock); 922 fmi = fstrans_mount_get(mp); 923 KASSERT(fmi != NULL); 924 mutex_exit(&fstrans_lock); 925 926 cow_change_enter(fmi); 927 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 928 if (hp->ch_func == func && hp->ch_arg == arg) 929 break; 930 if (hp != NULL) { 931 LIST_REMOVE(hp, ch_list); 932 kmem_free(hp, sizeof(*hp)); 933 } 934 fstrans_mount_dtor(fmi); 935 cow_change_done(fmi); 936 937 return hp ? 0 : SET_ERROR(EINVAL); 938 } 939 940 /* 941 * Check for need to copy block that is about to be written. 942 */ 943 int 944 fscow_run(struct buf *bp, bool data_valid) 945 { 946 int error, s; 947 struct mount *mp; 948 struct fstrans_lwp_info *fli; 949 struct fstrans_mount_info *fmi; 950 struct fscow_handler *hp; 951 952 /* 953 * First check if we need run the copy-on-write handler. 954 */ 955 if ((bp->b_flags & B_COWDONE)) 956 return 0; 957 if (bp->b_vp == NULL) { 958 bp->b_flags |= B_COWDONE; 959 return 0; 960 } 961 if (bp->b_vp->v_type == VBLK) 962 mp = spec_node_getmountedfs(bp->b_vp); 963 else 964 mp = bp->b_vp->v_mount; 965 if (mp == NULL || mp == dead_rootmount) { 966 bp->b_flags |= B_COWDONE; 967 return 0; 968 } 969 970 fli = fstrans_get_lwp_info(mp, true); 971 KASSERT(fli != NULL); 972 fmi = fli->fli_mountinfo; 973 974 /* 975 * On non-recursed run check if other threads 976 * want to change the list. 
977 */ 978 if (fli->fli_cow_cnt == 0) { 979 s = pserialize_read_enter(); 980 if (__predict_false(fmi->fmi_cow_change)) { 981 pserialize_read_exit(s); 982 mutex_enter(&fstrans_lock); 983 while (fmi->fmi_cow_change) 984 cv_wait(&fstrans_state_cv, &fstrans_lock); 985 fli->fli_cow_cnt = 1; 986 mutex_exit(&fstrans_lock); 987 } else { 988 fli->fli_cow_cnt = 1; 989 pserialize_read_exit(s); 990 } 991 } else 992 fli->fli_cow_cnt += 1; 993 994 /* 995 * Run all copy-on-write handlers, stop on error. 996 */ 997 error = 0; 998 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 999 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 1000 break; 1001 if (error == 0) 1002 bp->b_flags |= B_COWDONE; 1003 1004 /* 1005 * Check if other threads want to change the list. 1006 */ 1007 if (fli->fli_cow_cnt > 1) { 1008 fli->fli_cow_cnt -= 1; 1009 } else { 1010 s = pserialize_read_enter(); 1011 if (__predict_false(fmi->fmi_cow_change)) { 1012 pserialize_read_exit(s); 1013 mutex_enter(&fstrans_lock); 1014 fli->fli_cow_cnt = 0; 1015 cv_signal(&fstrans_count_cv); 1016 mutex_exit(&fstrans_lock); 1017 } else { 1018 fli->fli_cow_cnt = 0; 1019 pserialize_read_exit(s); 1020 } 1021 } 1022 1023 return error; 1024 } 1025 1026 #if defined(DDB) 1027 void fstrans_dump(int); 1028 1029 static void 1030 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 1031 { 1032 char prefix[9]; 1033 struct fstrans_lwp_info *fli; 1034 1035 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 1036 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) { 1037 if (fli->fli_self != l) 1038 continue; 1039 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) { 1040 if (! 
verbose) 1041 continue; 1042 } 1043 printf("%-8s", prefix); 1044 if (verbose) 1045 printf(" @%p", fli); 1046 if (fli->fli_mount == dead_rootmount) 1047 printf(" <dead>"); 1048 else if (fli->fli_mount != NULL) 1049 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 1050 else 1051 printf(" NULL"); 1052 if (fli->fli_alias != NULL) { 1053 struct mount *amp = fli->fli_alias->fli_mount; 1054 1055 printf(" alias"); 1056 if (verbose) 1057 printf(" @%p", fli->fli_alias); 1058 if (amp == NULL) 1059 printf(" NULL"); 1060 else 1061 printf(" (%s)", amp->mnt_stat.f_mntonname); 1062 } 1063 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone) 1064 printf(" gone"); 1065 if (fli->fli_trans_cnt == 0) { 1066 printf(" -"); 1067 } else { 1068 switch (fli->fli_lock_type) { 1069 case FSTRANS_LAZY: 1070 printf(" lazy"); 1071 break; 1072 case FSTRANS_SHARED: 1073 printf(" shared"); 1074 break; 1075 default: 1076 printf(" %#x", fli->fli_lock_type); 1077 break; 1078 } 1079 } 1080 printf(" %d cow %d alias %d\n", 1081 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt); 1082 prefix[0] = '\0'; 1083 } 1084 } 1085 1086 static void 1087 fstrans_print_mount(struct mount *mp, int verbose) 1088 { 1089 uint32_t indx; 1090 struct fstrans_mount_info *fmi; 1091 1092 indx = fstrans_mount_hash(mp); 1093 SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) 1094 if (fmi->fmi_mount == mp) 1095 break; 1096 1097 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 1098 return; 1099 1100 printf("%-16s ", mp->mnt_stat.f_mntonname); 1101 if (fmi == NULL) { 1102 printf("(null)\n"); 1103 return; 1104 } 1105 printf("owner %p ", fmi->fmi_owner); 1106 switch (fmi->fmi_state) { 1107 case FSTRANS_NORMAL: 1108 printf("state normal\n"); 1109 break; 1110 case FSTRANS_SUSPENDING: 1111 printf("state suspending\n"); 1112 break; 1113 case FSTRANS_SUSPENDED: 1114 printf("state suspended\n"); 1115 break; 1116 default: 1117 printf("state %#x\n", fmi->fmi_state); 1118 break; 1119 } 1120 } 1121 1122 
void 1123 fstrans_dump(int full) 1124 { 1125 const struct proclist_desc *pd; 1126 struct proc *p; 1127 struct lwp *l; 1128 struct mount *mp; 1129 1130 printf("Fstrans locks by lwp:\n"); 1131 for (pd = proclists; pd->pd_list != NULL; pd++) 1132 PROCLIST_FOREACH(p, pd->pd_list) 1133 LIST_FOREACH(l, &p->p_lwps, l_sibling) 1134 fstrans_print_lwp(p, l, full == 1); 1135 1136 printf("Fstrans state by mount:\n"); 1137 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) 1138 fstrans_print_mount(mp, full == 1); 1139 } 1140 #endif /* defined(DDB) */ 1141