1 /* $NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $"); 41 42 /* 43 * File system transaction operations. 44 */ 45 46 #include "opt_ddb.h" 47 48 #if defined(DDB) 49 #define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */ 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/malloc.h> 55 #include <sys/kmem.h> 56 #include <sys/mount.h> 57 #include <sys/rwlock.h> 58 #include <sys/vnode.h> 59 #define _FSTRANS_API_PRIVATE 60 #include <sys/fstrans.h> 61 #include <sys/proc.h> 62 63 #include <miscfs/specfs/specdev.h> 64 #include <miscfs/syncfs/syncfs.h> 65 66 struct fstrans_lwp_info { 67 struct fstrans_lwp_info *fli_succ; 68 struct mount *fli_mount; 69 int fli_count; 70 enum fstrans_lock_type fli_lock_type; 71 }; 72 struct fstrans_mount_info { 73 enum fstrans_state fmi_state; 74 krwlock_t fmi_shared_lock; 75 krwlock_t fmi_lazy_lock; 76 }; 77 78 static specificdata_key_t lwp_data_key; 79 static specificdata_key_t mount_data_key; 80 static specificdata_key_t mount_cow_key; 81 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 82 static kmutex_t fstrans_init_lock; 83 84 POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0, 85 "fstrans", NULL, IPL_NONE); 86 87 static void fstrans_lwp_dtor(void *); 88 static void fstrans_mount_dtor(void *); 89 static void fscow_mount_dtor(void *); 90 static struct fstrans_mount_info *fstrans_mount_init(struct mount *); 91 92 /* 93 * Initialize 94 */ 95 void 96 fstrans_init(void) 97 { 98 int error; 99 100 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); 101 KASSERT(error == 0); 102 error = mount_specific_key_create(&mount_data_key, fstrans_mount_dtor); 103 KASSERT(error == 0); 104 error = mount_specific_key_create(&mount_cow_key, fscow_mount_dtor); 105 KASSERT(error == 0); 106 107 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 108 mutex_init(&fstrans_init_lock, MUTEX_DEFAULT, IPL_NONE); 109 } 110 111 /* 112 * Deallocate lwp state 113 */ 114 static void 115 fstrans_lwp_dtor(void *arg) 116 { 117 struct fstrans_lwp_info *fli, *fli_next; 118 119 for (fli = arg; fli; fli = fli_next) { 120 KASSERT(fli->fli_mount == NULL); 121 KASSERT(fli->fli_count == 0); 122 fli_next = fli->fli_succ; 123 pool_put(&fstrans_pl, fli); 124 } 125 } 126 127 /* 128 * Deallocate mount state 129 */ 130 static void 131 fstrans_mount_dtor(void *arg) 132 { 133 struct fstrans_mount_info *fmi = arg; 134 135 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 136 rw_destroy(&fmi->fmi_lazy_lock); 137 rw_destroy(&fmi->fmi_shared_lock); 138 free(fmi, M_MOUNT); 139 } 140 141 /* 142 * Create mount info for this mount 143 */ 144 static struct fstrans_mount_info * 145 fstrans_mount_init(struct mount *mp) 146 { 147 struct fstrans_mount_info *new; 148 149 mutex_enter(&fstrans_init_lock); 150 151 if ((new = mount_getspecific(mp, mount_data_key)) != NULL) { 152 mutex_exit(&fstrans_init_lock); 153 return new; 154 } 155 156 new = malloc(sizeof(*new), M_MOUNT, M_WAITOK); 157 new->fmi_state = FSTRANS_NORMAL; 158 rw_init(&new->fmi_lazy_lock); 159 rw_init(&new->fmi_shared_lock); 160 161 mount_setspecific(mp, mount_data_key, new); 162 mutex_exit(&fstrans_init_lock); 163 164 return new; 165 } 166 167 /* 168 * Start a transaction. If this thread already has a transaction on this 169 * file system increment the reference counter. 170 * A thread with an exclusive transaction lock may get a shared or lazy one. 171 * A thread with a shared or lazy transaction lock cannot upgrade to an 172 * exclusive one yet. 173 */ 174 int 175 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 176 { 177 krwlock_t *lock_p; 178 krw_t lock_op; 179 struct fstrans_lwp_info *fli, *new_fli; 180 struct fstrans_mount_info *fmi; 181 182 ASSERT_SLEEPABLE(NULL, __func__); 183 184 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 185 return 0; 186 187 new_fli = NULL; 188 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 189 if (fli->fli_mount == NULL && new_fli == NULL) 190 new_fli = fli; 191 if (fli->fli_mount == mp) { 192 KASSERT(fli->fli_count > 0); 193 if (fli->fli_lock_type != FSTRANS_EXCL && 194 lock_type == FSTRANS_EXCL) 195 panic("fstrans_start: cannot upgrade lock"); 196 fli->fli_count += 1; 197 return 0; 198 } 199 } 200 201 if (new_fli == NULL) { 202 new_fli = pool_get(&fstrans_pl, PR_WAITOK); 203 new_fli->fli_mount = NULL; 204 new_fli->fli_count = 0; 205 new_fli->fli_succ = lwp_getspecific(lwp_data_key); 206 lwp_setspecific(lwp_data_key, new_fli); 207 } 208 209 KASSERT(new_fli->fli_mount == NULL); 210 KASSERT(new_fli->fli_count == 0); 211 212 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) 213 fmi = fstrans_mount_init(mp); 214 215 if (lock_type == FSTRANS_LAZY) 216 lock_p = &fmi->fmi_lazy_lock; 217 else 218 lock_p = &fmi->fmi_shared_lock; 219 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER); 220 221 if (wait) 222 rw_enter(lock_p, lock_op); 223 else if (rw_tryenter(lock_p, lock_op) == 0) 224 return EBUSY; 225 226 new_fli->fli_mount = mp; 227 new_fli->fli_count = 1; 228 new_fli->fli_lock_type = lock_type; 229 230 return 0; 231 } 232 233 /* 234 * Finish a transaction. 235 */ 236 void 237 fstrans_done(struct mount *mp) 238 { 239 struct fstrans_lwp_info *fli; 240 struct fstrans_mount_info *fmi; 241 242 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 243 return; 244 245 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 246 if (fli->fli_mount == mp) { 247 fli->fli_count -= 1; 248 if (fli->fli_count > 0) 249 return; 250 break; 251 } 252 } 253 254 KASSERT(fli != NULL); 255 KASSERT(fli->fli_mount == mp); 256 KASSERT(fli->fli_count == 0); 257 fli->fli_mount = NULL; 258 fmi = mount_getspecific(mp, mount_data_key); 259 KASSERT(fmi != NULL); 260 if (fli->fli_lock_type == FSTRANS_LAZY) 261 rw_exit(&fmi->fmi_lazy_lock); 262 else 263 rw_exit(&fmi->fmi_shared_lock); 264 } 265 266 /* 267 * Check if this thread has an exclusive lock. 268 */ 269 int 270 fstrans_is_owner(struct mount *mp) 271 { 272 struct fstrans_lwp_info *fli; 273 274 if (mp == NULL) 275 return 0; 276 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 277 return 0; 278 279 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) 280 if (fli->fli_mount == mp) 281 break; 282 283 if (fli == NULL) 284 return 0; 285 286 KASSERT(fli->fli_mount == mp); 287 KASSERT(fli->fli_count > 0); 288 return (fli->fli_lock_type == FSTRANS_EXCL); 289 } 290 291 /* 292 * Set new file system state. 293 */ 294 int 295 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 296 { 297 struct fstrans_mount_info *fmi; 298 299 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) 300 fmi = fstrans_mount_init(mp); 301 302 switch (new_state) { 303 case FSTRANS_SUSPENDING: 304 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 305 fstrans_start(mp, FSTRANS_EXCL); 306 fmi->fmi_state = FSTRANS_SUSPENDING; 307 break; 308 309 case FSTRANS_SUSPENDED: 310 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 311 fmi->fmi_state == FSTRANS_SUSPENDING); 312 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 313 fstrans_is_owner(mp)); 314 if (fmi->fmi_state == FSTRANS_NORMAL) 315 fstrans_start(mp, FSTRANS_EXCL); 316 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER); 317 fmi->fmi_state = FSTRANS_SUSPENDED; 318 break; 319 320 case FSTRANS_NORMAL: 321 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 322 fstrans_is_owner(mp)); 323 if (fmi->fmi_state == FSTRANS_SUSPENDED) 324 rw_exit(&fmi->fmi_lazy_lock); 325 if (fmi->fmi_state == FSTRANS_SUSPENDING || 326 fmi->fmi_state == FSTRANS_SUSPENDED) { 327 fmi->fmi_state = FSTRANS_NORMAL; 328 fstrans_done(mp); 329 } 330 break; 331 332 default: 333 panic("%s: illegal state %d", __func__, new_state); 334 } 335 336 return 0; 337 } 338 339 /* 340 * Get current file system state 341 */ 342 enum fstrans_state 343 fstrans_getstate(struct mount *mp) 344 { 345 struct fstrans_mount_info *fmi; 346 347 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) 348 return FSTRANS_NORMAL; 349 350 return fmi->fmi_state; 351 } 352 353 /* 354 * Request a filesystem to suspend all operations. 355 */ 356 int 357 vfs_suspend(struct mount *mp, int nowait) 358 { 359 int error; 360 361 if (nowait) { 362 if (!mutex_tryenter(&vfs_suspend_lock)) 363 return EWOULDBLOCK; 364 } else 365 mutex_enter(&vfs_suspend_lock); 366 367 mutex_enter(&syncer_mutex); 368 369 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) { 370 mutex_exit(&syncer_mutex); 371 mutex_exit(&vfs_suspend_lock); 372 } 373 374 return error; 375 } 376 377 /* 378 * Request a filesystem to resume all operations. 379 */ 380 void 381 vfs_resume(struct mount *mp) 382 { 383 384 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 385 mutex_exit(&syncer_mutex); 386 mutex_exit(&vfs_suspend_lock); 387 } 388 389 #if defined(DDB) 390 void fstrans_dump(int); 391 392 static void 393 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 394 { 395 char prefix[9]; 396 struct fstrans_lwp_info *fli; 397 398 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 399 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key); 400 fli; 401 fli = fli->fli_succ) { 402 if (!verbose && fli->fli_count == 0) 403 continue; 404 printf("%-8s", prefix); 405 if (verbose) 406 printf(" @%p", fli); 407 if (fli->fli_mount != NULL) 408 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 409 else 410 printf(" NULL"); 411 switch (fli->fli_lock_type) { 412 case FSTRANS_LAZY: 413 printf(" lazy"); 414 break; 415 case FSTRANS_SHARED: 416 printf(" shared"); 417 break; 418 case FSTRANS_EXCL: 419 printf(" excl"); 420 break; 421 default: 422 printf(" %#x", fli->fli_lock_type); 423 break; 424 } 425 printf(" %d\n", fli->fli_count); 426 prefix[0] = '\0'; 427 } 428 } 429 430 static void 431 fstrans_print_mount(struct mount *mp, int verbose) 432 { 433 struct fstrans_mount_info *fmi; 434 435 fmi = mount_getspecific(mp, mount_data_key); 436 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 437 return; 438 439 printf("%-16s ", mp->mnt_stat.f_mntonname); 440 if (fmi == NULL) { 441 printf("(null)\n"); 442 return; 443 } 444 switch (fmi->fmi_state) { 445 case FSTRANS_NORMAL: 446 printf("state normal\n"); 447 break; 448 case FSTRANS_SUSPENDING: 449 printf("state suspending\n"); 450 break; 451 case FSTRANS_SUSPENDED: 452 printf("state suspended\n"); 453 break; 454 default: 455 printf("state %#x\n", fmi->fmi_state); 456 break; 457 } 458 printf("%16s r=%d w=%d\n", "lock_lazy:", 459 rw_read_held(&fmi->fmi_lazy_lock), 460 rw_write_held(&fmi->fmi_lazy_lock)); 461 printf("%16s r=%d w=%d\n", "lock_shared:", 462 rw_read_held(&fmi->fmi_shared_lock), 463 rw_write_held(&fmi->fmi_shared_lock)); 464 } 465 466 void 467 fstrans_dump(int full) 468 { 469 const struct proclist_desc *pd; 470 struct proc *p; 471 struct lwp *l; 472 struct mount *mp; 473 474 printf("Fstrans locks by lwp:\n"); 475 for (pd = proclists; pd->pd_list != NULL; pd++) 476 LIST_FOREACH(p, pd->pd_list, p_list) 477 LIST_FOREACH(l, &p->p_lwps, l_sibling) 478 fstrans_print_lwp(p, l, full == 1); 479 480 printf("Fstrans state by mount:\n"); 481 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) 482 fstrans_print_mount(mp, full == 1); 483 } 484 #endif /* defined(DDB) */ 485 486 487 struct fscow_handler { 488 SLIST_ENTRY(fscow_handler) ch_list; 489 int (*ch_func)(void *, struct buf *, bool); 490 void *ch_arg; 491 }; 492 493 struct fscow_mount_info { 494 krwlock_t cmi_lock; 495 SLIST_HEAD(, fscow_handler) cmi_handler; 496 }; 497 498 /* 499 * Deallocate mount state 500 */ 501 static void 502 fscow_mount_dtor(void *arg) 503 { 504 struct fscow_mount_info *cmi = arg; 505 506 KASSERT(SLIST_EMPTY(&cmi->cmi_handler)); 507 rw_destroy(&cmi->cmi_lock); 508 kmem_free(cmi, sizeof(*cmi)); 509 } 510 511 /* 512 * Create mount info for this mount 513 */ 514 static struct fscow_mount_info * 515 fscow_mount_init(struct mount *mp) 516 { 517 struct fscow_mount_info *new; 518 519 mutex_enter(&fstrans_init_lock); 520 521 if ((new = mount_getspecific(mp, mount_cow_key)) != NULL) { 522 mutex_exit(&fstrans_init_lock); 523 return new; 524 } 525 526 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) != NULL) { 527 SLIST_INIT(&new->cmi_handler); 528 rw_init(&new->cmi_lock); 529 mount_setspecific(mp, mount_cow_key, new); 530 } 531 532 mutex_exit(&fstrans_init_lock); 533 534 return new; 535 } 536 537 int 538 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 539 void *arg) 540 { 541 struct fscow_mount_info *cmi; 542 struct fscow_handler *new; 543 544 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) 545 cmi = fscow_mount_init(mp); 546 if (cmi == NULL) 547 return ENOMEM; 548 549 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 550 return ENOMEM; 551 new->ch_func = func; 552 new->ch_arg = arg; 553 rw_enter(&cmi->cmi_lock, RW_WRITER); 554 SLIST_INSERT_HEAD(&cmi->cmi_handler, new, ch_list); 555 rw_exit(&cmi->cmi_lock); 556 557 return 0; 558 } 559 560 int 561 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 562 void *arg) 563 { 564 struct fscow_mount_info *cmi; 565 struct fscow_handler *hp = NULL; 566 567 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) 568 return EINVAL; 569 570 rw_enter(&cmi->cmi_lock, RW_WRITER); 571 SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) 572 if (hp->ch_func == func && hp->ch_arg == arg) 573 break; 574 if (hp != NULL) { 575 SLIST_REMOVE(&cmi->cmi_handler, hp, fscow_handler, ch_list); 576 kmem_free(hp, sizeof(*hp)); 577 } 578 rw_exit(&cmi->cmi_lock); 579 580 return hp ? 0 : EINVAL; 581 } 582 583 int 584 fscow_run(struct buf *bp, bool data_valid) 585 { 586 int error = 0; 587 struct mount *mp; 588 struct fscow_mount_info *cmi; 589 struct fscow_handler *hp; 590 591 if ((bp->b_flags & B_COWDONE)) 592 goto done; 593 if (bp->b_vp == NULL) 594 goto done; 595 if (bp->b_vp->v_type == VBLK) 596 mp = bp->b_vp->v_specmountpoint; 597 else 598 mp = bp->b_vp->v_mount; 599 if (mp == NULL) 600 goto done; 601 602 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) 603 goto done; 604 605 rw_enter(&cmi->cmi_lock, RW_READER); 606 SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) 607 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 608 break; 609 rw_exit(&cmi->cmi_lock); 610 611 done: 612 if (error == 0) 613 bp->b_flags |= B_COWDONE; 614 615 return error; 616 } 617