1 /* $NetBSD: vfs_trans.c,v 1.25 2009/05/12 11:42:12 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.25 2009/05/12 11:42:12 yamt Exp $"); 34 35 /* 36 * File system transaction operations. 37 */ 38 39 #include "opt_ddb.h" 40 41 #if defined(DDB) 42 #define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */ 43 #endif 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/buf.h> 48 #include <sys/kmem.h> 49 #include <sys/mount.h> 50 #include <sys/rwlock.h> 51 #include <sys/vnode.h> 52 #define _FSTRANS_API_PRIVATE 53 #include <sys/fstrans.h> 54 #include <sys/proc.h> 55 56 #include <miscfs/specfs/specdev.h> 57 #include <miscfs/syncfs/syncfs.h> 58 59 struct fscow_handler { 60 SLIST_ENTRY(fscow_handler) ch_list; 61 int (*ch_func)(void *, struct buf *, bool); 62 void *ch_arg; 63 }; 64 struct fstrans_lwp_info { 65 struct fstrans_lwp_info *fli_succ; 66 struct mount *fli_mount; 67 int fli_trans_cnt; 68 int fli_cow_cnt; 69 enum fstrans_lock_type fli_lock_type; 70 }; 71 struct fstrans_mount_info { 72 enum fstrans_state fmi_state; 73 krwlock_t fmi_shared_lock; 74 krwlock_t fmi_lazy_lock; 75 krwlock_t fmi_cow_lock; 76 SLIST_HEAD(, fscow_handler) fmi_cow_handler; 77 }; 78 79 static specificdata_key_t lwp_data_key; 80 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 81 static pool_cache_t fstrans_cache; 82 83 static void fstrans_lwp_dtor(void *); 84 static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *); 85 86 /* 87 * Initialize 88 */ 89 void 90 fstrans_init(void) 91 { 92 int error; 93 94 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); 95 KASSERT(error == 0); 96 97 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 98 fstrans_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 0, 0, 99 0, "fstrans", NULL, IPL_NONE, NULL, NULL, NULL); 100 } 101 102 /* 103 * Deallocate lwp state 104 */ 105 static void 106 fstrans_lwp_dtor(void *arg) 107 { 108 struct fstrans_lwp_info *fli, *fli_next; 109 110 for (fli = arg; fli; fli = fli_next) { 111 KASSERT(fli->fli_trans_cnt == 0); 112 KASSERT(fli->fli_cow_cnt == 0); 113 fli_next = fli->fli_succ; 114 pool_cache_put(fstrans_cache, fli); 115 } 116 } 117 118 /* 119 * Allocate mount state 120 */ 121 int 122 fstrans_mount(struct mount *mp) 123 { 124 struct fstrans_mount_info *new; 125 126 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 127 return ENOMEM; 128 new->fmi_state = FSTRANS_NORMAL; 129 rw_init(&new->fmi_lazy_lock); 130 rw_init(&new->fmi_shared_lock); 131 SLIST_INIT(&new->fmi_cow_handler); 132 rw_init(&new->fmi_cow_lock); 133 134 mp->mnt_transinfo = new; 135 mp->mnt_iflag |= IMNT_HAS_TRANS; 136 137 return 0; 138 } 139 140 /* 141 * Deallocate mount state 142 */ 143 void 144 fstrans_unmount(struct mount *mp) 145 { 146 struct fstrans_mount_info *fmi; 147 struct fscow_handler *hp; 148 149 if ((fmi = mp->mnt_transinfo) == NULL) 150 return; 151 152 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 153 rw_destroy(&fmi->fmi_lazy_lock); 154 rw_destroy(&fmi->fmi_shared_lock); 155 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 156 while ((hp = SLIST_FIRST(&fmi->fmi_cow_handler)) != NULL) { 157 SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); 158 kmem_free(hp, sizeof(*hp)); 159 } 160 rw_exit(&fmi->fmi_cow_lock); 161 rw_destroy(&fmi->fmi_cow_lock); 162 kmem_free(fmi, sizeof(*fmi)); 163 mp->mnt_iflag &= ~IMNT_HAS_TRANS; 164 mp->mnt_transinfo = NULL; 165 } 166 167 /* 168 * Retrieve the per lwp info for this mount 169 */ 170 static struct fstrans_lwp_info * 171 fstrans_get_lwp_info(struct mount *mp) 172 { 173 struct fstrans_lwp_info *fli, *new_fli; 174 175 new_fli = NULL; 176 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 177 if (fli->fli_mount == mp) 178 return fli; 179 else if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0 && 180 new_fli == NULL) 181 new_fli = fli; 182 } 183 184 if (new_fli == NULL) { 185 new_fli = pool_cache_get(fstrans_cache, PR_WAITOK); 186 new_fli->fli_trans_cnt = 0; 187 new_fli->fli_cow_cnt = 0; 188 new_fli->fli_succ = lwp_getspecific(lwp_data_key); 189 lwp_setspecific(lwp_data_key, new_fli); 190 } 191 192 KASSERT(new_fli->fli_trans_cnt == 0); 193 KASSERT(new_fli->fli_cow_cnt == 0); 194 195 new_fli->fli_mount = mp; 196 197 return new_fli; 198 } 199 200 /* 201 * Start a transaction. If this thread already has a transaction on this 202 * file system increment the reference counter. 203 * A thread with an exclusive transaction lock may get a shared or lazy one. 204 * A thread with a shared or lazy transaction lock cannot upgrade to an 205 * exclusive one yet. 206 */ 207 int 208 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 209 { 210 krwlock_t *lock_p; 211 krw_t lock_op; 212 struct fstrans_lwp_info *fli; 213 struct fstrans_mount_info *fmi; 214 215 ASSERT_SLEEPABLE(); 216 217 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 218 return 0; 219 220 fli = fstrans_get_lwp_info(mp); 221 222 if (fli->fli_trans_cnt > 0) { 223 if (fli->fli_lock_type != FSTRANS_EXCL && 224 lock_type == FSTRANS_EXCL) 225 panic("fstrans_start: cannot upgrade lock"); 226 fli->fli_trans_cnt += 1; 227 return 0; 228 } 229 230 fmi = mp->mnt_transinfo; 231 232 if (lock_type == FSTRANS_LAZY) 233 lock_p = &fmi->fmi_lazy_lock; 234 else 235 lock_p = &fmi->fmi_shared_lock; 236 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER); 237 238 if (wait) 239 rw_enter(lock_p, lock_op); 240 else if (rw_tryenter(lock_p, lock_op) == 0) 241 return EBUSY; 242 243 fli->fli_trans_cnt = 1; 244 fli->fli_lock_type = lock_type; 245 246 return 0; 247 } 248 249 /* 250 * Finish a transaction. 251 */ 252 void 253 fstrans_done(struct mount *mp) 254 { 255 struct fstrans_lwp_info *fli; 256 struct fstrans_mount_info *fmi; 257 258 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 259 return; 260 261 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 262 if (fli->fli_mount == mp) { 263 fli->fli_trans_cnt -= 1; 264 if (fli->fli_trans_cnt > 0) 265 return; 266 break; 267 } 268 } 269 270 KASSERT(fli != NULL); 271 KASSERT(fli->fli_mount == mp); 272 KASSERT(fli->fli_trans_cnt == 0); 273 274 fmi = mp->mnt_transinfo; 275 KASSERT(fmi != NULL); 276 if (fli->fli_lock_type == FSTRANS_LAZY) 277 rw_exit(&fmi->fmi_lazy_lock); 278 else 279 rw_exit(&fmi->fmi_shared_lock); 280 } 281 282 /* 283 * Check if this thread has an exclusive lock. 284 */ 285 int 286 fstrans_is_owner(struct mount *mp) 287 { 288 struct fstrans_lwp_info *fli; 289 290 if (mp == NULL) 291 return 0; 292 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 293 return 0; 294 295 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) 296 if (fli->fli_mount == mp) 297 break; 298 299 if (fli == NULL || fli->fli_trans_cnt == 0) 300 return 0; 301 302 KASSERT(fli->fli_mount == mp); 303 KASSERT(fli->fli_trans_cnt > 0); 304 return (fli->fli_lock_type == FSTRANS_EXCL); 305 } 306 307 /* 308 * Set new file system state. 309 */ 310 int 311 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 312 { 313 struct fstrans_mount_info *fmi; 314 315 fmi = mp->mnt_transinfo; 316 317 switch (new_state) { 318 case FSTRANS_SUSPENDING: 319 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 320 fstrans_start(mp, FSTRANS_EXCL); 321 fmi->fmi_state = FSTRANS_SUSPENDING; 322 break; 323 324 case FSTRANS_SUSPENDED: 325 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 326 fmi->fmi_state == FSTRANS_SUSPENDING); 327 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 328 fstrans_is_owner(mp)); 329 if (fmi->fmi_state == FSTRANS_NORMAL) 330 fstrans_start(mp, FSTRANS_EXCL); 331 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER); 332 fmi->fmi_state = FSTRANS_SUSPENDED; 333 break; 334 335 case FSTRANS_NORMAL: 336 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 337 fstrans_is_owner(mp)); 338 if (fmi->fmi_state == FSTRANS_SUSPENDED) 339 rw_exit(&fmi->fmi_lazy_lock); 340 if (fmi->fmi_state == FSTRANS_SUSPENDING || 341 fmi->fmi_state == FSTRANS_SUSPENDED) { 342 fmi->fmi_state = FSTRANS_NORMAL; 343 fstrans_done(mp); 344 } 345 break; 346 347 default: 348 panic("%s: illegal state %d", __func__, new_state); 349 } 350 351 return 0; 352 } 353 354 /* 355 * Get current file system state 356 */ 357 enum fstrans_state 358 fstrans_getstate(struct mount *mp) 359 { 360 struct fstrans_mount_info *fmi; 361 362 fmi = mp->mnt_transinfo; 363 364 return fmi->fmi_state; 365 } 366 367 /* 368 * Request a filesystem to suspend all operations. 369 */ 370 int 371 vfs_suspend(struct mount *mp, int nowait) 372 { 373 int error; 374 375 if (nowait) { 376 if (!mutex_tryenter(&vfs_suspend_lock)) 377 return EWOULDBLOCK; 378 } else 379 mutex_enter(&vfs_suspend_lock); 380 381 mutex_enter(&syncer_mutex); 382 383 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) { 384 mutex_exit(&syncer_mutex); 385 mutex_exit(&vfs_suspend_lock); 386 } 387 388 return error; 389 } 390 391 /* 392 * Request a filesystem to resume all operations. 393 */ 394 void 395 vfs_resume(struct mount *mp) 396 { 397 398 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 399 mutex_exit(&syncer_mutex); 400 mutex_exit(&vfs_suspend_lock); 401 } 402 403 #if defined(DDB) 404 void fstrans_dump(int); 405 406 static void 407 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 408 { 409 char prefix[9]; 410 struct fstrans_lwp_info *fli; 411 412 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 413 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key); 414 fli; 415 fli = fli->fli_succ) { 416 if (!verbose && fli->fli_trans_cnt == 0) 417 continue; 418 printf("%-8s", prefix); 419 if (verbose) 420 printf(" @%p", fli); 421 if (fli->fli_mount != NULL) 422 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 423 else 424 printf(" NULL"); 425 switch (fli->fli_lock_type) { 426 case FSTRANS_LAZY: 427 printf(" lazy"); 428 break; 429 case FSTRANS_SHARED: 430 printf(" shared"); 431 break; 432 case FSTRANS_EXCL: 433 printf(" excl"); 434 break; 435 default: 436 printf(" %#x", fli->fli_lock_type); 437 break; 438 } 439 printf(" %d\n", fli->fli_trans_cnt); 440 prefix[0] = '\0'; 441 } 442 } 443 444 static void 445 fstrans_print_mount(struct mount *mp, int verbose) 446 { 447 struct fstrans_mount_info *fmi; 448 449 fmi = mp->mnt_transinfo; 450 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 451 return; 452 453 printf("%-16s ", mp->mnt_stat.f_mntonname); 454 if (fmi == NULL) { 455 printf("(null)\n"); 456 return; 457 } 458 switch (fmi->fmi_state) { 459 case FSTRANS_NORMAL: 460 printf("state normal\n"); 461 break; 462 case FSTRANS_SUSPENDING: 463 printf("state suspending\n"); 464 break; 465 case FSTRANS_SUSPENDED: 466 printf("state suspended\n"); 467 break; 468 default: 469 printf("state %#x\n", fmi->fmi_state); 470 break; 471 } 472 printf("%16s r=%d w=%d\n", "lock_lazy:", 473 rw_read_held(&fmi->fmi_lazy_lock), 474 rw_write_held(&fmi->fmi_lazy_lock)); 475 printf("%16s r=%d w=%d\n", "lock_shared:", 476 rw_read_held(&fmi->fmi_shared_lock), 477 rw_write_held(&fmi->fmi_shared_lock)); 478 } 479 480 void 481 fstrans_dump(int full) 482 { 483 const struct proclist_desc *pd; 484 struct proc *p; 485 struct lwp *l; 486 struct mount *mp; 487 488 printf("Fstrans locks by lwp:\n"); 489 for (pd = proclists; pd->pd_list != NULL; pd++) 490 PROCLIST_FOREACH(p, pd->pd_list) 491 LIST_FOREACH(l, &p->p_lwps, l_sibling) 492 fstrans_print_lwp(p, l, full == 1); 493 494 printf("Fstrans state by mount:\n"); 495 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) 496 fstrans_print_mount(mp, full == 1); 497 } 498 #endif /* defined(DDB) */ 499 500 int 501 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 502 void *arg) 503 { 504 struct fstrans_mount_info *fmi; 505 struct fscow_handler *new; 506 507 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 508 return EINVAL; 509 510 fmi = mp->mnt_transinfo; 511 512 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 513 return ENOMEM; 514 new->ch_func = func; 515 new->ch_arg = arg; 516 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 517 SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list); 518 rw_exit(&fmi->fmi_cow_lock); 519 520 return 0; 521 } 522 523 int 524 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 525 void *arg) 526 { 527 struct fstrans_mount_info *fmi; 528 struct fscow_handler *hp = NULL; 529 530 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 531 return EINVAL; 532 533 fmi = mp->mnt_transinfo; 534 535 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 536 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 537 if (hp->ch_func == func && hp->ch_arg == arg) 538 break; 539 if (hp != NULL) { 540 SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); 541 kmem_free(hp, sizeof(*hp)); 542 } 543 rw_exit(&fmi->fmi_cow_lock); 544 545 return hp ? 0 : EINVAL; 546 } 547 548 int 549 fscow_run(struct buf *bp, bool data_valid) 550 { 551 int error = 0; 552 struct mount *mp; 553 struct fstrans_lwp_info *fli; 554 struct fstrans_mount_info *fmi; 555 struct fscow_handler *hp; 556 557 if ((bp->b_flags & B_COWDONE)) 558 goto done; 559 if (bp->b_vp == NULL) 560 goto done; 561 if (bp->b_vp->v_type == VBLK) 562 mp = bp->b_vp->v_specmountpoint; 563 else 564 mp = bp->b_vp->v_mount; 565 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 566 goto done; 567 568 fli = fstrans_get_lwp_info(mp); 569 fmi = mp->mnt_transinfo; 570 571 if (fli->fli_cow_cnt++ == 0) 572 rw_enter(&fmi->fmi_cow_lock, RW_READER); 573 574 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 575 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 576 break; 577 578 if (--fli->fli_cow_cnt == 0) 579 rw_exit(&fmi->fmi_cow_lock); 580 581 done: 582 if (error == 0) 583 bp->b_flags |= B_COWDONE; 584 585 return error; 586 } 587