1 /* $NetBSD: vfs_trans.c,v 1.21 2008/05/16 14:08:56 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Juergen Hannken-Illjes. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.21 2008/05/16 14:08:56 hannken Exp $"); 34 35 /* 36 * File system transaction operations. 37 */ 38 39 #include "opt_ddb.h" 40 41 #if defined(DDB) 42 #define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */ 43 #endif 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kmem.h> 48 #include <sys/mount.h> 49 #include <sys/rwlock.h> 50 #include <sys/vnode.h> 51 #define _FSTRANS_API_PRIVATE 52 #include <sys/fstrans.h> 53 #include <sys/proc.h> 54 55 #include <miscfs/specfs/specdev.h> 56 #include <miscfs/syncfs/syncfs.h> 57 58 struct fscow_handler { 59 SLIST_ENTRY(fscow_handler) ch_list; 60 int (*ch_func)(void *, struct buf *, bool); 61 void *ch_arg; 62 }; 63 struct fstrans_lwp_info { 64 struct fstrans_lwp_info *fli_succ; 65 struct mount *fli_mount; 66 int fli_trans_cnt; 67 int fli_cow_cnt; 68 enum fstrans_lock_type fli_lock_type; 69 }; 70 struct fstrans_mount_info { 71 enum fstrans_state fmi_state; 72 krwlock_t fmi_shared_lock; 73 krwlock_t fmi_lazy_lock; 74 krwlock_t fmi_cow_lock; 75 SLIST_HEAD(, fscow_handler) fmi_cow_handler; 76 }; 77 78 static specificdata_key_t lwp_data_key; 79 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ 80 81 POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0, 82 "fstrans", NULL, IPL_NONE); 83 84 static void fstrans_lwp_dtor(void *); 85 static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *); 86 87 /* 88 * Initialize 89 */ 90 void 91 fstrans_init(void) 92 { 93 int error; 94 95 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); 96 KASSERT(error == 0); 97 98 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); 99 } 100 101 /* 102 * Deallocate lwp state 103 */ 104 static void 105 fstrans_lwp_dtor(void *arg) 106 { 107 struct fstrans_lwp_info *fli, *fli_next; 108 109 for (fli = arg; fli; fli = fli_next) { 110 KASSERT(fli->fli_trans_cnt == 0); 111 KASSERT(fli->fli_cow_cnt == 0); 112 fli_next = fli->fli_succ; 113 pool_put(&fstrans_pl, fli); 114 } 115 } 116 117 /* 118 * Allocate mount state 119 */ 120 int 121 fstrans_mount(struct mount *mp) 122 { 123 struct fstrans_mount_info *new; 124 125 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 126 return ENOMEM; 127 new->fmi_state = FSTRANS_NORMAL; 128 rw_init(&new->fmi_lazy_lock); 129 rw_init(&new->fmi_shared_lock); 130 SLIST_INIT(&new->fmi_cow_handler); 131 rw_init(&new->fmi_cow_lock); 132 133 mp->mnt_transinfo = new; 134 mp->mnt_iflag |= IMNT_HAS_TRANS; 135 136 return 0; 137 } 138 139 /* 140 * Deallocate mount state 141 */ 142 void 143 fstrans_unmount(struct mount *mp) 144 { 145 struct fstrans_mount_info *fmi; 146 147 if ((fmi = mp->mnt_transinfo) == NULL) 148 return; 149 150 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 151 rw_destroy(&fmi->fmi_lazy_lock); 152 rw_destroy(&fmi->fmi_shared_lock); 153 KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler)); 154 rw_destroy(&fmi->fmi_cow_lock); 155 kmem_free(fmi, sizeof(*fmi)); 156 mp->mnt_iflag &= ~IMNT_HAS_TRANS; 157 mp->mnt_transinfo = NULL; 158 } 159 160 /* 161 * Retrieve the per lwp info for this mount 162 */ 163 static struct fstrans_lwp_info * 164 fstrans_get_lwp_info(struct mount *mp) 165 { 166 struct fstrans_lwp_info *fli, *new_fli; 167 168 new_fli = NULL; 169 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 170 if (fli->fli_mount == mp) 171 return fli; 172 else if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0 && 173 new_fli == NULL) 174 new_fli = fli; 175 } 176 177 if (new_fli == NULL) { 178 new_fli = pool_get(&fstrans_pl, PR_WAITOK); 179 new_fli->fli_trans_cnt = 0; 180 new_fli->fli_cow_cnt = 0; 181 new_fli->fli_succ = lwp_getspecific(lwp_data_key); 182 lwp_setspecific(lwp_data_key, new_fli); 183 } 184 185 KASSERT(new_fli->fli_trans_cnt == 0); 186 KASSERT(new_fli->fli_cow_cnt == 0); 187 188 new_fli->fli_mount = mp; 189 190 return new_fli; 191 } 192 193 /* 194 * Start a transaction. If this thread already has a transaction on this 195 * file system increment the reference counter. 196 * A thread with an exclusive transaction lock may get a shared or lazy one. 197 * A thread with a shared or lazy transaction lock cannot upgrade to an 198 * exclusive one yet. 199 */ 200 int 201 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) 202 { 203 krwlock_t *lock_p; 204 krw_t lock_op; 205 struct fstrans_lwp_info *fli; 206 struct fstrans_mount_info *fmi; 207 208 ASSERT_SLEEPABLE(); 209 210 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 211 return 0; 212 213 fli = fstrans_get_lwp_info(mp); 214 215 if (fli->fli_trans_cnt > 0) { 216 if (fli->fli_lock_type != FSTRANS_EXCL && 217 lock_type == FSTRANS_EXCL) 218 panic("fstrans_start: cannot upgrade lock"); 219 fli->fli_trans_cnt += 1; 220 return 0; 221 } 222 223 fmi = mp->mnt_transinfo; 224 225 if (lock_type == FSTRANS_LAZY) 226 lock_p = &fmi->fmi_lazy_lock; 227 else 228 lock_p = &fmi->fmi_shared_lock; 229 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER); 230 231 if (wait) 232 rw_enter(lock_p, lock_op); 233 else if (rw_tryenter(lock_p, lock_op) == 0) 234 return EBUSY; 235 236 fli->fli_trans_cnt = 1; 237 fli->fli_lock_type = lock_type; 238 239 return 0; 240 } 241 242 /* 243 * Finish a transaction. 244 */ 245 void 246 fstrans_done(struct mount *mp) 247 { 248 struct fstrans_lwp_info *fli; 249 struct fstrans_mount_info *fmi; 250 251 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 252 return; 253 254 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) { 255 if (fli->fli_mount == mp) { 256 fli->fli_trans_cnt -= 1; 257 if (fli->fli_trans_cnt > 0) 258 return; 259 break; 260 } 261 } 262 263 KASSERT(fli != NULL); 264 KASSERT(fli->fli_mount == mp); 265 KASSERT(fli->fli_trans_cnt == 0); 266 267 fmi = mp->mnt_transinfo; 268 KASSERT(fmi != NULL); 269 if (fli->fli_lock_type == FSTRANS_LAZY) 270 rw_exit(&fmi->fmi_lazy_lock); 271 else 272 rw_exit(&fmi->fmi_shared_lock); 273 } 274 275 /* 276 * Check if this thread has an exclusive lock. 277 */ 278 int 279 fstrans_is_owner(struct mount *mp) 280 { 281 struct fstrans_lwp_info *fli; 282 283 if (mp == NULL) 284 return 0; 285 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 286 return 0; 287 288 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) 289 if (fli->fli_mount == mp) 290 break; 291 292 if (fli == NULL || fli->fli_trans_cnt == 0) 293 return 0; 294 295 KASSERT(fli->fli_mount == mp); 296 KASSERT(fli->fli_trans_cnt > 0); 297 return (fli->fli_lock_type == FSTRANS_EXCL); 298 } 299 300 /* 301 * Set new file system state. 302 */ 303 int 304 fstrans_setstate(struct mount *mp, enum fstrans_state new_state) 305 { 306 struct fstrans_mount_info *fmi; 307 308 fmi = mp->mnt_transinfo; 309 310 switch (new_state) { 311 case FSTRANS_SUSPENDING: 312 KASSERT(fmi->fmi_state == FSTRANS_NORMAL); 313 fstrans_start(mp, FSTRANS_EXCL); 314 fmi->fmi_state = FSTRANS_SUSPENDING; 315 break; 316 317 case FSTRANS_SUSPENDED: 318 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 319 fmi->fmi_state == FSTRANS_SUSPENDING); 320 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 321 fstrans_is_owner(mp)); 322 if (fmi->fmi_state == FSTRANS_NORMAL) 323 fstrans_start(mp, FSTRANS_EXCL); 324 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER); 325 fmi->fmi_state = FSTRANS_SUSPENDED; 326 break; 327 328 case FSTRANS_NORMAL: 329 KASSERT(fmi->fmi_state == FSTRANS_NORMAL || 330 fstrans_is_owner(mp)); 331 if (fmi->fmi_state == FSTRANS_SUSPENDED) 332 rw_exit(&fmi->fmi_lazy_lock); 333 if (fmi->fmi_state == FSTRANS_SUSPENDING || 334 fmi->fmi_state == FSTRANS_SUSPENDED) { 335 fmi->fmi_state = FSTRANS_NORMAL; 336 fstrans_done(mp); 337 } 338 break; 339 340 default: 341 panic("%s: illegal state %d", __func__, new_state); 342 } 343 344 return 0; 345 } 346 347 /* 348 * Get current file system state 349 */ 350 enum fstrans_state 351 fstrans_getstate(struct mount *mp) 352 { 353 struct fstrans_mount_info *fmi; 354 355 fmi = mp->mnt_transinfo; 356 357 return fmi->fmi_state; 358 } 359 360 /* 361 * Request a filesystem to suspend all operations. 362 */ 363 int 364 vfs_suspend(struct mount *mp, int nowait) 365 { 366 int error; 367 368 if (nowait) { 369 if (!mutex_tryenter(&vfs_suspend_lock)) 370 return EWOULDBLOCK; 371 } else 372 mutex_enter(&vfs_suspend_lock); 373 374 mutex_enter(&syncer_mutex); 375 376 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) { 377 mutex_exit(&syncer_mutex); 378 mutex_exit(&vfs_suspend_lock); 379 } 380 381 return error; 382 } 383 384 /* 385 * Request a filesystem to resume all operations. 386 */ 387 void 388 vfs_resume(struct mount *mp) 389 { 390 391 VFS_SUSPENDCTL(mp, SUSPEND_RESUME); 392 mutex_exit(&syncer_mutex); 393 mutex_exit(&vfs_suspend_lock); 394 } 395 396 #if defined(DDB) 397 void fstrans_dump(int); 398 399 static void 400 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose) 401 { 402 char prefix[9]; 403 struct fstrans_lwp_info *fli; 404 405 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid); 406 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key); 407 fli; 408 fli = fli->fli_succ) { 409 if (!verbose && fli->fli_trans_cnt == 0) 410 continue; 411 printf("%-8s", prefix); 412 if (verbose) 413 printf(" @%p", fli); 414 if (fli->fli_mount != NULL) 415 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname); 416 else 417 printf(" NULL"); 418 switch (fli->fli_lock_type) { 419 case FSTRANS_LAZY: 420 printf(" lazy"); 421 break; 422 case FSTRANS_SHARED: 423 printf(" shared"); 424 break; 425 case FSTRANS_EXCL: 426 printf(" excl"); 427 break; 428 default: 429 printf(" %#x", fli->fli_lock_type); 430 break; 431 } 432 printf(" %d\n", fli->fli_trans_cnt); 433 prefix[0] = '\0'; 434 } 435 } 436 437 static void 438 fstrans_print_mount(struct mount *mp, int verbose) 439 { 440 struct fstrans_mount_info *fmi; 441 442 fmi = mp->mnt_transinfo; 443 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) 444 return; 445 446 printf("%-16s ", mp->mnt_stat.f_mntonname); 447 if (fmi == NULL) { 448 printf("(null)\n"); 449 return; 450 } 451 switch (fmi->fmi_state) { 452 case FSTRANS_NORMAL: 453 printf("state normal\n"); 454 break; 455 case FSTRANS_SUSPENDING: 456 printf("state suspending\n"); 457 break; 458 case FSTRANS_SUSPENDED: 459 printf("state suspended\n"); 460 break; 461 default: 462 printf("state %#x\n", fmi->fmi_state); 463 break; 464 } 465 printf("%16s r=%d w=%d\n", "lock_lazy:", 466 rw_read_held(&fmi->fmi_lazy_lock), 467 rw_write_held(&fmi->fmi_lazy_lock)); 468 printf("%16s r=%d w=%d\n", "lock_shared:", 469 rw_read_held(&fmi->fmi_shared_lock), 470 rw_write_held(&fmi->fmi_shared_lock)); 471 } 472 473 void 474 fstrans_dump(int full) 475 { 476 const struct proclist_desc *pd; 477 struct proc *p; 478 struct lwp *l; 479 struct mount *mp; 480 481 printf("Fstrans locks by lwp:\n"); 482 for (pd = proclists; pd->pd_list != NULL; pd++) 483 LIST_FOREACH(p, pd->pd_list, p_list) 484 LIST_FOREACH(l, &p->p_lwps, l_sibling) 485 fstrans_print_lwp(p, l, full == 1); 486 487 printf("Fstrans state by mount:\n"); 488 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) 489 fstrans_print_mount(mp, full == 1); 490 } 491 #endif /* defined(DDB) */ 492 493 int 494 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), 495 void *arg) 496 { 497 struct fstrans_mount_info *fmi; 498 struct fscow_handler *new; 499 500 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 501 return EINVAL; 502 503 fmi = mp->mnt_transinfo; 504 505 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) 506 return ENOMEM; 507 new->ch_func = func; 508 new->ch_arg = arg; 509 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 510 SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list); 511 rw_exit(&fmi->fmi_cow_lock); 512 513 return 0; 514 } 515 516 int 517 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), 518 void *arg) 519 { 520 struct fstrans_mount_info *fmi; 521 struct fscow_handler *hp = NULL; 522 523 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 524 return EINVAL; 525 526 fmi = mp->mnt_transinfo; 527 528 rw_enter(&fmi->fmi_cow_lock, RW_WRITER); 529 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 530 if (hp->ch_func == func && hp->ch_arg == arg) 531 break; 532 if (hp != NULL) { 533 SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); 534 kmem_free(hp, sizeof(*hp)); 535 } 536 rw_exit(&fmi->fmi_cow_lock); 537 538 return hp ? 0 : EINVAL; 539 } 540 541 int 542 fscow_run(struct buf *bp, bool data_valid) 543 { 544 int error = 0; 545 struct mount *mp; 546 struct fstrans_lwp_info *fli; 547 struct fstrans_mount_info *fmi; 548 struct fscow_handler *hp; 549 550 if ((bp->b_flags & B_COWDONE)) 551 goto done; 552 if (bp->b_vp == NULL) 553 goto done; 554 if (bp->b_vp->v_type == VBLK) 555 mp = bp->b_vp->v_specmountpoint; 556 else 557 mp = bp->b_vp->v_mount; 558 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) 559 goto done; 560 561 fli = fstrans_get_lwp_info(mp); 562 fmi = mp->mnt_transinfo; 563 564 if (fli->fli_cow_cnt++ == 0) 565 rw_enter(&fmi->fmi_cow_lock, RW_READER); 566 567 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) 568 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) 569 break; 570 571 if (--fli->fli_cow_cnt == 0) 572 rw_exit(&fmi->fmi_cow_lock); 573 574 done: 575 if (error == 0) 576 bp->b_flags |= B_COWDONE; 577 578 return error; 579 } 580