/*	$NetBSD: lfs_bio.c,v 1.118 2010/06/24 13:03:19 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_bio.c	8.10 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.118 2010/06/24 13:03:19 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#include <uvm/uvm.h>

/*
 * LFS block write function.
 *
 * XXX
 * No write cost accounting is done.
 * This is almost certainly wrong for synchronous operations and NFS.
 *
 * protected by lfs_lock.
 */
int	locked_queue_count = 0;		/* Count of locked-down buffers. */
long	locked_queue_bytes = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages = 0L;		/* Total number of LFS-written pages */
int	lfs_fs_pagetrip = 0;		/* # of pages to trip per-fs write */
int	lfs_writing = 0;		/* Set if already kicked off a writer
					   because of buffer space */

/* Lock and condition variables for above. */
kcondvar_t	locked_queue_cv;
kcondvar_t	lfs_writing_cv;
kmutex_t	lfs_lock;

extern int lfs_dostats;

/*
 * Reserved number/bytes of locked buffers.
 */
int locked_queue_rcount = 0;
long locked_queue_rbytes = 0L;

static int lfs_fits_buf(struct lfs *, int, int);
static int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
static int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int);

static int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit, bytes_fit;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));

	count_fit =
	    (locked_queue_count + locked_queue_rcount + n <= LFS_WAIT_BUFS);
	bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes <= LFS_WAIT_BYTES);

#ifdef DEBUG
	if (!count_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		      locked_queue_count, locked_queue_rcount,
		      n, LFS_WAIT_BUFS));
	}
	if (!bytes_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %ld\n",
		      locked_queue_bytes, locked_queue_rbytes,
		      bytes, LFS_WAIT_BYTES));
	}
#endif /* DEBUG */

	return (count_fit && bytes_fit);
}

/* ARGSUSED */
static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int n, int bytes)
{
	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	mutex_enter(&lfs_lock);
	while (n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		lfs_flush(fs, 0, 0);

		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		if (error && error != EWOULDBLOCK) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	if (n < 0)
		cv_broadcast(&locked_queue_cv);

	mutex_exit(&lfs_lock);

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.
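 * A reservation made here is released by calling lfs_reserve() again with
 * the corresponding negative fsb.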
 * If there is not enough space, give up the vnode lock temporarily and
 * wait for the space to become available.
 *
 * Called with vp locked.  (Note however that if fsb < 0, vp is ignored.)
 *
 * XXX YAMT - it isn't safe to unlock vp here
 * because the node might be modified while we sleep.
 * (eg. cached states like i_offset might be stale,
 *  the vnode might be truncated, etc..)
 * maybe we should have a way to restart the vnodeop (EVOPRESTART?)
 * or rearrange the vnodeop interface to leave vnode locking to file system
 * specific code so that each file system can have its own vnode locking and
 * vnode re-using strategies.
 */
static int
lfs_reserveavail(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;

	ASSERT_MAYBE_SEGLOCK(fs);
	slept = 0;
	mutex_enter(&lfs_lock);
	while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
		mutex_exit(&lfs_lock);
#if 0
		/*
		 * XXX ideally, we should unlock vnodes here
		 * because we might sleep for a very long time.
		 */
		VOP_UNLOCK(vp);
		if (vp2 != NULL) {
			VOP_UNLOCK(vp2);
		}
#else
		/*
		 * XXX since we'll sleep waiting for the cleaner while holding
		 * the vnode lock, a deadlock will occur if the cleaner tries
		 * to lock the vnode.
		 * (eg. lfs_markv -> lfs_fastvget -> getnewvnode -> vclean)
		 */
#endif

		if (!slept) {
			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %d,"
			      " est_bfree = %d)\n",
			      fsb + fs->lfs_ravail + fs->lfs_favail,
			      fs->lfs_bfree, LFS_EST_BFREE(fs)));
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		lfs_wakeup_cleaner(fs);

		mutex_enter(&lfs_lock);
		/* Cleaner might have run while we were reading, check again */
		if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail))
			break;

		error = mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve",
		    0, &lfs_lock);
#if 0
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
		vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
#endif
		if (error) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}
#ifdef DEBUG
	if (slept) {
		DLOG((DLOG_AVAIL, "lfs_reserve: woke up\n"));
	}
#endif
	fs->lfs_ravail += fsb;
	mutex_exit(&lfs_lock);

	return 0;
}

#ifdef DIAGNOSTIC
int lfs_rescount;
int lfs_rescountdirop;
#endif

int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	if (vp2) {
		/* Make sure we're not in the process of reclaiming vp2 */
		mutex_enter(&lfs_lock);
		while (fs->lfs_flags & LFS_UNDIROP) {
			mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0,
			    &lfs_lock);
		}
		mutex_exit(&lfs_lock);
	}

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || !(VTOI(vp2)->i_flag & IN_ADIROP));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
#ifdef DIAGNOSTIC
	if (cantwait) {
		if (fsb > 0)
			lfs_rescountdirop++;
		else if (fsb < 0)
			lfs_rescountdirop--;
		if (lfs_rescountdirop < 0)
			panic("lfs_rescountdirop");
	}
	else {
		if (fsb > 0)
			lfs_rescount++;
		else if (fsb < 0)
			lfs_rescount--;
		if (lfs_rescount < 0)
			panic("lfs_rescount");
	}
#endif
	if (cantwait)
		return 0;

	/*
	 * XXX
	 * vref vnodes here so that cleaner doesn't try to reuse them.
	 * (see XXX comment in lfs_reserveavail)
	 */
	vhold(vp);
	if (vp2 != NULL) {
		vhold(vp2);
	}

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		goto done;

	/*
	 * XXX just a guess.  should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2, fsb, fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

done:
	holdrele(vp);
	if (vp2 != NULL) {
		holdrele(vp2);
	}

	return error;
}

int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

#ifdef DIAGNOSTIC
	if (VTOI(bp->b_vp)->i_lfs->lfs_ronly == 0 && (bp->b_flags & B_ASYNC)) {
		panic("bawrite LFS buffer");
	}
#endif /* DIAGNOSTIC */
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int needed;

	ASSERT_NO_SEGLOCK(fs);
	needed = fsb + btofsb(fs, fs->lfs_sumsize) +
		 ((howmany(fs->lfs_uinodes + 1, INOPB(fs)) + fs->lfs_segtabsz +
		   1) << (fs->lfs_bshift - fs->lfs_ffshift));

	if (needed >= fs->lfs_avail) {
#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
		      "needed = %ld, avail = %ld\n",
		      (long)fsb, (long)fs->lfs_uinodes, (long)needed,
		      (long)fs->lfs_avail));
#endif
		return 0;
	}
	return 1;
}

int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	ASSERT_NO_SEGLOCK(fs);
	/* Push cleaner blocks through regardless */
	mutex_enter(&lfs_lock);
	if (LFS_SEGLOCK_HELD(fs) &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		mutex_exit(&lfs_lock);
		return 0;
	}
	mutex_exit(&lfs_lock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_availwait: out of available space, "
		      "waiting on cleaner\n"));
#endif

		lfs_wakeup_cleaner(fs);
#ifdef DIAGNOSTIC
		if (LFS_SEGLOCK_HELD(fs))
			panic("lfs_availwait: deadlock");
#endif
		error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	struct vnode *vp;
	int fsb;

	vp = bp->b_vp;
	fs = VFSTOUFS(vp->v_mount)->um_lfs;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
	KASSERT(((bp->b_oflags | bp->b_flags) & (BO_DELWRI|B_LOCKED))
	    != BO_DELWRI);

	/*
	 * Don't write *any* blocks if we're mounted read-only, or
	 * if we are "already unmounted".
	 *
	 * In particular the cleaner can't write blocks either.
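	 * (In either case the buffer is simply released below; EROFS is
	 * returned only for the read-only mount.)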
	 */
	if (fs->lfs_ronly || (fs->lfs_pflags & LFS_PF_CLEAN)) {
		bp->b_oflags &= ~BO_DELWRI;
		bp->b_flags |= B_READ;
		bp->b_error = 0;
		mutex_enter(&bufcache_lock);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_cflags &= ~BC_BUSY;
		else
			brelsel(bp, 0);
		mutex_exit(&bufcache_lock);
		return (fs->lfs_ronly ? EROFS : 0);
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if ((bp->b_flags & B_LOCKED) == 0) {
		fsb = numfrags(fs, bp->b_bcount);

		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		mutex_exit(&lfs_lock);
		fs->lfs_avail -= fsb;

		mutex_enter(&bufcache_lock);
		mutex_enter(&vp->v_interlock);
		bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE;
		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~B_READ;
		bp->b_error = 0;
		reassignbuf(bp, bp->b_vp);
		mutex_exit(&vp->v_interlock);
	} else {
		mutex_enter(&bufcache_lock);
	}

	if (bp->b_iodone != NULL)
		bp->b_cflags &= ~BC_BUSY;
	else
		brelsel(bp, 0);
	mutex_exit(&bufcache_lock);

	return (0);
}

/*
 * Called and returns with the lfs_lock held.
 */
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));
	if (fs->lfs_ronly)
		return;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	mutex_exit(&lfs_lock);
	lfs_writer_enter(fs, "fldirop");
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	lfs_writer_leave(fs);
	mutex_enter(&lfs_lock);
	fs->lfs_favail = 0; /* XXX */
}

/*
 * This routine initiates segment writes when LFS is consuming too many
 * resources.  Ideally the pageout daemon would be able to direct LFS
 * more subtly.
 * XXX We have one static count of locked buffers;
 * XXX need to think more about the multiple filesystem case.
 *
 * Called and returns with lfs_lock held.
 * If fs != NULL, we hold the segment lock for fs.
 */
void
lfs_flush(struct lfs *fs, int flags, int only_onefs)
{
	extern u_int64_t locked_fakequeue_count;
	struct mount *mp, *nmp;
	struct lfs *tfs;

	KASSERT(mutex_owned(&lfs_lock));
	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	/* XXX should we include SEGM_CKP here? */
	if (lfs_writing && !(flags & SEGM_SYNC)) {
		DLOG((DLOG_FLUSH, "lfs_flush: not flushing because another flush is active\n"));
		return;
	}
	while (lfs_writing)
		cv_wait(&lfs_writing_cv, &lfs_lock);
	lfs_writing = 1;

	mutex_exit(&lfs_lock);

	if (only_onefs) {
		KASSERT(fs != NULL);
		if (vfs_busy(fs->lfs_ivnode->v_mount, NULL))
			goto errout;
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, flags);
		mutex_exit(&lfs_lock);
		vfs_unbusy(fs->lfs_ivnode->v_mount, false, NULL);
	} else {
		locked_fakequeue_count = 0;
		mutex_enter(&mountlist_lock);
		for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
		     mp = nmp) {
			if (vfs_busy(mp, &nmp)) {
				DLOG((DLOG_FLUSH, "lfs_flush: fs vfs_busy\n"));
				continue;
			}
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
				tfs = VFSTOUFS(mp)->um_lfs;
				mutex_enter(&lfs_lock);
				lfs_flush_fs(tfs, flags);
				mutex_exit(&lfs_lock);
			}
			vfs_unbusy(mp, false, &nmp);
		}
		mutex_exit(&mountlist_lock);
	}
	LFS_DEBUG_COUNTLOCKED("flush");
	wakeup(&lfs_subsys_pages);

errout:
	mutex_enter(&lfs_lock);
	KASSERT(lfs_writing);
	lfs_writing = 0;
	wakeup(&lfs_writing);
}

#define INOCOUNT(fs)	howmany((fs)->lfs_uinodes, INOPB(fs))
#define INOBYTES(fs)	((fs)->lfs_uinodes * sizeof (struct ufs1_dinode))

/*
 * Make sure that we don't have too many locked buffers.
 * Flush buffers if needed.
 */
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern pid_t lfs_writer_daemon;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_flag & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	ASSERT_NO_SEGLOCK(fs);

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
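	 * (An active dirop returns early above, via the IN_ADIROP check.)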
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_dirops > 0 &&
	       (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		lfs_subsys_pages > LFS_MAX_PAGES ||
		fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0,
		    &lfs_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		DLOG((DLOG_FLUSH, "lfs_check: lqc = %d, max %d\n",
		      locked_queue_count + INOCOUNT(fs), LFS_MAX_BUFS));
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		DLOG((DLOG_FLUSH, "lfs_check: lqb = %ld, max %ld\n",
		      locked_queue_bytes + INOBYTES(fs), LFS_MAX_BYTES));
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		DLOG((DLOG_FLUSH, "lfs_check: lssp = %d, max %d\n",
		      lfs_subsys_pages, LFS_MAX_PAGES));
	if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip)
		DLOG((DLOG_FLUSH, "lfs_check: fssp = %d, trip at %d\n",
		      fs->lfs_pages, lfs_fs_pagetrip));
	if (lfs_dirvcount > LFS_MAX_DIROP)
		DLOG((DLOG_FLUSH, "lfs_check: ldvc = %d, max %d\n",
		      lfs_dirvcount, LFS_MAX_DIROP));
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs))
		DLOG((DLOG_FLUSH, "lfs_check: lfdvc = %d, max %d\n",
		      fs->lfs_dirvcount, LFS_MAX_FSDIROP(fs)));
	if (fs->lfs_diropwait > 0)
		DLOG((DLOG_FLUSH, "lfs_check: ldvw = %d\n",
		      fs->lfs_diropwait));
#endif

	/* If there are too many pending dirops, we have to flush them. */
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		flags |= SEGM_CKP;
	}

	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
	    locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
	    lfs_subsys_pages > LFS_MAX_PAGES ||
	    fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		lfs_flush(fs, flags, 0);
	} else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) {
		/*
		 * If we didn't flush the whole thing, some filesystems
		 * still might want to be flushed.
		 */
		++fs->lfs_pdflush;
		wakeup(&lfs_writer_daemon);
	}

	while (locked_queue_count + INOCOUNT(fs) >= LFS_WAIT_BUFS ||
	       locked_queue_bytes + INOBYTES(fs) >= LFS_WAIT_BYTES ||
	       lfs_subsys_pages > LFS_WAIT_PAGES ||
	       fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	       lfs_dirvcount > LFS_MAX_DIROP) {

		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
		DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		if (error != EWOULDBLOCK)
			break;

		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) >= LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) >= LFS_MAX_BYTES) {
			lfs_flush(fs, flags | SEGM_CKP, 0);
		}
	}
	mutex_exit(&lfs_lock);
	return (error);
}

/*
 * Allocate a new buffer header.
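 * The data area comes from lfs_malloc() rather than the buffer cache; the
 * buffer is marked BC_BUSY | BC_NOCACHE and gets lfs_callback as its iodone
 * routine.  The matching release routine is lfs_freebuf() below.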
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
	struct buf *bp;
	size_t nbytes;

	ASSERT_MAYBE_SEGLOCK(fs);
	nbytes = roundup(size, fsbtob(fs, 1));

	bp = getiobuf(NULL, true);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vp is NULL in lfs_newbuf");
	if (bp == NULL)
		panic("bp is NULL after malloc in lfs_newbuf");
#endif

	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_cflags = BC_BUSY | BC_NOCACHE;
	bp->b_private = fs;

	mutex_enter(&bufcache_lock);
	mutex_enter(&vp->v_interlock);
	bgetvp(vp, bp);
	mutex_exit(&vp->v_interlock);
	mutex_exit(&bufcache_lock);

	return (bp);
}

void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		mutex_enter(&bufcache_lock);
		mutex_enter(&vp->v_interlock);
		brelvp(bp);
		mutex_exit(&vp->v_interlock);
		mutex_exit(&bufcache_lock);
	}
	if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	putiobuf(bp);
}

/*
 * Count buffers on the "locked" queue, and compare it to a pro-forma count.
 * Don't count malloced buffers, since they don't detract from the total.
 */
void
lfs_countlocked(int *count, long *bytes, const char *msg)
{
	struct buf *bp;
	int n = 0;
	long int size = 0L;

	mutex_enter(&bufcache_lock);
	TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED].bq_queue, b_freelist) {
		KASSERT(bp->b_iodone == NULL);
		n++;
		size += bp->b_bufsize;
#ifdef DIAGNOSTIC
		if (n > nbuf)
			panic("lfs_countlocked: this can't happen: more"
			      " buffers locked than exist");
#endif
	}
	/*
	 * Theoretically this function never really does anything.
	 * Give a warning if we have to fix the accounting.
	 */
	if (n != *count) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted buf count"
		      " from %d to %d\n", msg, *count, n));
	}
	if (size != *bytes) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted byte count"
		      " from %ld to %ld\n", msg, *bytes, size));
	}
	*count = n;
	*bytes = size;
	mutex_exit(&bufcache_lock);
	return;
}

int
lfs_wait_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_WAIT_RESOURCE(active + inactive + uvmexp.free, 1);
}

int
lfs_max_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_MAX_RESOURCE(active + inactive + uvmexp.free, 1);
}