/*	$NetBSD: lfs_pages.c,v 1.25 2020/03/17 18:31:38 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_vnops.c	8.13 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.25 2020/03/17 18:31:38 ad Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_bswap.h>
#include <ufs/lfs/ulfs_extern.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

extern kcondvar_t lfs_writerd_cv;

static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t,
    int, int, struct vm_page **);

int
lfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
	    (ap->a_access_type & VM_PROT_WRITE) != 0) {
		return EPERM;
	}
	if ((ap->a_access_type & VM_PROT_WRITE) != 0) {
		mutex_enter(&lfs_lock);
		LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
		mutex_exit(&lfs_lock);
	}

	/*
	 * We're relying on the fact that genfs_getpages() always reads in
	 * entire filesystem blocks.
	 */
	return genfs_getpages(v);
}

/*
 * Wait for a page to become unbusy, possibly printing diagnostic messages
 * as well.
 *
 * Called with vp->v_uobj.vmobjlock held; return with it held.
 */
static void
wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label)
{
	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
	if ((pg->flags & PG_BUSY) == 0)
		return;		/* Nothing to wait for! */

#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
	static struct vm_page *lastpg;

	if (label != NULL && pg != lastpg) {
		if (pg->owner_tag) {
			printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
			       curproc->p_pid, curlwp->l_lid, label,
			       pg, pg->owner, pg->lowner, pg->owner_tag);
		} else {
			printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
			       curproc->p_pid, curlwp->l_lid, label, pg);
		}
	}
	lastpg = pg;
#endif

	uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput");
	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
}

/*
 * This routine is called by lfs_putpages() when it can't complete the
 * write because a page is busy.  This means that either (1) someone,
 * possibly the pagedaemon, is looking at this page, and will give it up
 * presently; or (2) we ourselves are holding the page busy in the
 * process of being written (either gathered or actually on its way to
 * disk).  We don't need to give up the segment lock, but we might need
 * to call lfs_writeseg() to expedite the page's journey to disk.
 *
 * Called with vp->v_uobj.vmobjlock held; return with it held.
 */
/* #define BUSYWAIT */
static void
write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg,
    int seglocked, const char *label)
{
	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
#ifndef BUSYWAIT
	struct inode *ip = VTOI(vp);
	struct segment *sp = fs->lfs_sp;
	int count = 0;

	if (pg == NULL)
		return;

	while (pg->flags & PG_BUSY &&
	    pg->uobject == &vp->v_uobj) {
		rw_exit(vp->v_uobj.vmobjlock);
		if (sp->cbpp - sp->bpp > 1) {
			/* Write gathered pages */
			lfs_updatemeta(sp);
			lfs_release_finfo(fs);
			(void) lfs_writeseg(fs, sp);

			/*
			 * Reinitialize FIP
			 */
			KASSERT(sp->vp == vp);
			lfs_acquire_finfo(fs, ip->i_number,
					  ip->i_gen);
		}
		++count;
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		wait_for_page(vp, pg, label);
	}
	if (label != NULL && count > 1) {
		DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
		      curproc->p_pid, label, (count > 0 ? "looping, " : ""),
		      count));
	}
#else
	preempt(1);
#endif
	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
}

/*
 * Make sure that for all pages in every block in the given range,
 * either all are dirty or all are clean.  If any of the pages
 * we've seen so far are dirty, put the vnode on the paging chain,
 * and mark it IN_PAGING.
 *
 * If checkfirst != 0, don't check all the pages but return at the
 * first dirty page.
 */
static int
check_dirty(struct lfs *fs, struct vnode *vp,
	    off_t startoffset, off_t endoffset, off_t blkeof,
	    int flags, int checkfirst, struct vm_page **pgp)
{
	struct vm_page *pgs[MAXBSIZE / MIN_PAGE_SIZE], *pg;
	off_t soff = 0; /* XXX: gcc */
	voff_t off;
	int i;
	int nonexistent;
	int any_dirty;	/* number of dirty pages */
	int dirty;	/* number of dirty pages in a block */
	int tdirty;
	int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT;
	int pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
	ASSERT_MAYBE_SEGLOCK(fs);
 top:
	any_dirty = 0;

	soff = startoffset;
	KASSERT((soff & (lfs_sb_getbsize(fs) - 1)) == 0);
	while (soff < MIN(blkeof, endoffset)) {

		/*
		 * Mark all pages in extended range busy; find out if any
		 * of them are dirty.
		 */
		nonexistent = dirty = 0;
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
			off = soff + (i << PAGE_SHIFT);
			pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
			if (pg == NULL) {
				++nonexistent;
				continue;
			}
			KASSERT(pg != NULL);

			/*
			 * If we're holding the segment lock, we can deadlock
			 * against a process that has our page and is waiting
			 * for the cleaner, while the cleaner waits for the
			 * segment lock.  Just bail in that case.
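			 * (In other words: that process holds our page busy
			 * and sleeps waiting for the cleaner, the cleaner
			 * sleeps waiting for the segment lock we hold, so
			 * sleeping on the page here would close the cycle.)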
			 */
			if ((pg->flags & PG_BUSY) &&
			    (pagedaemon || LFS_SEGLOCK_HELD(fs))) {
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
				if (pgp)
					*pgp = pg;
				KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
				return -1;
			}

			while (pg->flags & PG_BUSY) {
				wait_for_page(vp, pg, NULL);
				KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
				goto top;
			}
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "lfs_putpages");

			pmap_page_protect(pg, VM_PROT_NONE);
			tdirty =
			    uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN &&
			    (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY ||
			     pmap_clear_modify(pg));
			dirty += tdirty;
		}
		if ((pages_per_block > 0 && nonexistent >= pages_per_block) ||
		    (pages_per_block == 0 && nonexistent > 0)) {
			soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs));
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);
		KASSERT(rw_write_held(vp->v_uobj.vmobjlock));

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, wire them so that
		 * the pagedaemon doesn't bother us about them while
		 * they're on their way to disk.
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
			pg = pgs[i];
			KASSERT(!(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_DIRTY
				  && (pg->flags & PG_DELWRI)));
			KASSERT(pg->flags & PG_BUSY);
			if (dirty) {
				uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
				if (flags & PGO_FREE) {
					/*
					 * Wire the page so that
					 * pdaemon doesn't see it again.
					 */
					uvm_pagelock(pg);
					uvm_pagewire(pg);
					uvm_pageunlock(pg);

					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
				}
			}
			pg->flags &= ~PG_BUSY;
			uvm_pagelock(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (checkfirst && any_dirty)
			break;

		soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs));
	}

	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
	return any_dirty;
}

/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 *
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     If VOP_PUTPAGES is called without the seglock held, we simply take
 *     the seglock and let lfs_segunlock wait for us.
 *     XXX There might be a bad situation if we have to flush a vnode while
 *     XXX lfs_markv is in operation.  As of this writing we panic in this
 *     XXX case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 *
 * (2) We are called with vp->v_uobj.vmobjlock held; we must return with it
 *     released.
 *
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
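 *     (The writer here is the LFS writer daemon; the pagedaemon case in
 *     lfs_putpages below wakes it via lfs_writerd_cv.)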
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
 *     into has been set up for us by lfs_writefile.  If not, we will
 *     have to handle allocating and/or freeing an finfo entry.
 *
 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
 */

/* How many times to loop before we should start to worry */
#define TOOMANY 4

int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t off, max_endoffset;
	bool seglocked, sync, pagedaemon, reclaim;
	struct vm_page *pg, *busypg;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
	struct mount *trans_mp;
	int oreclaim = 0;
	int donewriting = 0;
#ifdef DEBUG
	int debug_n_again, debug_n_dirtyclean;
#endif

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO) != 0;
	reclaim = (ap->a_flags & PGO_RECLAIM) != 0;
	pagedaemon = (curlwp == uvm.pagedaemon_lwp);
	trans_mp = NULL;

	KASSERT(rw_write_held(vp->v_uobj.vmobjlock));

	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		rw_exit(vp->v_uobj.vmobjlock);
		return 0;
	}

 retry:
	/*
	 * If there are no pages, don't do anything.
	 */
	if (vp->v_uobj.uo_npages == 0) {
		mutex_enter(vp->v_interlock);
		if ((vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vn_syncer_remove_from_worklist(vp);
		}
		mutex_exit(vp->v_interlock);
		if (trans_mp)
			fstrans_done(trans_mp);
		rw_exit(vp->v_uobj.vmobjlock);

		/* Remove us from paging queue, if we were on it */
		mutex_enter(&lfs_lock);
		if (ip->i_state & IN_PAGING) {
			ip->i_state &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);

		KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
		return 0;
	}

	blkeof = lfs_blkroundup(fs, ip->i_size);

	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the vnode is being reclaimed or the request
	 * is synchronous.  (If the request is sync, it comes from
	 * lfs_truncate.)
	 *
	 * To avoid being flooded with this request, make these pages
	 * look "active".
	 */
	if (!sync && !reclaim &&
	    ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
		origoffset = ap->a_offlo;
		for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) {
			pg = uvm_pagelookup(&vp->v_uobj, off);
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput2");
				rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
			}
			uvm_pagelock(pg);
			uvm_pageactivate(pg);
			uvm_pageunlock(pg);
		}
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			rw_exit(vp->v_uobj.vmobjlock);
			return 0;
		}
	}

	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(lfs_sb_getbmask(fs));
	max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs))
	    << lfs_sb_getbshift(fs);

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(lfs_blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		rw_exit(vp->v_uobj.vmobjlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		      PRId64 "\n", startoffset));
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT)) {
		DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
		      vp, (int)ip->i_number, ap->a_flags));
		int r = genfs_putpages(v);
		KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
		return r;
	}

	/* Get filesystem transaction protection if we do not have it yet. */
	if (trans_mp == NULL /* && (ap->a_flags & PGO_CLEANIT) != 0 */) {
		if (pagedaemon) {
			/* Pagedaemon must not sleep here. */
			trans_mp = vp->v_mount;
			error = fstrans_start_nowait(trans_mp);
			if (error) {
				rw_exit(vp->v_uobj.vmobjlock);
				return error;
			}
		} else {
			/*
			 * Cannot use vdeadcheck() here as this operation
			 * usually gets used from VOP_RECLAIM().  Test for
			 * change of v_mount instead and retry on change.
			 */
			rw_exit(vp->v_uobj.vmobjlock);
			trans_mp = vp->v_mount;
			fstrans_start(trans_mp);
			if (vp->v_mount != trans_mp) {
				fstrans_done(trans_mp);
				trans_mp = NULL;
			}
			rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
			goto retry;
		}
	}

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;
		KASSERT(rw_write_held(vp->v_uobj.vmobjlock));

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			rw_exit(vp->v_uobj.vmobjlock);
			error = EDEADLK;
			goto out;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags & ~PGO_SYNCIO, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK) {
			KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
			error = r;
			goto out;
		}

		/* One of the pages was busy.  Start over. */
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while(1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
		      debug_n_dirtyclean));
#endif

	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * XXX We must drop the vp->interlock before taking the lfs_lock or we
	 * get a nasty deadlock with lfs_flush_pchain().
	 */
	if (pagedaemon) {
		rw_exit(vp->v_uobj.vmobjlock);
		mutex_enter(&lfs_lock);
		if (!(ip->i_state & IN_PAGING)) {
			ip->i_state |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		cv_broadcast(&lfs_writerd_cv);
		mutex_exit(&lfs_lock);
		preempt();
		KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
		error = EWOULDBLOCK;
		goto out;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));

		/*
		 * NB: lfs_flush_fs can recursively call lfs_putpages,
		 * but it won't reach this branch because it passes
		 * PGO_LOCKED.
		 */

		rw_exit(vp->v_uobj.vmobjlock);
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);

		/*
		 * The flush will have cleaned out this vnode as well,
		 * no need to do more to it.
		 * XXX then why are we falling through and continuing?
		 */

		/*
		 * XXX State may have changed while we dropped the
		 * lock; start over just in case.  The above comment
		 * suggests this should maybe instead be goto out.
		 */
		goto retry;
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
 get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		rw_exit(vp->v_uobj.vmobjlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
		if (error != 0) {
			KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
			goto out;
		}
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/* Note segments written by reclaim; only for debugging */
	mutex_enter(vp->v_interlock);
	if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
		sp->seg_flags |= SEGM_RECLAIM;
		fs->lfs_reclino = ip->i_number;
	}
	mutex_exit(vp->v_interlock);

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP) {
		SEGSUM *ssp = sp->segsum;

		lfs_ss_setflags(fs, ssp,
		    lfs_ss_getflags(fs, ssp) | (SS_DIROP|SS_CONT));
	}

	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 0, &busypg) < 0) {
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				rw_exit(vp->v_uobj.vmobjlock);
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
				rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		oreclaim = (ap->a_flags & PGO_RECLAIM);
		ap->a_flags &= ~PGO_RECLAIM;
		error = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags, &busypg);
		ap->a_flags |= oreclaim;

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %jx (seg %d)\n", error,
			      ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
			      lfs_dtosn(fs, lfs_sb_getoffset(fs))));

			if (oreclaim) {
				rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
				write_and_wait(fs, vp, busypg, seglocked, "again");
				rw_exit(vp->v_uobj.vmobjlock);
			} else {
				if ((sp->seg_flags & SEGM_SINGLE) &&
				    lfs_sb_getcurseg(fs) != fs->lfs_startseg)
					donewriting = 1;
			}
		} else if (error) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %jx (seg %d)\n", error,
			      (int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs),
			      lfs_dtosn(fs, lfs_sb_getoffset(fs))));
		}
		/* genfs_do_putpages loses the interlock */
#ifdef DEBUG
		++debug_n_again;
#endif
		if (oreclaim && error == EAGAIN) {
			DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
			      vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
			rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		}
		if (error == EDEADLK)
			rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
	} while (error == EDEADLK || (oreclaim && error == EAGAIN));
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
#endif

	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked && !donewriting) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(sp);
	KASSERT(sp->vp == vp);
	sp->vp = NULL;

	/*
	 * If we were called from lfs_writefile, we don't need to clean up
	 * the FIP or unlock the segment lock.  We're done.
	 */
	if (seglocked) {
		KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
		goto out;
	}

	/* Clean up FIP and send it to disk. */
	lfs_release_finfo(fs);
	lfs_writeseg(fs, fs->lfs_sp);

	/*
	 * Remove us from paging queue if we wrote all our pages.
	 */
	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		mutex_enter(&lfs_lock);
		if (ip->i_state & IN_PAGING) {
			ip->i_state &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0) {
			DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
			      " num %d\n", ip->i_number, vp->v_numoutput));
			cv_wait(&vp->v_cv, vp->v_interlock);
		}
		mutex_exit(vp->v_interlock);
	}

 out:;
	if (trans_mp)
		fstrans_done(trans_mp);
	KASSERT(!rw_write_held(vp->v_uobj.vmobjlock));
	return error;
}