/*	$NetBSD: lfs_pages.c,v 1.1 2014/05/16 09:34:03 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_vnops.c	8.13 (Berkeley) 6/10/95
 */
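/*
 * Page handling for the log-structured filesystem: the VOP_GETPAGES
 * and VOP_PUTPAGES implementations.  Getting pages mostly defers to
 * genfs_getpages(); putting pages is more involved, because dirty
 * pages must be rounded out to whole filesystem blocks and written
 * through the segment machinery (see lfs_putpages() below).
 */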
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.1 2014/05/16 09:34:03 dholland Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_bswap.h>
#include <ufs/lfs/ulfs_extern.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

extern pid_t lfs_writer_daemon;

static int check_dirty(struct lfs *, struct vnode *, off_t, off_t,
    off_t, int, int, struct vm_page **);

int
lfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	/* Don't allow write faults on the Ifile. */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM &&
	    (ap->a_access_type & VM_PROT_WRITE) != 0) {
		return EPERM;
	}
	if ((ap->a_access_type & VM_PROT_WRITE) != 0) {
		mutex_enter(&lfs_lock);
		LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED);
		mutex_exit(&lfs_lock);
	}

	/*
	 * We're relying on the fact that genfs_getpages() always reads in
	 * entire filesystem blocks.
	 */
	return genfs_getpages(v);
}
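/*
 * The whole-block behavior of genfs_getpages() noted above is what
 * maintains the invariant documented before lfs_putpages() below:
 * for any filesystem block, either all of its pages are resident or
 * none of them are.  check_dirty() depends on this when it busies
 * and examines a block's worth of pages at a time.
 */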
/*
 * Wait for a page to become unbusy, possibly printing diagnostic messages
 * as well.
 *
 * Called with vp->v_interlock held; return with it held.
 */
static void
wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
	if ((pg->flags & PG_BUSY) == 0)
		return;		/* Nothing to wait for! */

#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN)
	static struct vm_page *lastpg;

	if (label != NULL && pg != lastpg) {
		if (pg->owner_tag) {
			printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n",
			       curproc->p_pid, curlwp->l_lid, label,
			       pg, pg->owner, pg->lowner, pg->owner_tag);
		} else {
			printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n",
			       curproc->p_pid, curlwp->l_lid, label, pg);
		}
	}
	lastpg = pg;
#endif

	pg->flags |= PG_WANTED;
	UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0, "lfsput", 0);
	mutex_enter(vp->v_interlock);
}

/*
 * This routine is called by lfs_putpages() when it can't complete the
 * write because a page is busy.  This means that either (1) someone,
 * possibly the pagedaemon, is looking at this page, and will give it up
 * presently; or (2) we ourselves are holding the page busy in the
 * process of being written (either gathered or actually on its way to
 * disk).  We don't need to give up the segment lock, but we might need
 * to call lfs_writeseg() to expedite the page's journey to disk.
 *
 * Called with vp->v_interlock held; return with it held.
 */
/* #define BUSYWAIT */
static void
write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg,
    int seglocked, const char *label)
{
	KASSERT(mutex_owned(vp->v_interlock));
#ifndef BUSYWAIT
	struct inode *ip = VTOI(vp);
	struct segment *sp = fs->lfs_sp;
	int count = 0;

	if (pg == NULL)
		return;

	while (pg->flags & PG_BUSY &&
	       pg->uobject == &vp->v_uobj) {
		mutex_exit(vp->v_interlock);
		if (sp->cbpp - sp->bpp > 1) {
			/* Write gathered pages */
			lfs_updatemeta(sp);
			lfs_release_finfo(fs);
			(void) lfs_writeseg(fs, sp);

			/*
			 * Reinitialize FIP
			 */
			KASSERT(sp->vp == vp);
			lfs_acquire_finfo(fs, ip->i_number,
					  ip->i_gen);
		}
		++count;
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, pg, label);
	}
	if (label != NULL && count > 1) {
		DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n",
		      curproc->p_pid, label, (count > 0 ? "looping, " : ""),
		      count));
	}
#else
	preempt(1);
#endif
	KASSERT(mutex_owned(vp->v_interlock));
}
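/*
 * A note on locking in the helpers above: both wait_for_page() and
 * write_and_wait() drop vp->v_interlock while sleeping or writing,
 * so callers must assume pages may have changed state across any
 * call (check_dirty() below restarts its scan in that case).
 * uvm_pageqlock nests inside the interlock where the page queues
 * are manipulated.
 */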
/*
 * Make sure that for all pages in every block in the given range,
 * either all are dirty or all are clean.  If any of the pages
 * we've seen so far are dirty, put the vnode on the paging chain,
 * and mark it IN_PAGING.
 *
 * If checkfirst != 0, don't check all the pages but return at the
 * first dirty page.
 */
static int
check_dirty(struct lfs *fs, struct vnode *vp,
    off_t startoffset, off_t endoffset, off_t blkeof,
    int flags, int checkfirst, struct vm_page **pgp)
{
	int by_list;
	struct vm_page *curpg = NULL; /* XXX: gcc */
	struct vm_page *pgs[MAXBSIZE / PAGE_SIZE], *pg;
	off_t soff = 0; /* XXX: gcc */
	voff_t off;
	int i;
	int nonexistent;
	int any_dirty;	/* number of dirty pages */
	int dirty;	/* number of dirty pages in a block */
	int tdirty;
	int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT;
	int pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));
	ASSERT_MAYBE_SEGLOCK(fs);
  top:
	by_list = (vp->v_uobj.uo_npages <=
	    ((endoffset - startoffset) >> PAGE_SHIFT) *
	    UVM_PAGE_TREE_PENALTY);
	any_dirty = 0;

	if (by_list) {
		curpg = TAILQ_FIRST(&vp->v_uobj.memq);
	} else {
		soff = startoffset;
	}
	while (by_list || soff < MIN(blkeof, endoffset)) {
		if (by_list) {
			/*
			 * Find the first page in a block.  Skip
			 * blocks outside our area of interest or beyond
			 * the end of file.
			 */
			KASSERT(curpg == NULL
			    || (curpg->flags & PG_MARKER) == 0);
			if (pages_per_block > 1) {
				while (curpg &&
				    ((curpg->offset & fs->lfs_bmask) ||
				     curpg->offset >= vp->v_size ||
				     curpg->offset >= endoffset)) {
					curpg = TAILQ_NEXT(curpg, listq.queue);
					KASSERT(curpg == NULL ||
					    (curpg->flags & PG_MARKER) == 0);
				}
			}
			if (curpg == NULL)
				break;
			soff = curpg->offset;
		}

		/*
		 * Mark all pages in extended range busy; find out if any
		 * of them are dirty.
		 */
		nonexistent = dirty = 0;
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			if (by_list && pages_per_block <= 1) {
				pgs[i] = pg = curpg;
			} else {
				off = soff + (i << PAGE_SHIFT);
				pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off);
				if (pg == NULL) {
					++nonexistent;
					continue;
				}
			}
			KASSERT(pg != NULL);

			/*
			 * If we're holding the segment lock, we can deadlock
			 * against a process that has our page and is waiting
			 * for the cleaner, while the cleaner waits for the
			 * segment lock.  Just bail in that case.
			 */
			if ((pg->flags & PG_BUSY) &&
			    (pagedaemon || LFS_SEGLOCK_HELD(fs))) {
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n"));
				if (pgp)
					*pgp = pg;
				KASSERT(mutex_owned(vp->v_interlock));
				return -1;
			}

			while (pg->flags & PG_BUSY) {
				wait_for_page(vp, pg, NULL);
				KASSERT(mutex_owned(vp->v_interlock));
				if (i > 0)
					uvm_page_unbusy(pgs, i);
				KASSERT(mutex_owned(vp->v_interlock));
				goto top;
			}
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "lfs_putpages");

			pmap_page_protect(pg, VM_PROT_NONE);
			tdirty = (pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0);
			dirty += tdirty;
		}
		if (pages_per_block > 0 && nonexistent >= pages_per_block) {
			if (by_list) {
				curpg = TAILQ_NEXT(curpg, listq.queue);
			} else {
				soff += fs->lfs_bsize;
			}
			continue;
		}

		any_dirty += dirty;
		KASSERT(nonexistent == 0);
		KASSERT(mutex_owned(vp->v_interlock));

		/*
		 * If any are dirty make all dirty; unbusy them,
		 * but if we were asked to clean, wire them so that
		 * the pagedaemon doesn't bother us about them while
		 * they're on their way to disk.
		 */
		for (i = 0; i == 0 || i < pages_per_block; i++) {
			KASSERT(mutex_owned(vp->v_interlock));
			pg = pgs[i];
			KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI)));
			KASSERT(pg->flags & PG_BUSY);
			if (dirty) {
				pg->flags &= ~PG_CLEAN;
				if (flags & PGO_FREE) {
					/*
					 * Wire the page so that
					 * pdaemon doesn't see it again.
					 */
					mutex_enter(&uvm_pageqlock);
					uvm_pagewire(pg);
					mutex_exit(&uvm_pageqlock);

					/* Suspended write flag */
					pg->flags |= PG_DELWRI;
				}
			}
			if (pg->flags & PG_WANTED)
				wakeup(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}

		if (checkfirst && any_dirty)
			break;

		if (by_list) {
			curpg = TAILQ_NEXT(curpg, listq.queue);
		} else {
			soff += MAX(PAGE_SIZE, fs->lfs_bsize);
		}
	}

	KASSERT(mutex_owned(vp->v_interlock));
	return any_dirty;
}
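/*
 * check_dirty() returns -1 if it bailed out to avoid deadlock,
 * 0 if every page in the range was clean, or a positive count of
 * dirty pages.  lfs_putpages() below keys off this to decide whether
 * the pages can simply be handed to genfs_putpages() or must go
 * through the segment writer.
 */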
/*
 * lfs_putpages functions like genfs_putpages except that
 *
 * (1) It needs to bounds-check the incoming requests to ensure that
 *     they are block-aligned; if they are not, expand the range and
 *     do the right thing in case, e.g., the requested range is clean
 *     but the expanded range is dirty.
 *
 * (2) It needs to explicitly send blocks to be written when it is done.
 *     If VOP_PUTPAGES is called without the seglock held, we simply take
 *     the seglock and let lfs_segunlock wait for us.
 *     XXX There might be a bad situation if we have to flush a vnode while
 *     XXX lfs_markv is in operation.  As of this writing we panic in this
 *     XXX case.
 *
 * Assumptions:
 *
 * (1) The caller does not hold any pages in this vnode busy.  If it does,
 *     there is a danger that when we expand the page range and busy the
 *     pages we will deadlock.
 *
 * (2) We are called with vp->v_interlock held; we must return with it
 *     released.
 *
 * (3) We don't absolutely have to free pages right away, provided that
 *     the request does not have PGO_SYNCIO.  When the pagedaemon gives
 *     us a request with PGO_FREE, we take the pages out of the paging
 *     queue and wake up the writer, which will handle freeing them for us.
 *
 *     We ensure that for any filesystem block, all pages for that
 *     block are either resident or not, even if those pages are higher
 *     than EOF; that means that we will be getting requests to free
 *     "unused" pages above EOF all the time, and should ignore them.
 *
 * (4) If we are called with PGO_LOCKED, the finfo array we are to write
 *     into has been set up for us by lfs_writefile.  If not, we will
 *     have to handle allocating and/or freeing an finfo entry.
 *
 * XXX note that we're (ab)using PGO_LOCKED as "seglock held".
 */

/* How many times to loop before we should start to worry */
#define TOOMANY 4

int
lfs_putpages(void *v)
{
	int error;
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	off_t origoffset, startoffset, endoffset, origendoffset, blkeof;
	off_t off, max_endoffset;
	bool seglocked, sync, pagedaemon, reclaim;
	struct vm_page *pg, *busypg;
	UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist);
	int oreclaim = 0;
	int donewriting = 0;
#ifdef DEBUG
	int debug_n_again, debug_n_dirtyclean;
#endif

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;
	sync = (ap->a_flags & PGO_SYNCIO) != 0;
	reclaim = (ap->a_flags & PGO_RECLAIM) != 0;
	pagedaemon = (curlwp == uvm.pagedaemon_lwp);

	KASSERT(mutex_owned(vp->v_interlock));

	/* Putpages does nothing for metadata. */
	if (vp == fs->lfs_ivnode || vp->v_type != VREG) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/*
	 * If there are no pages, don't do anything.
	 */
	if (vp->v_uobj.uo_npages == 0) {
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_iflag & VI_ONWORKLST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_iflag &= ~VI_WRMAPDIRTY;
			vn_syncer_remove_from_worklist(vp);
		}
		mutex_exit(vp->v_interlock);

		/* Remove us from paging queue, if we were on it */
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);

		KASSERT(!mutex_owned(vp->v_interlock));
		return 0;
	}

	blkeof = lfs_blkroundup(fs, ip->i_size);
	/*
	 * Ignore requests to free pages past EOF but in the same block
	 * as EOF, unless the vnode is being reclaimed or the request
	 * is synchronous.  (If the request is sync, it comes from
	 * lfs_truncate.)
	 *
	 * To avoid being flooded with this request, make these pages
	 * look "active".
	 */
	if (!sync && !reclaim &&
	    ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) {
		origoffset = ap->a_offlo;
		for (off = origoffset; off < blkeof; off += fs->lfs_bsize) {
			pg = uvm_pagelookup(&vp->v_uobj, off);
			KASSERT(pg != NULL);
			while (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, vp->v_interlock, 0,
						    "lfsput2", 0);
				mutex_enter(vp->v_interlock);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
		}
		ap->a_offlo = blkeof;
		if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
			mutex_exit(vp->v_interlock);
			return 0;
		}
	}

	/*
	 * Extend page range to start and end at block boundaries.
	 * (For the purposes of VOP_PUTPAGES, fragments don't exist.)
	 */
	origoffset = ap->a_offlo;
	origendoffset = ap->a_offhi;
	startoffset = origoffset & ~(fs->lfs_bmask);
	max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift)
	    << fs->lfs_bshift;

	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		endoffset = max_endoffset;
		origendoffset = endoffset;
	} else {
		origendoffset = round_page(ap->a_offhi);
		endoffset = round_page(lfs_blkroundup(fs, origendoffset));
	}

	KASSERT(startoffset > 0 || endoffset >= startoffset);
	if (startoffset == endoffset) {
		/* Nothing to do, why were we called? */
		mutex_exit(vp->v_interlock);
		DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %"
		      PRId64 "\n", startoffset));
		return 0;
	}

	ap->a_offlo = startoffset;
	ap->a_offhi = endoffset;

	/*
	 * If not cleaning, just send the pages through genfs_putpages
	 * to be returned to the pool.
	 */
	if (!(ap->a_flags & PGO_CLEANIT)) {
		DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n",
		      vp, (int)ip->i_number, ap->a_flags));
		int r = genfs_putpages(v);
		KASSERT(!mutex_owned(vp->v_interlock));
		return r;
	}

	/* Set PGO_BUSYFAIL to avoid deadlocks */
	ap->a_flags |= PGO_BUSYFAIL;

	/*
	 * Likewise, if we are asked to clean but the pages are not
	 * dirty, we can just free them using genfs_putpages.
	 */
#ifdef DEBUG
	debug_n_dirtyclean = 0;
#endif
	do {
		int r;
		KASSERT(mutex_owned(vp->v_interlock));

		/* Count the number of dirty pages */
		r = check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 1, NULL);
		if (r < 0) {
			/* Pages are busy with another process */
			mutex_exit(vp->v_interlock);
			return EDEADLK;
		}
		if (r > 0) /* Some pages are dirty */
			break;

		/*
		 * Sometimes pages are dirtied between the time that
		 * we check and the time we try to clean them.
		 * Instruct lfs_gop_write to return EDEADLK in this case
		 * so we can write them properly.
		 */
		ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE;
		r = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags & ~PGO_SYNCIO, &busypg);
		ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE;
		if (r != EDEADLK) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return r;
		}

		/* One of the pages was busy.  Start over. */
		mutex_enter(vp->v_interlock);
		wait_for_page(vp, busypg, "dirtyclean");
#ifdef DEBUG
		++debug_n_dirtyclean;
#endif
	} while (1);

#ifdef DEBUG
	if (debug_n_dirtyclean > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n",
		      debug_n_dirtyclean));
#endif
	/*
	 * Dirty and asked to clean.
	 *
	 * Pagedaemon can't actually write LFS pages; wake up
	 * the writer to take care of that.  The writer will
	 * notice the pager inode queue and act on that.
	 *
	 * XXX We must drop the vp->interlock before taking the lfs_lock or we
	 * get a nasty deadlock with lfs_flush_pchain().
	 */
	if (pagedaemon) {
		mutex_exit(vp->v_interlock);
		mutex_enter(&lfs_lock);
		if (!(ip->i_flags & IN_PAGING)) {
			ip->i_flags |= IN_PAGING;
			TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		wakeup(&lfs_writer_daemon);
		mutex_exit(&lfs_lock);
		preempt();
		KASSERT(!mutex_owned(vp->v_interlock));
		return EWOULDBLOCK;
	}

	/*
	 * If this is a file created in a recent dirop, we can't flush its
	 * inode until the dirop is complete.  Drain dirops, then flush the
	 * filesystem (taking care of any other pending dirops while we're
	 * at it).
	 */
	if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT &&
	    (vp->v_uflag & VU_DIROP)) {
		DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n"));

		lfs_writer_enter(fs, "ppdirop");

		/* Note if we hold the vnode locked */
		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n"));
		} else {
			DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n"));
		}
		mutex_exit(vp->v_interlock);

		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, sync ? SEGM_SYNC : 0);
		mutex_exit(&lfs_lock);

		mutex_enter(vp->v_interlock);
		lfs_writer_leave(fs);

		/*
		 * The flush will have cleaned out this vnode as well,
		 * no need to do more to it.
		 */
	}

	/*
	 * This is it.  We are going to write some pages.  From here on
	 * down it's all just mechanics.
	 *
	 * Don't let genfs_putpages wait; lfs_segunlock will wait for us.
	 */
	ap->a_flags &= ~PGO_SYNCIO;

	/*
	 * If we've already got the seglock, flush the node and return.
	 * The FIP has already been set up for us by lfs_writefile,
	 * and FIP cleanup and lfs_updatemeta will also be done there,
	 * unless genfs_putpages returns EDEADLK; then we must flush
	 * what we have, and correct FIP and segment header accounting.
	 */
  get_seglock:
	/*
	 * If we are not called with the segment locked, lock it.
	 * Account for a new FIP in the segment header, and set sp->vp.
	 * (This should duplicate the setup at the top of lfs_writefile().)
	 */
	seglocked = (ap->a_flags & PGO_LOCKED) != 0;
	if (!seglocked) {
		mutex_exit(vp->v_interlock);
		error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0));
		if (error != 0) {
			KASSERT(!mutex_owned(vp->v_interlock));
			return error;
		}
		mutex_enter(vp->v_interlock);
		lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
	}
	sp = fs->lfs_sp;
	KASSERT(sp->vp == NULL);
	sp->vp = vp;

	/* Note segments written by reclaim; only for debugging */
	if (vdead_check(vp, VDEAD_NOWAIT) != 0) {
		sp->seg_flags |= SEGM_RECLAIM;
		fs->lfs_reclino = ip->i_number;
	}

	/*
	 * Ensure that the partial segment is marked SS_DIROP if this
	 * vnode is a DIROP.
	 */
	if (!seglocked && vp->v_uflag & VU_DIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);
	/*
	 * Loop over genfs_putpages until all pages are gathered.
	 * genfs_putpages() drops the interlock, so reacquire it if necessary.
	 * Whenever we lose the interlock we have to rerun check_dirty, as
	 * well, since more pages might have been dirtied in our absence.
	 */
#ifdef DEBUG
	debug_n_again = 0;
#endif
	do {
		busypg = NULL;
		KASSERT(mutex_owned(vp->v_interlock));
		if (check_dirty(fs, vp, startoffset, endoffset, blkeof,
		    ap->a_flags, 0, &busypg) < 0) {
			mutex_exit(vp->v_interlock);
			/* XXX why? --ks */
			mutex_enter(vp->v_interlock);
			write_and_wait(fs, vp, busypg, seglocked, NULL);
			if (!seglocked) {
				mutex_exit(vp->v_interlock);
				lfs_release_finfo(fs);
				lfs_segunlock(fs);
				mutex_enter(vp->v_interlock);
			}
			sp->vp = NULL;
			goto get_seglock;
		}

		busypg = NULL;
		KASSERT(!mutex_owned(&uvm_pageqlock));
		oreclaim = (ap->a_flags & PGO_RECLAIM);
		ap->a_flags &= ~PGO_RECLAIM;
		error = genfs_do_putpages(vp, startoffset, endoffset,
		    ap->a_flags, &busypg);
		ap->a_flags |= oreclaim;

		if (error == EDEADLK || error == EAGAIN) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %x (seg %d)\n", error,
			      ip->i_number, fs->lfs_offset,
			      lfs_dtosn(fs, fs->lfs_offset)));

			if (oreclaim) {
				mutex_enter(vp->v_interlock);
				write_and_wait(fs, vp, busypg, seglocked, "again");
				mutex_exit(vp->v_interlock);
			} else {
				if ((sp->seg_flags & SEGM_SINGLE) &&
				    fs->lfs_curseg != fs->lfs_startseg)
					donewriting = 1;
			}
		} else if (error) {
			DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned"
			      " %d ino %d off %x (seg %d)\n", error,
			      (int)ip->i_number, fs->lfs_offset,
			      lfs_dtosn(fs, fs->lfs_offset)));
		}
		/* genfs_do_putpages loses the interlock */
#ifdef DEBUG
		++debug_n_again;
#endif
		if (oreclaim && error == EAGAIN) {
			DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n",
			      vp, (int)ip->i_number, vp->v_iflag, ap->a_flags));
			mutex_enter(vp->v_interlock);
		}
		if (error == EDEADLK)
			mutex_enter(vp->v_interlock);
	} while (error == EDEADLK || (oreclaim && error == EAGAIN));
#ifdef DEBUG
	if (debug_n_again > TOOMANY)
		DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again));
#endif

	KASSERT(sp != NULL && sp->vp == vp);
	if (!seglocked && !donewriting) {
		sp->vp = NULL;

		/* Write indirect blocks as well */
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir);
		lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir);

		KASSERT(sp->vp == NULL);
		sp->vp = vp;
	}

	/*
	 * Blocks are now gathered into a segment waiting to be written.
	 * All that's left to do is update metadata, and write them.
	 */
	lfs_updatemeta(sp);
	KASSERT(sp->vp == vp);
	sp->vp = NULL;

	/*
	 * If we were called from lfs_writefile, we don't need to clean up
	 * the FIP or unlock the segment lock.  We're done.
	 */
	if (seglocked) {
		KASSERT(!mutex_owned(vp->v_interlock));
		return error;
	}

	/* Clean up FIP and send it to disk. */
	lfs_release_finfo(fs);
	lfs_writeseg(fs, fs->lfs_sp);
	/*
	 * Remove us from paging queue if we wrote all our pages.
	 */
	if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) {
		mutex_enter(&lfs_lock);
		if (ip->i_flags & IN_PAGING) {
			ip->i_flags &= ~IN_PAGING;
			TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
		}
		mutex_exit(&lfs_lock);
	}

	/*
	 * XXX - with the malloc/copy writeseg, the pages are freed by now
	 * even if we don't wait (e.g. if we hold a nested lock).  This
	 * will not be true if we stop using malloc/copy.
	 */
	KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT);
	lfs_segunlock(fs);

	/*
	 * Wait for v_numoutput to drop to zero.  The seglock should
	 * take care of this, but there is a slight possibility that
	 * aiodoned might not have got around to our buffers yet.
	 */
	if (sync) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0) {
			DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on"
			      " num %d\n", ip->i_number, vp->v_numoutput));
			cv_wait(&vp->v_cv, vp->v_interlock);
		}
		mutex_exit(vp->v_interlock);
	}
	KASSERT(!mutex_owned(vp->v_interlock));
	return error;
}