/*	$NetBSD: uvm_vnode.c,v 1.48 2001/03/10 22:46:51 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.
 * Copyright (c) 1990 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vnode_pager.c	8.8 (Berkeley) 2/13/94
 * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
 */

#include "fs_nfs.h"
#include "opt_uvmhist.h"
#include "opt_ddb.h"

/*
 * uvm_vnode.c: the vnode pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_vnode.h>

/*
 * functions
 */

static void	uvn_cluster __P((struct uvm_object *, voff_t, voff_t *,
		    voff_t *));
static void	uvn_detach __P((struct uvm_object *));
static int	uvn_findpage __P((struct uvm_object *, voff_t,
		    struct vm_page **, int));
static boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t,
		    int));
static int	uvn_get __P((struct uvm_object *, voff_t, vm_page_t *,
		    int *, int, vm_prot_t, int, int));
static int	uvn_put __P((struct uvm_object *, vm_page_t *, int,
		    boolean_t));
static void	uvn_reference __P((struct uvm_object *));
static boolean_t uvn_releasepg __P((struct vm_page *,
		    struct vm_page **));

/*
 * master pager structure
 */

struct uvm_pagerops uvm_vnodeops = {
	NULL,
	uvn_reference,
	uvn_detach,
	NULL,
	uvn_flush,
	uvn_get,
	uvn_put,
	uvn_cluster,
	uvm_mk_pcluster,
	uvn_releasepg,
};

/*
 * the ops!
 */

/*
 * uvn_attach
 *
 * attach a vnode structure to a VM object.  if the vnode is already
 * attached, then just bump the reference count by one and return the
 * VM object.  if not already attached, attach and return the new VM obj.
 * the "accessprot" tells the max access the attaching thread wants to
 * have to our pages.
 *
 * => caller must _not_ already be holding the lock on the uvm_object.
 * => in fact, nothing should be locked so that we can sleep here.
 * => note that uvm_object is first thing in vnode structure, so their
 *    pointers are equiv.
 */

struct uvm_object *
uvn_attach(arg, accessprot)
	void *arg;
	vm_prot_t accessprot;
{
	struct vnode *vp = arg;
	struct uvm_vnode *uvn = &vp->v_uvm;
	struct vattr vattr;
	int result;
	struct partinfo pi;
	voff_t used_vnode_size;
	UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
	used_vnode_size = (voff_t)0;

	/*
	 * first get a lock on the uvn.
	 */
	simple_lock(&uvn->u_obj.vmobjlock);
	while (uvn->u_flags & VXLOCK) {
		uvn->u_flags |= VXWANT;
		UVMHIST_LOG(maphist, "  SLEEPING on blocked vn",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
		    "uvn_attach", 0);
		simple_lock(&uvn->u_obj.vmobjlock);
		UVMHIST_LOG(maphist,"  WOKE UP",0,0,0,0);
	}

	/*
	 * if we're mapping a BLK device, make sure it is a disk.
	 */
	if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
		simple_unlock(&uvn->u_obj.vmobjlock);
		UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
		return(NULL);
	}

#ifdef DIAGNOSTIC
	if (vp->v_type != VREG) {
		panic("uvn_attach: vp %p not VREG", vp);
	}
#endif

	/*
	 * set up our idea of the size
	 * if this hasn't been done already.
	 */
	if (uvn->u_size == VSIZENOTSET) {

		uvn->u_flags |= VXLOCK;
		simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
			/* XXX: curproc? */
		if (vp->v_type == VBLK) {
			/*
			 * We could implement this as a specfs getattr call, but:
			 *
			 *	(1) VOP_GETATTR() would get the file system
			 *	    vnode operation, not the specfs operation.
			 *
			 *	(2) All we want is the size, anyhow.
			 */
			result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
			    DIOCGPART, (caddr_t)&pi, FREAD, curproc);
			if (result == 0) {
				/* XXX should remember blocksize */
				used_vnode_size = (voff_t)pi.disklab->d_secsize *
				    (voff_t)pi.part->p_size;
			}
		} else {
			result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
			if (result == 0)
				used_vnode_size = vattr.va_size;
		}

		/* relock object */
		simple_lock(&uvn->u_obj.vmobjlock);

		if (uvn->u_flags & VXWANT)
			wakeup(uvn);
		uvn->u_flags &= ~(VXLOCK|VXWANT);

		if (result != 0) {
			simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
			UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
			return(NULL);
		}
		uvn->u_size = used_vnode_size;

	}

	/* unlock and return */
	simple_unlock(&uvn->u_obj.vmobjlock);
	UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
	    0, 0, 0);
	return (&uvn->u_obj);
}

/*
 * uvn_reference
 *
 * duplicate a reference to a VM object.  Note that the reference
 * count must already be at least one (the passed in reference) so
 * there is no chance of the uvn being killed or locked out here.
 *
 * => caller must call with object unlocked.
 * => caller must be using the same accessprot as was used at attach time
 */

static void
uvn_reference(uobj)
	struct uvm_object *uobj;
{
	VREF((struct vnode *)uobj);
}

/*
 * uvn_detach
 *
 * remove a reference to a VM object.
 *
 * => caller must call with object unlocked and map locked.
 */
static void
uvn_detach(uobj)
	struct uvm_object *uobj;
{
	vrele((struct vnode *)uobj);
}

/*
 * uvn_releasepg: handle a released page in a uvn
 *
 * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
 *	to dispose of.
 * => caller must handle the PG_WANTED case
 * => called with page's object locked, pageq's unlocked
 * => returns TRUE if page's object is still alive, FALSE if we
 *	killed the page's object.  if we return TRUE, then we
 *	return with the object locked.
 * => if (nextpgp != NULL) => we return the next page on the queue, and return
 *	with the page queues locked [for pagedaemon]
 * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
 * => we kill the uvn if it is not referenced and we are supposed to
 *	kill it ("relkill").
 */

boolean_t
uvn_releasepg(pg, nextpgp)
	struct vm_page *pg;
	struct vm_page **nextpgp;	/* OUT */
{
	KASSERT(pg->flags & PG_RELEASED);

	/*
	 * dispose of the page [caller handles PG_WANTED]
	 */
	pmap_page_protect(pg, VM_PROT_NONE);
	uvm_lock_pageq();
	if (nextpgp)
		*nextpgp = TAILQ_NEXT(pg, pageq);
	uvm_pagefree(pg);
	if (!nextpgp)
		uvm_unlock_pageq();

	return (TRUE);
}

/*
 * issues to consider:
 *   there are two tailq's in the uvm structure... one for pending async
 *   i/o and one for "done" async i/o.  to do an async i/o one puts
 *   a buf on the "pending" list (protected by splbio()), starts the
 *   i/o and returns 0.  when the i/o is done, we expect
 *   some sort of "i/o done" function to be called (at splbio(), interrupt
 *   time).  this function should remove the buf from the pending list
 *   and place it on the "done" list and wakeup the daemon.  the daemon
 *   will run at normal spl() and will remove all items from the "done"
 *   list and call the iodone hook for each done request (see uvm_pager.c).
 *
 * => return KERN_SUCCESS (aio finished, free it).  otherwise requeue for
 *	later collection.
 * => called with pageq's locked by the daemon.
 *
 * general outline:
 * - "try" to lock object.  if fail, just return (will try again later)
 * - drop "u_nio" (this req is done!)
 * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio }
 * - get "page" structures (atop?).
 * - handle "wanted" pages
 * - handle "released" pages [using pgo_releasepg]
 *   >>> pgo_releasepg may kill the object
 * don't forget to look at "object" wanted flag in all cases.
 */
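
/*
 * a minimal sketch (illustrative only, not this file's code) of the
 * pending/done handoff described above, using hypothetical queue and
 * linkage names; the real lists live in the uvm structure and the
 * iodone processing in uvm_pager.c:
 *
 *	i/o completion, interrupt time, at splbio():
 *		s = splbio();
 *		TAILQ_REMOVE(&aio_pending, bp, b_freelist);
 *		TAILQ_INSERT_TAIL(&aio_done, bp, b_freelist);
 *		wakeup(&aio_done);
 *		splx(s);
 *
 *	daemon, at normal spl, drains the "done" list:
 *		s = splbio();
 *		while ((bp = TAILQ_FIRST(&aio_done)) != NULL) {
 *			TAILQ_REMOVE(&aio_done, bp, b_freelist);
 *			splx(s);
 *			(call the iodone hook for bp -- see uvm_pager.c)
 *			s = splbio();
 *		}
 *		splx(s);
 */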

/*
 * uvn_flush: flush pages out of a uvm object.
 *
 * => "stop == 0" means flush all pages at or after "start".
 * => object should be locked by caller.  we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT), or
 *	if PGO_SYNCIO is set and there are pages busy.
 *	we return with the object locked.
 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
 *	thus, a caller might want to unlock higher level resources
 *	(e.g. vm_map) before calling flush.
 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
 *	unlock the object nor block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *	for flushing.
 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
 *	that new pages are inserted on the tail end of the list.  thus,
 *	we can make a complete pass through the object in one go by starting
 *	at the head and working towards the tail (new pages are put in
 *	front of us).
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the lock on them [e.g. pagedaemon had
 *	better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *
 * comment on "cleaning" object and PG_BUSY pages:
 *	this routine is holding the lock on the object.  the only time
 *	that it can run into a PG_BUSY page that it does not own is if
 *	some other process has started I/O on the page (e.g. either
 *	a pagein, or a pageout).  if the PG_BUSY page is being paged
 *	in, then it can not be dirty (!PG_CLEAN) because no one has
 *	had a chance to modify it yet.  if the PG_BUSY page is being
 *	paged out then it means that someone else has already started
 *	cleaning the page for us (how nice!).  in this case, if we
 *	have syncio specified, then after we make our pass through the
 *	object we need to wait for the other PG_BUSY pages to clear
 *	off (i.e. we need to do an iosync).  also note that once a
 *	page is PG_BUSY it must stay in its object until it is un-busyed.
 *
 * note on page traversal:
 *	we can traverse the pages in an object either by going down the
 *	linked list in "uobj->memq", or we can go over the address range
 *	by page doing hash table lookups for each address.  depending
 *	on how many pages are in the object it may be cheaper to do one
 *	or the other.  we set "by_list" to true if we are using memq.
 *	if the cost of a hash lookup was equal to the cost of the list
 *	traversal we could compare the number of pages in the start->stop
 *	range to the total number of pages in the object.  however, it
 *	seems that a hash table lookup is more expensive than the linked
 *	list traversal, so we multiply the number of pages in the
 *	start->stop range by a penalty which we define below (e.g. with
 *	a penalty of 4, a 16 page range is walked by list only if the
 *	object holds at most 64 pages).
 */

#define UVN_HASH_PENALTY 4	/* XXX: a guess */

static boolean_t
uvn_flush(uobj, start, stop, flags)
	struct uvm_object *uobj;
	voff_t start, stop;
	int flags;
{
	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
	struct vnode *vp = (struct vnode *)uobj;
	struct vm_page *pp, *ppnext, *ptmp;
	struct vm_page *pps[256], **ppsp;
	int s;
	int npages, result, lcv;
	boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	voff_t curoff;
	u_short pp_version;
	UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x",
	    uobj, start, stop, flags);
	KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));

	if (uobj->uo_npages == 0) {
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
		    (vp->v_flag & VONWORKLST)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
		return TRUE;
	}

#ifdef DEBUG
	if (uvn->u_size == VSIZENOTSET) {
		printf("uvn_flush: size not set vp %p\n", uvn);
		vprint("uvn_flush VSIZENOTSET", vp);
		flags |= PGO_ALLPAGES;
	}
#endif

	/*
	 * get init vals and determine how we are going to traverse object
	 */

	if (stop == 0) {
		stop = trunc_page(LLONG_MAX);
	}
	curoff = 0;
	need_iosync = FALSE;
	retval = TRUE;
	wasclean = TRUE;
	if (flags & PGO_ALLPAGES) {
		all = TRUE;
		by_list = TRUE;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		all = FALSE;
		by_list = (uobj->uo_npages <=
		    ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
	}

	UVMHIST_LOG(maphist,
	    " flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x",
	    start, stop, by_list, flags);

	/*
	 * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as
	 * a _hint_ as to how up to date the PG_CLEAN bit is.  if the hint
	 * is wrong it will only prevent us from clustering... it won't break
	 * anything.  we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster
	 * will set them as it syncs PG_CLEAN.  This is only an issue if we
	 * are looking at non-inactive pages (because an inactive page's
	 * PG_CLEAN bit is always up to date since there are no mappings).
	 * [borrowed PG_CLEANCHK idea from FreeBSD VM]
	 */

	if ((flags & PGO_CLEANIT) != 0 &&
	    uobj->pgops->pgo_mk_pcluster != NULL) {
		if (by_list) {
			TAILQ_FOREACH(pp, &uobj->memq, listq) {
				if (!all &&
				    (pp->offset < start || pp->offset >= stop))
					continue;
				pp->flags &= ~PG_CLEANCHK;
			}

		} else {	/* by hash */
			for (curoff = start ; curoff < stop;
			    curoff += PAGE_SIZE) {
				pp = uvm_pagelookup(uobj, curoff);
				if (pp)
					pp->flags &= ~PG_CLEANCHK;
			}
		}
	}

	/*
	 * now do it.  note: we must update ppnext in the body of the loop
	 * or we will get stuck.  we need to use ppnext because we may free
	 * "pp" before doing the next loop.
	 */

	if (by_list) {
		pp = TAILQ_FIRST(&uobj->memq);
	} else {
		curoff = start;
		pp = uvm_pagelookup(uobj, curoff);
	}

	ppnext = NULL;
	ppsp = NULL;
	uvm_lock_pageq();

	/* locked: both page queues and uobj */
	for ( ; (by_list && pp != NULL) ||
	    (!by_list && curoff < stop) ; pp = ppnext) {
		if (by_list) {
			if (!all &&
			    (pp->offset < start || pp->offset >= stop)) {
				ppnext = TAILQ_NEXT(pp, listq);
				continue;
			}
		} else {
			curoff += PAGE_SIZE;
			if (pp == NULL) {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
				continue;
			}
		}

		/*
		 * handle case where we do not need to clean page (either
		 * because we are not cleaning or because the page is not
		 * dirty or is busy):
		 *
		 * NOTE: we are allowed to deactivate a non-wired active
		 * PG_BUSY page, but once a PG_BUSY page is on the inactive
		 * queue it must stay put until it is !PG_BUSY (so as not to
		 * confuse pagedaemon).
		 */

		if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
			needs_clean = FALSE;
			if (!async)
				need_iosync = TRUE;
		} else {

			/*
			 * freeing: nuke all mappings so we can sync
			 * PG_CLEAN bit with no race
			 */
			if ((pp->flags & PG_CLEAN) != 0 &&
			    (flags & PGO_FREE) != 0 &&
			    /* XXX ACTIVE|INACTIVE test unnecessary? */
			    (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0)
				pmap_page_protect(pp, VM_PROT_NONE);
			if ((pp->flags & PG_CLEAN) != 0 &&
			    pmap_is_modified(pp))
				pp->flags &= ~(PG_CLEAN);
			pp->flags |= PG_CLEANCHK;
			needs_clean = ((pp->flags & PG_CLEAN) == 0);
		}

		/*
		 * if we don't need a clean... load ppnext and dispose of pp
		 */
		if (!needs_clean) {
			if (by_list)
				ppnext = TAILQ_NEXT(pp, listq);
			else {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
			}

			if (flags & PGO_DEACTIVATE) {
				if ((pp->pqflags & PQ_INACTIVE) == 0 &&
				    (pp->flags & PG_BUSY) == 0 &&
				    pp->wire_count == 0) {
					pmap_clear_reference(pp);
					uvm_pagedeactivate(pp);
				}

			} else if (flags & PGO_FREE) {
				if (pp->flags & PG_BUSY) {
					pp->flags |= PG_RELEASED;
				} else {
					pmap_page_protect(pp, VM_PROT_NONE);
					uvm_pagefree(pp);
				}
			}
			/* ppnext is valid so we can continue... */
			continue;
		}

		/*
		 * pp points to a page in the locked object that we are
		 * working on.  if it is !PG_CLEAN,!PG_BUSY and we asked
		 * for cleaning (PGO_CLEANIT), we clean it now.
		 *
		 * let uvm_pager_put attempt a clustered pageout.
		 * note: locked: uobj and page queues.
		 */

		wasclean = FALSE;
		pp->flags |= PG_BUSY;	/* we 'own' page now */
		UVM_PAGE_OWN(pp, "uvn_flush");
		pmap_page_protect(pp, VM_PROT_READ);
		pp_version = pp->version;
		ppsp = pps;
		npages = sizeof(pps) / sizeof(struct vm_page *);

		/* locked: page queues, uobj */
		result = uvm_pager_put(uobj, pp, &ppsp, &npages,
		    flags | PGO_DOACTCLUST, start, stop);
		/* unlocked: page queues, uobj */

		/*
		 * at this point nothing is locked.  if we did an async I/O
		 * it is remotely possible for the async i/o to complete and
		 * the page "pp" be freed or what not before we get a chance
		 * to relock the object.  in order to detect this, we have
		 * saved the version number of the page in "pp_version".
		 */

		/* relock! */
		simple_lock(&uobj->vmobjlock);
		uvm_lock_pageq();

		/*
		 * the cleaning operation is now done.  finish up.  note that
		 * on error uvm_pager_put drops the cluster for us.
		 * on success uvm_pager_put returns the cluster to us in
		 * ppsp/npages.
		 */

		/*
		 * for pending async i/o if we are not deactivating/freeing
		 * we can move on to the next page.
		 */

		if (result == 0 && async &&
		    (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {

			/*
			 * no per-page ops: refresh ppnext and continue
			 */
			if (by_list) {
				if (pp->version == pp_version)
					ppnext = TAILQ_NEXT(pp, listq);
				else
					ppnext = TAILQ_FIRST(&uobj->memq);
			} else {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
			}
			continue;
		}

		/*
		 * need to look at each page of the I/O operation.  we defer
		 * processing "pp" until the last trip through this "for" loop
		 * so that we can load "ppnext" for the main loop after we
		 * play with the cluster pages [thus the "npages + 1" in the
		 * loop below].
		 */

		for (lcv = 0 ; lcv < npages + 1 ; lcv++) {

			/*
			 * handle ppnext for outside loop, and saving pp
			 * until the end.
			 */
			if (lcv < npages) {
				if (ppsp[lcv] == pp)
					continue; /* skip pp until the end */
				ptmp = ppsp[lcv];
			} else {
				ptmp = pp;

				/* set up next page for outer loop */
				if (by_list) {
					if (pp->version == pp_version)
						ppnext = TAILQ_NEXT(pp, listq);
					else
						ppnext = TAILQ_FIRST(
						    &uobj->memq);
				} else {
					if (curoff < stop)
						ppnext = uvm_pagelookup(uobj,
						    curoff);
				}
			}

			/*
			 * verify the page wasn't moved while obj was
			 * unlocked
			 */
			if (result == 0 && async && ptmp->uobject != uobj)
				continue;

			/*
			 * unbusy the page if I/O is done.  note that for
			 * async I/O it is possible that the I/O op
			 * finished before we relocked the object (in
			 * which case the page is no longer busy).
			 */

			if (result != 0 || !async) {
				if (ptmp->flags & PG_WANTED) {
					/* still holding object lock */
					wakeup(ptmp);
				}
				ptmp->flags &= ~(PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(ptmp, NULL);
				if (ptmp->flags & PG_RELEASED) {
					uvm_unlock_pageq();
					if (!uvn_releasepg(ptmp, NULL)) {
						UVMHIST_LOG(maphist,
						    "released %p",
						    ptmp, 0,0,0);
						return (TRUE);
					}
					uvm_lock_pageq();
					continue;
				} else {
					if ((flags & PGO_WEAK) == 0 &&
					    !(result == EIO &&
					      curproc == uvm.pagedaemon_proc)) {
						ptmp->flags |=
						    (PG_CLEAN|PG_CLEANCHK);
						if ((flags & PGO_FREE) == 0) {
							pmap_clear_modify(ptmp);
						}
					}
				}
			}

			/*
			 * dispose of page
			 */

			if (flags & PGO_DEACTIVATE) {
				if ((pp->pqflags & PQ_INACTIVE) == 0 &&
				    (pp->flags & PG_BUSY) == 0 &&
				    pp->wire_count == 0) {
					pmap_clear_reference(ptmp);
					uvm_pagedeactivate(ptmp);
				}
			} else if (flags & PGO_FREE) {
				if (result == 0 && async) {
					if ((ptmp->flags & PG_BUSY) != 0)
						/* signal for i/o done */
						ptmp->flags |= PG_RELEASED;
				} else {
					if (result != 0) {
						printf("uvn_flush: obj=%p, "
						    "offset=0x%llx.  error %d\n",
						    pp->uobject,
						    (long long)pp->offset,
						    result);
						printf("uvn_flush: WARNING: "
						    "changes to page may be "
						    "lost!\n");
						retval = FALSE;
					}
					pmap_page_protect(ptmp, VM_PROT_NONE);
					uvm_pagefree(ptmp);
				}
			}
		}		/* end of "lcv" for loop */
	}			/* end of "pp" for loop */

	uvm_unlock_pageq();
	if ((flags & PGO_CLEANIT) && all && wasclean &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
	    (vp->v_flag & VONWORKLST)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	if (need_iosync) {
		UVMHIST_LOG(maphist,"  <<DOING IOSYNC>>",0,0,0,0);

		/*
		 * XXX this doesn't use the new two-flag scheme,
		 * but to use that, all i/o initiators will have to change.
		 */

		s = splbio();
		while (vp->v_numoutput != 0) {
			UVMHIST_LOG(ubchist, "waiting for vp %p num %d",
			    vp, vp->v_numoutput,0,0);

			vp->v_flag |= VBWAIT;
			UVM_UNLOCK_AND_WAIT(&vp->v_numoutput,
			    &uvn->u_obj.vmobjlock,
			    FALSE, "uvn_flush",0);
			simple_lock(&uvn->u_obj.vmobjlock);
		}
		splx(s);
	}

	/* return, with object locked! */
	UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0);
	return(retval);
}
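
/*
 * example of a typical call (see uvm_vnp_setsize() below): to toss the
 * pages past a new, smaller EOF the caller locks the object and does
 *
 *	uvn_flush(&uvn->u_obj, pgend, 0, PGO_FREE);
 *
 * i.e. "stop == 0" selects everything from pgend to the end of the
 * object, and PGO_FREE without PGO_CLEANIT frees the pages without
 * first writing them back.
 */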

/*
 * uvn_cluster
 *
 * we are about to do I/O in an object at offset.  this function is called
 * to establish a range of offsets around "offset" in which we can cluster
 * I/O.
 *
 * - currently doesn't matter if obj locked or not.
 */

static void
uvn_cluster(uobj, offset, loffset, hoffset)
	struct uvm_object *uobj;
	voff_t offset;
	voff_t *loffset, *hoffset;	/* OUT */
{
	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;

	*loffset = offset;
	*hoffset = MIN(offset + MAXBSIZE, round_page(uvn->u_size));
}

/*
 * uvn_put: flush page data to backing store.
 *
 * => object must be locked!  we will _unlock_ it before starting I/O.
 * => flags: PGO_SYNCIO -- use sync. I/O
 * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
 */

static int
uvn_put(uobj, pps, npages, flags)
	struct uvm_object *uobj;
	struct vm_page **pps;
	int npages, flags;
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;

	error = VOP_PUTPAGES(vp, pps, npages, flags, NULL);
	return error;
}

/*
 * uvn_get: get pages (synchronously) from backing store
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_ALLPAGES: get all of the pages
 *           PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */

static int
uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
	struct uvm_object *uobj;
	voff_t offset;
	struct vm_page **pps;		/* IN/OUT */
	int *npagesp;			/* IN (OUT if PGO_LOCKED) */
	int centeridx;
	vm_prot_t access_type;
	int advice, flags;
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;
	UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0);
	error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx,
	    access_type, advice, flags);
	return error;
}

/*
 * uvn_findpages:
 *	return the pages for the uobj at the requested offsets, allocating
 *	them if needed.
 * => uobj must be locked.
 * => returned pages will be BUSY.
 */

void
uvn_findpages(uobj, offset, npagesp, pps, flags)
	struct uvm_object *uobj;
	voff_t offset;
	int *npagesp;
	struct vm_page **pps;
	int flags;
{
	int i, rv, npages;

	rv = 0;
	npages = *npagesp;
	for (i = 0; i < npages; i++, offset += PAGE_SIZE) {
		rv += uvn_findpage(uobj, offset, &pps[i], flags);
	}
	*npagesp = rv;
}

static int
uvn_findpage(uobj, offset, pgp, flags)
	struct uvm_object *uobj;
	voff_t offset;
	struct vm_page **pgp;
	int flags;
{
	struct vm_page *pg;
	UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0);

	if (*pgp != NULL) {
		UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
		return 0;
	}
	for (;;) {
		/* look for an existing page */
		pg = uvm_pagelookup(uobj, offset);

		/* nope?  allocate one now */
		if (pg == NULL) {
			if (flags & UFP_NOALLOC) {
				UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
				return 0;
			}
			pg = uvm_pagealloc(uobj, offset, NULL, 0);
			if (pg == NULL) {
				if (flags & UFP_NOWAIT) {
					UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
					return 0;
				}
				simple_unlock(&uobj->vmobjlock);
				uvm_wait("uvn_fp1");
				simple_lock(&uobj->vmobjlock);
				continue;
			}
			if (UVM_OBJ_IS_VTEXT(uobj)) {
				uvmexp.vtextpages++;
			} else {
				uvmexp.vnodepages++;
			}
			UVMHIST_LOG(ubchist, "alloced",0,0,0,0);
			break;
		} else if (flags & UFP_NOCACHE) {
			UVMHIST_LOG(ubchist, "nocache",0,0,0,0);
			return 0;
		}

		/* page is there, see if we need to wait on it */
		if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) {
			if (flags & UFP_NOWAIT) {
				UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
				return 0;
			}
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "uvn_fp2", 0);
			simple_lock(&uobj->vmobjlock);
			continue;
		}

		/* skip PG_RDONLY pages if requested */
		if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) {
			UVMHIST_LOG(ubchist, "nordonly",0,0,0,0);
			return 0;
		}

		/* mark the page BUSY and we're done. */
		pg->flags |= PG_BUSY;
		UVM_PAGE_OWN(pg, "uvn_findpage");
		UVMHIST_LOG(ubchist, "found",0,0,0,0);
		break;
	}
	*pgp = pg;
	return 1;
}

/*
 * uvm_vnp_setsize: grow or shrink a vnode uvn
 *
 * grow   => just update size value
 * shrink => toss unneeded pages
 *
 * => we assume that the caller has a reference of some sort to the
 *	vnode in question so that it will not be yanked out from under
 *	us.
 *
 * called from:
 *  => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos])
 *  => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write)
 *  => ffs_balloc [XXX: why? doesn't WRITE handle?]
 *  => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr
 *  => union fs: union_newsize
 */

void
uvm_vnp_setsize(vp, newsize)
	struct vnode *vp;
	voff_t newsize;
{
	struct uvm_vnode *uvn = &vp->v_uvm;
	voff_t pgend = round_page(newsize);
	UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist);

	simple_lock(&uvn->u_obj.vmobjlock);

	UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", uvn->u_size, newsize, 0,0);

	/*
	 * now check if the size has changed: if we shrink we had better
	 * toss some pages...
	 */

	if (uvn->u_size > pgend && uvn->u_size != VSIZENOTSET) {
		(void) uvn_flush(&uvn->u_obj, pgend, 0, PGO_FREE);
	}
	uvn->u_size = newsize;
	simple_unlock(&uvn->u_obj.vmobjlock);
}

/*
 * uvm_vnp_zerorange: set a range of bytes in a file to zero.
 */

void
uvm_vnp_zerorange(vp, off, len)
	struct vnode *vp;
	off_t off;
	size_t len;
{
	void *win;

	/*
	 * XXXUBC invent kzero() and use it
	 */

	while (len) {
		vsize_t bytelen = len;

		win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_WRITE);
		memset(win, 0, bytelen);
		ubc_release(win, 0);

		off += bytelen;
		len -= bytelen;
	}
}
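
/*
 * illustrative only: a hypothetical caller of uvn_findpages() above,
 * assuming it already holds the object lock.  slots left NULL in the
 * array come back filled with BUSY pages, and *npagesp is updated to
 * the number of slots actually filled:
 *
 *	struct vm_page *pgs[16];
 *	int npages = 16;
 *
 *	memset(pgs, 0, sizeof(pgs));
 *	simple_lock(&uobj->vmobjlock);
 *	uvn_findpages(uobj, offset, &npages, pgs, 0);
 *	simple_unlock(&uobj->vmobjlock);
 *
 * with flags of 0 missing pages are allocated and busy ones are waited
 * for; the UFP_* bits (UFP_NOALLOC, UFP_NOWAIT, UFP_NOCACHE,
 * UFP_NORDONLY) select the early-return behaviours handled in
 * uvn_findpage().  the pages stay BUSY until the caller unbusies them.
 */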