/*	$NetBSD: uvm_vnode.c,v 1.50 2001/05/26 21:27:21 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.
 * Copyright (c) 1990 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vnode_pager.c	8.8 (Berkeley) 2/13/94
 * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
 */

#include "fs_nfs.h"
#include "opt_uvmhist.h"
#include "opt_ddb.h"

/*
 * uvm_vnode.c: the vnode pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
#include <sys/pool.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_vnode.h>

/*
 * functions
 */

static void	uvn_cluster __P((struct uvm_object *, voff_t, voff_t *,
			voff_t *));
static void	uvn_detach __P((struct uvm_object *));
static int	uvn_findpage __P((struct uvm_object *, voff_t,
			struct vm_page **, int));
static boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t,
			int));
static int	uvn_get __P((struct uvm_object *, voff_t,
			struct vm_page **, int *, int, vm_prot_t,
			int, int));
static int	uvn_put __P((struct uvm_object *, struct vm_page **,
			int, boolean_t));
static void	uvn_reference __P((struct uvm_object *));
static boolean_t uvn_releasepg __P((struct vm_page *,
			struct vm_page **));

/*
 * master pager structure
 */

struct uvm_pagerops uvm_vnodeops = {
	NULL,
	uvn_reference,
	uvn_detach,
	NULL,
	uvn_flush,
	uvn_get,
	uvn_put,
	uvn_cluster,
	uvm_mk_pcluster,
	uvn_releasepg,
};

/*
 * the ops!
 */

/*
 * uvn_attach
 *
 * attach a vnode structure to a VM object.  if the vnode is already
 * attached, then just bump the reference count by one and return the
 * VM object.  if not already attached, attach and return the new VM obj.
 * the "accessprot" tells the max access the attaching thread wants to
 * have on our pages.
 *
 * => caller must _not_ already be holding the lock on the uvm_object.
 * => in fact, nothing should be locked so that we can sleep here.
 * => note that uvm_object is first thing in vnode structure, so their
 *    pointers are equiv.
 */

struct uvm_object *
uvn_attach(arg, accessprot)
	void *arg;
	vm_prot_t accessprot;
{
	struct vnode *vp = arg;
	struct uvm_vnode *uvn = &vp->v_uvm;
	struct vattr vattr;
	int result;
	struct partinfo pi;
	voff_t used_vnode_size;
	UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
	used_vnode_size = (voff_t)0;

	/*
	 * first get a lock on the uvn.
	 */
	simple_lock(&uvn->u_obj.vmobjlock);
	while (uvn->u_flags & VXLOCK) {
		uvn->u_flags |= VXWANT;
		UVMHIST_LOG(maphist, "  SLEEPING on blocked vn",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
		    "uvn_attach", 0);
		simple_lock(&uvn->u_obj.vmobjlock);
		UVMHIST_LOG(maphist,"  WOKE UP",0,0,0,0);
	}

	/*
	 * if we're mapping a BLK device, make sure it is a disk.
	 */
	if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
		simple_unlock(&uvn->u_obj.vmobjlock);
		UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
		return(NULL);
	}

#ifdef DIAGNOSTIC
	if (vp->v_type != VREG && vp->v_type != VBLK) {
		panic("uvn_attach: vp %p not VREG or VBLK", vp);
	}
#endif

	/*
	 * set up our idea of the size
	 * if this hasn't been done already.
	 */
	if (uvn->u_size == VSIZENOTSET) {

		uvn->u_flags |= VXLOCK;
		simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
		/* XXX: curproc? */
		if (vp->v_type == VBLK) {
			/*
			 * We could implement this as a specfs getattr call, but:
			 *
			 *	(1) VOP_GETATTR() would get the file system
			 *	    vnode operation, not the specfs operation.
			 *
			 *	(2) All we want is the size, anyhow.
			 */
			result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
			    DIOCGPART, (caddr_t)&pi, FREAD, curproc);
			if (result == 0) {
				/* XXX should remember blocksize */
				used_vnode_size = (voff_t)pi.disklab->d_secsize *
				    (voff_t)pi.part->p_size;
			}
		} else {
			result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
			if (result == 0)
				used_vnode_size = vattr.va_size;
		}

		/* relock object */
		simple_lock(&uvn->u_obj.vmobjlock);

		if (uvn->u_flags & VXWANT)
			wakeup(uvn);
		uvn->u_flags &= ~(VXLOCK|VXWANT);

		if (result != 0) {
			simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
			UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
			return(NULL);
		}
		uvn->u_size = used_vnode_size;

	}

	/* unlock and return */
	simple_unlock(&uvn->u_obj.vmobjlock);
	UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
	    0, 0, 0);
	return (&uvn->u_obj);
}


/*
 * uvn_reference
 *
 * duplicate a reference to a VM object.  Note that the reference
 * count must already be at least one (the passed in reference) so
 * there is no chance of the uvn being killed or locked out here.
 *
 * => caller must call with object unlocked.
 * => caller must be using the same accessprot as was used at attach time
 */


static void
uvn_reference(uobj)
	struct uvm_object *uobj;
{
	VREF((struct vnode *)uobj);
}

/*
 * uvn_detach
 *
 * remove a reference to a VM object.
 *
 * => caller must call with object unlocked and map locked.
 */
static void
uvn_detach(uobj)
	struct uvm_object *uobj;
{
	vrele((struct vnode *)uobj);
}

/*
 * uvn_releasepg: handle a released page in a uvn
 *
 * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
 *	to dispose of.
 * => caller must handle PG_WANTED case
 * => called with page's object locked, pageq's unlocked
 * => returns TRUE if page's object is still alive, FALSE if we
 *	killed the page's object.  if we return TRUE, then we
 *	return with the object locked.
 * => if (nextpgp != NULL) => we return the next page on the queue, and return
 *	with the page queues locked [for pagedaemon]
 * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
 * => we kill the uvn if it is not referenced and we are supposed to
 *	kill it ("relkill").
 */

boolean_t
uvn_releasepg(pg, nextpgp)
	struct vm_page *pg;
	struct vm_page **nextpgp;	/* OUT */
{
	KASSERT(pg->flags & PG_RELEASED);

	/*
	 * dispose of the page [caller handles PG_WANTED]
	 */
	pmap_page_protect(pg, VM_PROT_NONE);
	uvm_lock_pageq();
	if (nextpgp)
		*nextpgp = TAILQ_NEXT(pg, pageq);
	uvm_pagefree(pg);
	if (!nextpgp)
		uvm_unlock_pageq();

	return (TRUE);
}

/*
 * issues to consider:
 *   there are two tailq's in the uvm structure... one for pending async
 *   i/o and one for "done" async i/o.  to do an async i/o one puts
 *   a buf on the "pending" list (protected by splbio()), starts the
 *   i/o and returns 0.  when the i/o is done, we expect
 *   some sort of "i/o done" function to be called (at splbio(), interrupt
 *   time).  this function should remove the buf from the pending list
 *   and place it on the "done" list and wakeup the daemon.  the daemon
 *   will run at normal spl() and will remove all items from the "done"
 *   list and call the iodone hook for each done request (see uvm_pager.c).
 *
 * => return KERN_SUCCESS (aio finished, free it).  otherwise requeue for
 *	later collection.
 * => called with pageq's locked by the daemon.
 *
 * general outline:
 * - "try" to lock object.  if fail, just return (will try again later)
 * - drop "u_nio" (this req is done!)
 * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio }
 * - get "page" structures (atop?).
 * - handle "wanted" pages
 * - handle "released" pages [using pgo_releasepg]
 *   >>> pgo_releasepg may kill the object
 * don't forget to look at "object" wanted flag in all cases.
 */


/*
 * uvn_flush: flush pages out of a uvm object.
 *
 * => "stop == 0" means flush all pages at or after "start".
 * => object should be locked by caller.  we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT), or
 *	if PGO_SYNCIO is set and there are pages busy.
 *	we return with the object locked.
 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
 *	thus, a caller might want to unlock higher level resources
 *	(e.g. vm_map) before calling flush.
 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
 *	unlock the object nor block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *	for flushing.
 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
 *	that new pages are inserted on the tail end of the list.  thus,
 *	we can make a complete pass through the object in one go by starting
 *	at the head and working towards the tail (new pages are put in
 *	front of us).
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the lock on them [e.g. pagedaemon had
 *	better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *
 * comment on "cleaning" object and PG_BUSY pages:
 *	this routine is holding the lock on the object.  the only time
 *	that it can run into a PG_BUSY page that it does not own is if
 *	some other process has started I/O on the page (e.g. either
 *	a pagein, or a pageout).  if the PG_BUSY page is being paged
 *	in, then it can not be dirty (!PG_CLEAN) because no one has
 *	had a chance to modify it yet.  if the PG_BUSY page is being
 *	paged out then it means that someone else has already started
 *	cleaning the page for us (how nice!).  in this case, if we
 *	have syncio specified, then after we make our pass through the
 *	object we need to wait for the other PG_BUSY pages to clear
 *	off (i.e. we need to do an iosync).  also note that once a
 *	page is PG_BUSY it must stay in its object until it is un-busyed.
 *
 * note on page traversal:
 *	we can traverse the pages in an object either by going down the
 *	linked list in "uobj->memq", or we can go over the address range
 *	by page doing hash table lookups for each address.  depending
 *	on how many pages are in the object it may be cheaper to do one
 *	or the other.  we set "by_list" to true if we are using memq.
 *	if the cost of a hash lookup were equal to the cost of the list
 *	traversal we could compare the number of pages in the start->stop
 *	range to the total number of pages in the object.  however, it
 *	seems that a hash table lookup is more expensive than the linked
 *	list traversal, so we multiply the number of pages in the
 *	start->stop range by a penalty which we define below.
 */

#define UVN_HASH_PENALTY 4	/* XXX: a guess */

static boolean_t
uvn_flush(uobj, start, stop, flags)
	struct uvm_object *uobj;
	voff_t start, stop;
	int flags;
{
	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
	struct vnode *vp = (struct vnode *)uobj;
	struct vm_page *pp, *ppnext, *ptmp;
	struct vm_page *pps[256], **ppsp;
	int s;
	int npages, result, lcv;
	boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	voff_t curoff;
	u_short pp_version;
	UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x",
	    uobj, start, stop, flags);
	KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));

	if (uobj->uo_npages == 0) {
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
		    (vp->v_flag & VONWORKLST)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
		return TRUE;
	}

#ifdef DEBUG
	if (uvn->u_size == VSIZENOTSET) {
		printf("uvn_flush: size not set vp %p\n", vp);
		vprint("uvn_flush VSIZENOTSET", vp);
		flags |= PGO_ALLPAGES;
	}
#endif

	/*
	 * get init vals and determine how we are going to traverse object
	 */

	if (stop == 0) {
		stop = trunc_page(LLONG_MAX);
	}
	curoff = 0;
	need_iosync = FALSE;
	retval = TRUE;
	wasclean = TRUE;
	if (flags & PGO_ALLPAGES) {
		all = TRUE;
		by_list = TRUE;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		all = FALSE;
		by_list = (uobj->uo_npages <=
		    ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
	}

	UVMHIST_LOG(maphist,
	    " flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x",
	    start, stop, by_list, flags);

	/*
	 * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as
	 * a _hint_ as to how up to date the PG_CLEAN bit is.  if the hint
	 * is wrong it will only prevent us from clustering... it won't break
	 * anything.  we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster
	 * will set them as it syncs PG_CLEAN.  This is only an issue if we
	 * are looking at non-inactive pages (because an inactive page's
	 * PG_CLEAN bit is always up to date since there are no mappings).
	 * [borrowed PG_CLEANCHK idea from FreeBSD VM]
	 */

	if ((flags & PGO_CLEANIT) != 0 &&
	    uobj->pgops->pgo_mk_pcluster != NULL) {
		if (by_list) {
			TAILQ_FOREACH(pp, &uobj->memq, listq) {
				if (!all &&
				    (pp->offset < start || pp->offset >= stop))
					continue;
				pp->flags &= ~PG_CLEANCHK;
			}

		} else {	/* by hash */
			for (curoff = start ; curoff < stop;
			    curoff += PAGE_SIZE) {
				pp = uvm_pagelookup(uobj, curoff);
				if (pp)
					pp->flags &= ~PG_CLEANCHK;
			}
		}
	}

	/*
	 * now do it.  note: we must update ppnext in body of loop or we
	 * will get stuck.  we need to use ppnext because we may free "pp"
	 * before doing the next loop.
	 */

	if (by_list) {
		pp = TAILQ_FIRST(&uobj->memq);
	} else {
		curoff = start;
		pp = uvm_pagelookup(uobj, curoff);
	}

	ppnext = NULL;
	ppsp = NULL;
	uvm_lock_pageq();

	/* locked: both page queues and uobj */
	for ( ; (by_list && pp != NULL) ||
	    (!by_list && curoff < stop) ; pp = ppnext) {
		if (by_list) {
			if (!all &&
			    (pp->offset < start || pp->offset >= stop)) {
				ppnext = TAILQ_NEXT(pp, listq);
				continue;
			}
		} else {
			curoff += PAGE_SIZE;
			if (pp == NULL) {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
				continue;
			}
		}

		/*
		 * handle case where we do not need to clean page (either
		 * because we are not cleaning, or because the page is not
		 * dirty or is busy):
		 *
		 * NOTE: we are allowed to deactivate a non-wired active
		 * PG_BUSY page, but once a PG_BUSY page is on the inactive
		 * queue it must stay put until it is !PG_BUSY (so as not to
		 * confuse pagedaemon).
		 */

		if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
			needs_clean = FALSE;
			if (!async)
				need_iosync = TRUE;
		} else {

			/*
			 * freeing: nuke all mappings so we can sync
			 * PG_CLEAN bit with no race
			 */
			if ((pp->flags & PG_CLEAN) != 0 &&
			    (flags & PGO_FREE) != 0 &&
			    /* XXX ACTIVE|INACTIVE test unnecessary? */
			    (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0)
				pmap_page_protect(pp, VM_PROT_NONE);
			if ((pp->flags & PG_CLEAN) != 0 &&
			    pmap_is_modified(pp))
				pp->flags &= ~(PG_CLEAN);
			pp->flags |= PG_CLEANCHK;
			needs_clean = ((pp->flags & PG_CLEAN) == 0);
		}

		/*
		 * if we don't need a clean... load ppnext and dispose of pp
		 */
		if (!needs_clean) {
			if (by_list)
				ppnext = TAILQ_NEXT(pp, listq);
			else {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
			}

			if (flags & PGO_DEACTIVATE) {
				if ((pp->pqflags & PQ_INACTIVE) == 0 &&
				    (pp->flags & PG_BUSY) == 0 &&
				    pp->wire_count == 0) {
					pmap_clear_reference(pp);
					uvm_pagedeactivate(pp);
				}

			} else if (flags & PGO_FREE) {
				if (pp->flags & PG_BUSY) {
					pp->flags |= PG_RELEASED;
				} else {
					pmap_page_protect(pp, VM_PROT_NONE);
					uvm_pagefree(pp);
				}
			}
			/* ppnext is valid so we can continue... */
			continue;
		}

		/*
		 * pp points to a page in the locked object that we are
		 * working on.  if it is !PG_CLEAN, !PG_BUSY and we asked
		 * for cleaning (PGO_CLEANIT), we clean it now.
		 *
		 * let uvm_pager_put attempt a clustered pageout.
		 * note: locked: uobj and page queues.
		 */

		wasclean = FALSE;
		pp->flags |= PG_BUSY;	/* we 'own' page now */
		UVM_PAGE_OWN(pp, "uvn_flush");
		pmap_page_protect(pp, VM_PROT_READ);
		pp_version = pp->version;
		ppsp = pps;
		npages = sizeof(pps) / sizeof(struct vm_page *);

		/* locked: page queues, uobj */
		result = uvm_pager_put(uobj, pp, &ppsp, &npages,
		    flags | PGO_DOACTCLUST, start, stop);
		/* unlocked: page queues, uobj */

		/*
		 * at this point nothing is locked.  if we did an async I/O
		 * it is remotely possible for the async i/o to complete and
		 * the page "pp" be freed or what not before we get a chance
		 * to relock the object.  in order to detect this, we have
		 * saved the version number of the page in "pp_version".
		 */

		/* relock! */
		simple_lock(&uobj->vmobjlock);
		uvm_lock_pageq();

		/*
		 * the cleaning operation is now done.  finish up.  note that
		 * on error uvm_pager_put drops the cluster for us.
		 * on success uvm_pager_put returns the cluster to us in
		 * ppsp/npages.
		 */

		/*
		 * for pending async i/o, if we are not deactivating/freeing
		 * we can move on to the next page.
		 */

		if (result == 0 && async &&
		    (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {

			/*
			 * no per-page ops: refresh ppnext and continue
			 */
			if (by_list) {
				if (pp->version == pp_version)
					ppnext = TAILQ_NEXT(pp, listq);
				else
					ppnext = TAILQ_FIRST(&uobj->memq);
			} else {
				if (curoff < stop)
					ppnext = uvm_pagelookup(uobj, curoff);
			}
			continue;
		}

		/*
		 * need to look at each page of the I/O operation.  we defer
		 * processing "pp" until the last trip through this "for" loop
		 * so that we can load "ppnext" for the main loop after we
		 * play with the cluster pages [thus the "npages + 1" in the
		 * loop below].
		 */

		for (lcv = 0 ; lcv < npages + 1 ; lcv++) {

			/*
			 * handle ppnext for the outer loop, saving pp
			 * until the end.
			 */
			if (lcv < npages) {
				if (ppsp[lcv] == pp)
					continue; /* skip pp until the end */
				ptmp = ppsp[lcv];
			} else {
				ptmp = pp;

				/* set up next page for outer loop */
				if (by_list) {
					if (pp->version == pp_version)
						ppnext = TAILQ_NEXT(pp, listq);
					else
						ppnext = TAILQ_FIRST(
						    &uobj->memq);
				} else {
					if (curoff < stop)
						ppnext = uvm_pagelookup(uobj,
						    curoff);
				}
			}

			/*
			 * verify the page wasn't moved while obj was
			 * unlocked
			 */
			if (result == 0 && async && ptmp->uobject != uobj)
				continue;

			/*
			 * unbusy the page if I/O is done.  note that for
			 * async I/O it is possible that the I/O op
			 * finished before we relocked the object (in
			 * which case the page is no longer busy).
			 */

			if (result != 0 || !async) {
				if (ptmp->flags & PG_WANTED) {
					/* still holding object lock */
					wakeup(ptmp);
				}
				ptmp->flags &= ~(PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(ptmp, NULL);
				if (ptmp->flags & PG_RELEASED) {
					uvm_unlock_pageq();
					if (!uvn_releasepg(ptmp, NULL)) {
						UVMHIST_LOG(maphist,
						    "released %p",
						    ptmp, 0,0,0);
						return (TRUE);
					}
					uvm_lock_pageq();
					continue;
				} else {
					if ((flags & PGO_WEAK) == 0 &&
					    !(result == EIO &&
					      curproc == uvm.pagedaemon_proc)) {
						ptmp->flags |=
						    (PG_CLEAN|PG_CLEANCHK);
						if ((flags & PGO_FREE) == 0) {
							pmap_clear_modify(ptmp);
						}
					}
				}
			}

			/*
			 * dispose of page
			 */

			if (flags & PGO_DEACTIVATE) {
				if ((ptmp->pqflags & PQ_INACTIVE) == 0 &&
				    (ptmp->flags & PG_BUSY) == 0 &&
				    ptmp->wire_count == 0) {
					pmap_clear_reference(ptmp);
					uvm_pagedeactivate(ptmp);
				}
			} else if (flags & PGO_FREE) {
				if (result == 0 && async) {
					if ((ptmp->flags & PG_BUSY) != 0)
						/* signal for i/o done */
						ptmp->flags |= PG_RELEASED;
				} else {
					if (result != 0) {
						printf("uvn_flush: obj=%p, "
						   "offset=0x%llx.  error %d\n",
						    pp->uobject,
						    (long long)pp->offset,
						    result);
						printf("uvn_flush: WARNING: "
						    "changes to page may be "
						    "lost!\n");
						retval = FALSE;
					}
					pmap_page_protect(ptmp, VM_PROT_NONE);
					uvm_pagefree(ptmp);
				}
			}
		}		/* end of "lcv" for loop */
	}			/* end of "pp" for loop */

	uvm_unlock_pageq();
	if ((flags & PGO_CLEANIT) && all && wasclean &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
	    (vp->v_flag & VONWORKLST)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	if (need_iosync) {
		UVMHIST_LOG(maphist,"  <<DOING IOSYNC>>",0,0,0,0);

		/*
		 * XXX this doesn't use the new two-flag scheme,
		 * but to use that, all i/o initiators will have to change.
		 */

		s = splbio();
		while (vp->v_numoutput != 0) {
			UVMHIST_LOG(ubchist, "waiting for vp %p num %d",
			    vp, vp->v_numoutput,0,0);

			vp->v_flag |= VBWAIT;
			UVM_UNLOCK_AND_WAIT(&vp->v_numoutput,
			    &uvn->u_obj.vmobjlock,
			    FALSE, "uvn_flush",0);
			simple_lock(&uvn->u_obj.vmobjlock);
		}
		splx(s);
	}

	/* return, with object locked! */
	UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0);
	return(retval);
}

/*
 * uvn_cluster
 *
 * we are about to do I/O in an object at offset.  this function is called
 * to establish a range of offsets around "offset" in which we can cluster
 * I/O.
 *
 * - currently doesn't matter if obj locked or not.
 */

static void
uvn_cluster(uobj, offset, loffset, hoffset)
	struct uvm_object *uobj;
	voff_t offset;
	voff_t *loffset, *hoffset;	/* OUT */
{
	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;

	*loffset = offset;
	*hoffset = MIN(offset + MAXBSIZE, round_page(uvn->u_size));
}

/*
 * uvn_put: flush page data to backing store.
 *
 * => object must be locked!  we will _unlock_ it before starting I/O.
 * => flags: PGO_SYNCIO -- use sync. I/O
 * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
 */

static int
uvn_put(uobj, pps, npages, flags)
	struct uvm_object *uobj;
	struct vm_page **pps;
	int npages, flags;
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;

	error = VOP_PUTPAGES(vp, pps, npages, flags, NULL);
	return error;
}


/*
 * uvn_get: get pages (synchronously) from backing store
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_ALLPAGES: get all of the pages
 *	     PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */

static int
uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
	struct uvm_object *uobj;
	voff_t offset;
	struct vm_page **pps;		/* IN/OUT */
	int *npagesp;			/* IN (OUT if PGO_LOCKED) */
	int centeridx;
	vm_prot_t access_type;
	int advice, flags;
{
	struct vnode *vp = (struct vnode *)uobj;
	int error;
	UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0);
	error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx,
	    access_type, advice, flags);
	return error;
}


/*
 * uvn_findpages:
 * return the pages for the uobj and offsets requested, allocating if needed.
 * => uobj must be locked.
 * => returned pages will be BUSY.
 */

void
uvn_findpages(uobj, offset, npagesp, pps, flags)
	struct uvm_object *uobj;
	voff_t offset;
	int *npagesp;
	struct vm_page **pps;
	int flags;
{
	int i, rv, npages;

	rv = 0;
	npages = *npagesp;
	for (i = 0; i < npages; i++, offset += PAGE_SIZE) {
		rv += uvn_findpage(uobj, offset, &pps[i], flags);
	}
	*npagesp = rv;
}

static int
uvn_findpage(uobj, offset, pgp, flags)
	struct uvm_object *uobj;
	voff_t offset;
	struct vm_page **pgp;
	int flags;
{
	struct vm_page *pg;
	UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0);

	if (*pgp != NULL) {
		UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
		return 0;
	}
	for (;;) {
		/* look for an existing page */
		pg = uvm_pagelookup(uobj, offset);

		/* nope?  allocate one now */
		if (pg == NULL) {
			if (flags & UFP_NOALLOC) {
				UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
				return 0;
			}
			pg = uvm_pagealloc(uobj, offset, NULL, 0);
			if (pg == NULL) {
				if (flags & UFP_NOWAIT) {
					UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
					return 0;
				}
				simple_unlock(&uobj->vmobjlock);
				uvm_wait("uvn_fp1");
				simple_lock(&uobj->vmobjlock);
				continue;
			}
			if (UVM_OBJ_IS_VTEXT(uobj)) {
				uvmexp.vtextpages++;
			} else {
				uvmexp.vnodepages++;
			}
			UVMHIST_LOG(ubchist, "alloced",0,0,0,0);
			break;
		} else if (flags & UFP_NOCACHE) {
			UVMHIST_LOG(ubchist, "nocache",0,0,0,0);
			return 0;
		}

		/* page is there, see if we need to wait on it */
		if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) {
			if (flags & UFP_NOWAIT) {
				UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
				return 0;
			}
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "uvn_fp2", 0);
			simple_lock(&uobj->vmobjlock);
			continue;
		}

		/* skip PG_RDONLY pages if requested */
		if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) {
			UVMHIST_LOG(ubchist, "nordonly",0,0,0,0);
			return 0;
		}

		/* mark the page BUSY and we're done. */
		pg->flags |= PG_BUSY;
		UVM_PAGE_OWN(pg, "uvn_findpage");
		UVMHIST_LOG(ubchist, "found",0,0,0,0);
		break;
	}
	*pgp = pg;
	return 1;
}

/*
 * uvm_vnp_setsize: grow or shrink a vnode uvn
 *
 * grow   => just update size value
 * shrink => toss un-needed pages
 *
 * => we assume that the caller has a reference of some sort to the
 *	vnode in question so that it will not be yanked out from under
 *	us.
 *
 * called from:
 *  => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos])
 *  => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write)
 *  => ffs_balloc [XXX: why? doesn't WRITE handle?]
 *  => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr
 *  => union fs: union_newsize
 */

void
uvm_vnp_setsize(vp, newsize)
	struct vnode *vp;
	voff_t newsize;
{
	struct uvm_vnode *uvn = &vp->v_uvm;
	voff_t pgend = round_page(newsize);
	UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist);

	simple_lock(&uvn->u_obj.vmobjlock);

	UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", uvn->u_size, newsize, 0,0);

	/*
	 * now check if the size has changed: if we shrink we had better
	 * toss some pages...
	 */

	if (uvn->u_size > pgend && uvn->u_size != VSIZENOTSET) {
		(void) uvn_flush(&uvn->u_obj, pgend, 0, PGO_FREE);
	}
	uvn->u_size = newsize;
	simple_unlock(&uvn->u_obj.vmobjlock);
}

/*
 * uvm_vnp_zerorange: set a range of bytes in a file to zero.
 */

void
uvm_vnp_zerorange(vp, off, len)
	struct vnode *vp;
	off_t off;
	size_t len;
{
	void *win;

	/*
	 * XXXUBC invent kzero() and use it
	 */

	while (len) {
		vsize_t bytelen = len;

		win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_WRITE);
		memset(win, 0, bytelen);
		ubc_release(win, 0);

		off += bytelen;
		len -= bytelen;
	}
}
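
/*
 * Illustrative sketch (not part of the pager API): how a hypothetical
 * file-system write path might combine the routines above when extending
 * a file.  The helper name "example_vn_extend" and its arguments are
 * assumptions for illustration only; the ubc_alloc()/ubc_release() loop
 * follows the same pattern as uvm_vnp_zerorange() above, and
 * uvm_vnp_setsize() is called first so that the newly exposed pages are
 * valid targets for the pager.
 *
 *	static void
 *	example_vn_extend(struct vnode *vp, off_t off, size_t len)
 *	{
 *		void *win;
 *		vsize_t bytelen;
 *
 *		uvm_vnp_setsize(vp, (voff_t)(off + len));
 *		while (len) {
 *			bytelen = len;
 *			win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen,
 *			    UBC_WRITE);
 *			memset(win, 0, bytelen);
 *			ubc_release(win, 0);
 *			off += bytelen;
 *			len -= bytelen;
 *		}
 *	}
 */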