1 /* $NetBSD: uvm_vnode.c,v 1.40 2000/12/16 06:17:09 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993 6 * The Regents of the University of California. 7 * Copyright (c) 1990 University of Utah. 8 * 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by Charles D. Cranor, 26 * Washington University, the University of California, Berkeley and 27 * its contributors. 28 * 4. Neither the name of the University nor the names of its contributors 29 * may be used to endorse or promote products derived from this software 30 * without specific prior written permission. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 42 * SUCH DAMAGE. 43 * 44 * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94 45 * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp 46 */ 47 48 #include "fs_nfs.h" 49 #include "opt_uvmhist.h" 50 #include "opt_ddb.h" 51 52 /* 53 * uvm_vnode.c: the vnode pager. 54 */ 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/kernel.h> 59 #include <sys/proc.h> 60 #include <sys/malloc.h> 61 #include <sys/vnode.h> 62 #include <sys/disklabel.h> 63 #include <sys/ioctl.h> 64 #include <sys/fcntl.h> 65 #include <sys/conf.h> 66 #include <sys/pool.h> 67 #include <sys/mount.h> 68 69 #include <miscfs/specfs/specdev.h> 70 71 #include <uvm/uvm.h> 72 #include <uvm/uvm_vnode.h> 73 74 /* 75 * functions 76 */ 77 78 static void uvn_cluster __P((struct uvm_object *, voff_t, voff_t *, 79 voff_t *)); 80 static void uvn_detach __P((struct uvm_object *)); 81 static int uvn_findpage __P((struct uvm_object *, voff_t, 82 struct vm_page **, int)); 83 static boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t, 84 int)); 85 static int uvn_get __P((struct uvm_object *, voff_t, vm_page_t *, 86 int *, int, vm_prot_t, int, int)); 87 static int uvn_put __P((struct uvm_object *, vm_page_t *, int, 88 boolean_t)); 89 static void uvn_reference __P((struct uvm_object *)); 90 static boolean_t uvn_releasepg __P((struct vm_page *, 91 struct vm_page **)); 92 93 /* 94 * master pager structure 95 */ 96 97 struct uvm_pagerops uvm_vnodeops = { 98 NULL, 99 uvn_reference, 100 uvn_detach, 101 NULL, 102 uvn_flush, 103 uvn_get, 104 uvn_put, 105 uvn_cluster, 106 uvm_mk_pcluster, 107 uvn_releasepg, 108 }; 109 110 /* 111 * the ops! 112 */ 113 114 /* 115 * uvn_attach 116 * 117 * attach a vnode structure to a VM object. if the vnode is already 118 * attached, then just bump the reference count by one and return the 119 * VM object. if not already attached, attach and return the new VM obj. 120 * the "accessprot" tells the max access the attaching thread wants to 121 * our pages. 122 * 123 * => caller must _not_ already be holding the lock on the uvm_object. 124 * => in fact, nothing should be locked so that we can sleep here. 125 * => note that uvm_object is first thing in vnode structure, so their 126 * pointers are equiv. 127 */ 128 129 struct uvm_object * 130 uvn_attach(arg, accessprot) 131 void *arg; 132 vm_prot_t accessprot; 133 { 134 struct vnode *vp = arg; 135 struct uvm_vnode *uvn = &vp->v_uvm; 136 struct vattr vattr; 137 int result; 138 struct partinfo pi; 139 voff_t used_vnode_size; 140 UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist); 141 142 UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0); 143 used_vnode_size = (voff_t)0; 144 145 /* 146 * first get a lock on the uvn. 147 */ 148 simple_lock(&uvn->u_obj.vmobjlock); 149 while (uvn->u_flags & VXLOCK) { 150 uvn->u_flags |= VXWANT; 151 UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0); 152 UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE, 153 "uvn_attach", 0); 154 simple_lock(&uvn->u_obj.vmobjlock); 155 UVMHIST_LOG(maphist," WOKE UP",0,0,0,0); 156 } 157 158 /* 159 * if we're mapping a BLK device, make sure it is a disk. 160 */ 161 if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) { 162 simple_unlock(&uvn->u_obj.vmobjlock); 163 UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0); 164 return(NULL); 165 } 166 167 #ifdef DIAGNOSTIC 168 if (vp->v_type != VREG) { 169 panic("uvn_attach: vp %p not VREG", vp); 170 } 171 #endif 172 173 /* 174 * set up our idea of the size 175 * if this hasn't been done already. 176 */ 177 if (uvn->u_size == VSIZENOTSET) { 178 179 uvn->u_flags |= VXLOCK; 180 simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */ 181 /* XXX: curproc? */ 182 if (vp->v_type == VBLK) { 183 /* 184 * We could implement this as a specfs getattr call, but: 185 * 186 * (1) VOP_GETATTR() would get the file system 187 * vnode operation, not the specfs operation. 188 * 189 * (2) All we want is the size, anyhow. 190 */ 191 result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, 192 DIOCGPART, (caddr_t)&pi, FREAD, curproc); 193 if (result == 0) { 194 /* XXX should remember blocksize */ 195 used_vnode_size = (voff_t)pi.disklab->d_secsize * 196 (voff_t)pi.part->p_size; 197 } 198 } else { 199 result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc); 200 if (result == 0) 201 used_vnode_size = vattr.va_size; 202 } 203 204 /* relock object */ 205 simple_lock(&uvn->u_obj.vmobjlock); 206 207 if (uvn->u_flags & VXWANT) 208 wakeup(uvn); 209 uvn->u_flags &= ~(VXLOCK|VXWANT); 210 211 if (result != 0) { 212 simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ 213 UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0); 214 return(NULL); 215 } 216 uvn->u_size = used_vnode_size; 217 218 } 219 220 /* unlock and return */ 221 simple_unlock(&uvn->u_obj.vmobjlock); 222 UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, 223 0, 0, 0); 224 return (&uvn->u_obj); 225 } 226 227 228 /* 229 * uvn_reference 230 * 231 * duplicate a reference to a VM object. Note that the reference 232 * count must already be at least one (the passed in reference) so 233 * there is no chance of the uvn being killed or locked out here. 234 * 235 * => caller must call with object unlocked. 236 * => caller must be using the same accessprot as was used at attach time 237 */ 238 239 240 static void 241 uvn_reference(uobj) 242 struct uvm_object *uobj; 243 { 244 VREF((struct vnode *)uobj); 245 } 246 247 /* 248 * uvn_detach 249 * 250 * remove a reference to a VM object. 251 * 252 * => caller must call with object unlocked and map locked. 253 * => this starts the detach process, but doesn't have to finish it 254 * (async i/o could still be pending). 255 */ 256 static void 257 uvn_detach(uobj) 258 struct uvm_object *uobj; 259 { 260 vrele((struct vnode *)uobj); 261 } 262 263 /* 264 * uvn_releasepg: handled a released page in a uvn 265 * 266 * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need 267 * to dispose of. 268 * => caller must handled PG_WANTED case 269 * => called with page's object locked, pageq's unlocked 270 * => returns TRUE if page's object is still alive, FALSE if we 271 * killed the page's object. if we return TRUE, then we 272 * return with the object locked. 273 * => if (nextpgp != NULL) => we return the next page on the queue, and return 274 * with the page queues locked [for pagedaemon] 275 * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] 276 * => we kill the uvn if it is not referenced and we are suppose to 277 * kill it ("relkill"). 278 */ 279 280 boolean_t 281 uvn_releasepg(pg, nextpgp) 282 struct vm_page *pg; 283 struct vm_page **nextpgp; /* OUT */ 284 { 285 KASSERT(pg->flags & PG_RELEASED); 286 287 /* 288 * dispose of the page [caller handles PG_WANTED] 289 */ 290 pmap_page_protect(pg, VM_PROT_NONE); 291 uvm_lock_pageq(); 292 if (nextpgp) 293 *nextpgp = TAILQ_NEXT(pg, pageq); 294 uvm_pagefree(pg); 295 if (!nextpgp) 296 uvm_unlock_pageq(); 297 298 return (TRUE); 299 } 300 301 /* 302 * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go 303 * through the buffer cache and allow I/O in any size. These VOPs use 304 * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't 305 * go through the buffer cache or allow I/O sizes larger than a 306 * block]. we will eventually want to change this. 307 * 308 * issues to consider: 309 * uvm provides the uvm_aiodesc structure for async i/o management. 310 * there are two tailq's in the uvm. structure... one for pending async 311 * i/o and one for "done" async i/o. to do an async i/o one puts 312 * an aiodesc on the "pending" list (protected by splbio()), starts the 313 * i/o and returns VM_PAGER_PEND. when the i/o is done, we expect 314 * some sort of "i/o done" function to be called (at splbio(), interrupt 315 * time). this function should remove the aiodesc from the pending list 316 * and place it on the "done" list and wakeup the daemon. the daemon 317 * will run at normal spl() and will remove all items from the "done" 318 * list and call the "aiodone" hook for each done request (see uvm_pager.c). 319 * [in the old vm code, this was done by calling the "put" routine with 320 * null arguments which made the code harder to read and understand because 321 * you had one function ("put") doing two things.] 322 * 323 * so the current pager needs: 324 * int uvn_aiodone(struct uvm_aiodesc *) 325 * 326 * => return KERN_SUCCESS (aio finished, free it). otherwise requeue for 327 * later collection. 328 * => called with pageq's locked by the daemon. 329 * 330 * general outline: 331 * - "try" to lock object. if fail, just return (will try again later) 332 * - drop "u_nio" (this req is done!) 333 * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio } 334 * - get "page" structures (atop?). 335 * - handle "wanted" pages 336 * - handle "released" pages [using pgo_releasepg] 337 * >>> pgo_releasepg may kill the object 338 * dont forget to look at "object" wanted flag in all cases. 339 */ 340 341 342 /* 343 * uvn_flush: flush pages out of a uvm object. 344 * 345 * => object should be locked by caller. we may _unlock_ the object 346 * if (and only if) we need to clean a page (PGO_CLEANIT). 347 * we return with the object locked. 348 * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller 349 * might want to unlock higher level resources (e.g. vm_map) 350 * before calling flush. 351 * => if PGO_CLEANIT is not set, then we will neither unlock the object 352 * or block. 353 * => if PGO_ALLPAGE is set, then all pages in the object are valid targets 354 * for flushing. 355 * => NOTE: we rely on the fact that the object's memq is a TAILQ and 356 * that new pages are inserted on the tail end of the list. thus, 357 * we can make a complete pass through the object in one go by starting 358 * at the head and working towards the tail (new pages are put in 359 * front of us). 360 * => NOTE: we are allowed to lock the page queues, so the caller 361 * must not be holding the lock on them [e.g. pagedaemon had 362 * better not call us with the queues locked] 363 * => we return TRUE unless we encountered some sort of I/O error 364 * 365 * comment on "cleaning" object and PG_BUSY pages: 366 * this routine is holding the lock on the object. the only time 367 * that it can run into a PG_BUSY page that it does not own is if 368 * some other process has started I/O on the page (e.g. either 369 * a pagein, or a pageout). if the PG_BUSY page is being paged 370 * in, then it can not be dirty (!PG_CLEAN) because no one has 371 * had a chance to modify it yet. if the PG_BUSY page is being 372 * paged out then it means that someone else has already started 373 * cleaning the page for us (how nice!). in this case, if we 374 * have syncio specified, then after we make our pass through the 375 * object we need to wait for the other PG_BUSY pages to clear 376 * off (i.e. we need to do an iosync). also note that once a 377 * page is PG_BUSY it must stay in its object until it is un-busyed. 378 * 379 * note on page traversal: 380 * we can traverse the pages in an object either by going down the 381 * linked list in "uobj->memq", or we can go over the address range 382 * by page doing hash table lookups for each address. depending 383 * on how many pages are in the object it may be cheaper to do one 384 * or the other. we set "by_list" to true if we are using memq. 385 * if the cost of a hash lookup was equal to the cost of the list 386 * traversal we could compare the number of pages in the start->stop 387 * range to the total number of pages in the object. however, it 388 * seems that a hash table lookup is more expensive than the linked 389 * list traversal, so we multiply the number of pages in the 390 * start->stop range by a penalty which we define below. 391 */ 392 393 #define UVN_HASH_PENALTY 4 /* XXX: a guess */ 394 395 static boolean_t 396 uvn_flush(uobj, start, stop, flags) 397 struct uvm_object *uobj; 398 voff_t start, stop; 399 int flags; 400 { 401 struct uvm_vnode *uvn = (struct uvm_vnode *)uobj; 402 struct vnode *vp = (struct vnode *)uobj; 403 struct vm_page *pp, *ppnext, *ptmp; 404 struct vm_page *pps[256], **ppsp; 405 int s; 406 int npages, result, lcv; 407 boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean; 408 voff_t curoff; 409 u_short pp_version; 410 UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist); 411 UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x", 412 uobj, start, stop, flags); 413 KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)); 414 415 #ifdef DEBUG 416 if (uvn->u_size == VSIZENOTSET) { 417 printf("uvn_flush: size not set vp %p\n", uvn); 418 vprint("uvn_flush VSIZENOTSET", vp); 419 flags |= PGO_ALLPAGES; 420 } 421 #endif 422 423 /* 424 * get init vals and determine how we are going to traverse object 425 */ 426 427 curoff = 0; 428 need_iosync = FALSE; 429 retval = TRUE; 430 wasclean = TRUE; 431 if (flags & PGO_ALLPAGES) { 432 all = TRUE; 433 by_list = TRUE; 434 } else { 435 start = trunc_page(start); 436 stop = round_page(stop); 437 #ifdef DEBUG 438 if (stop > round_page(uvn->u_size)) { 439 printf("uvn_flush: oor vp %p start 0x%x stop 0x%x " 440 "size 0x%x\n", uvn, (int)start, (int)stop, 441 (int)round_page(uvn->u_size)); 442 } 443 #endif 444 all = FALSE; 445 by_list = (uobj->uo_npages <= 446 ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY); 447 } 448 449 UVMHIST_LOG(maphist, 450 " flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x", 451 start, stop, by_list, flags); 452 453 /* 454 * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as 455 * a _hint_ as to how up to date the PG_CLEAN bit is. if the hint 456 * is wrong it will only prevent us from clustering... it won't break 457 * anything. we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster 458 * will set them as it syncs PG_CLEAN. This is only an issue if we 459 * are looking at non-inactive pages (because inactive page's PG_CLEAN 460 * bit is always up to date since there are no mappings). 461 * [borrowed PG_CLEANCHK idea from FreeBSD VM] 462 */ 463 464 if ((flags & PGO_CLEANIT) != 0 && 465 uobj->pgops->pgo_mk_pcluster != NULL) { 466 if (by_list) { 467 TAILQ_FOREACH(pp, &uobj->memq, listq) { 468 if (!all && 469 (pp->offset < start || pp->offset >= stop)) 470 continue; 471 pp->flags &= ~PG_CLEANCHK; 472 } 473 474 } else { /* by hash */ 475 for (curoff = start ; curoff < stop; 476 curoff += PAGE_SIZE) { 477 pp = uvm_pagelookup(uobj, curoff); 478 if (pp) 479 pp->flags &= ~PG_CLEANCHK; 480 } 481 } 482 } 483 484 /* 485 * now do it. note: we must update ppnext in body of loop or we 486 * will get stuck. we need to use ppnext because we may free "pp" 487 * before doing the next loop. 488 */ 489 490 if (by_list) { 491 pp = TAILQ_FIRST(&uobj->memq); 492 } else { 493 curoff = start; 494 pp = uvm_pagelookup(uobj, curoff); 495 } 496 497 ppnext = NULL; 498 ppsp = NULL; 499 uvm_lock_pageq(); 500 501 /* locked: both page queues and uobj */ 502 for ( ; (by_list && pp != NULL) || 503 (!by_list && curoff < stop) ; pp = ppnext) { 504 if (by_list) { 505 if (!all && 506 (pp->offset < start || pp->offset >= stop)) { 507 ppnext = TAILQ_NEXT(pp, listq); 508 continue; 509 } 510 } else { 511 curoff += PAGE_SIZE; 512 if (pp == NULL) { 513 if (curoff < stop) 514 ppnext = uvm_pagelookup(uobj, curoff); 515 continue; 516 } 517 } 518 519 /* 520 * handle case where we do not need to clean page (either 521 * because we are not clean or because page is not dirty or 522 * is busy): 523 * 524 * NOTE: we are allowed to deactivate a non-wired active 525 * PG_BUSY page, but once a PG_BUSY page is on the inactive 526 * queue it must stay put until it is !PG_BUSY (so as not to 527 * confuse pagedaemon). 528 */ 529 530 if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) { 531 needs_clean = FALSE; 532 if ((flags & (PGO_CLEANIT|PGO_SYNCIO)) == 533 (PGO_CLEANIT|PGO_SYNCIO)) 534 need_iosync = TRUE; 535 } else { 536 537 /* 538 * freeing: nuke all mappings so we can sync 539 * PG_CLEAN bit with no race 540 */ 541 if ((pp->flags & PG_CLEAN) != 0 && 542 (flags & PGO_FREE) != 0 && 543 (pp->pqflags & PQ_ACTIVE) != 0) 544 pmap_page_protect(pp, VM_PROT_NONE); 545 if ((pp->flags & PG_CLEAN) != 0 && 546 pmap_is_modified(pp)) 547 pp->flags &= ~(PG_CLEAN); 548 pp->flags |= PG_CLEANCHK; 549 needs_clean = ((pp->flags & PG_CLEAN) == 0); 550 } 551 552 /* 553 * if we don't need a clean... load ppnext and dispose of pp 554 */ 555 if (!needs_clean) { 556 if (by_list) 557 ppnext = TAILQ_NEXT(pp, listq); 558 else { 559 if (curoff < stop) 560 ppnext = uvm_pagelookup(uobj, curoff); 561 } 562 563 if (flags & PGO_DEACTIVATE) { 564 if ((pp->pqflags & PQ_INACTIVE) == 0 && 565 (pp->flags & PG_BUSY) == 0 && 566 pp->wire_count == 0) { 567 pmap_page_protect(pp, VM_PROT_NONE); 568 uvm_pagedeactivate(pp); 569 } 570 571 } else if (flags & PGO_FREE) { 572 if (pp->flags & PG_BUSY) { 573 pp->flags |= PG_RELEASED; 574 } else { 575 pmap_page_protect(pp, VM_PROT_NONE); 576 uvm_pagefree(pp); 577 } 578 } 579 /* ppnext is valid so we can continue... */ 580 continue; 581 } 582 583 /* 584 * pp points to a page in the locked object that we are 585 * working on. if it is !PG_CLEAN,!PG_BUSY and we asked 586 * for cleaning (PGO_CLEANIT). we clean it now. 587 * 588 * let uvm_pager_put attempted a clustered page out. 589 * note: locked: uobj and page queues. 590 */ 591 592 wasclean = FALSE; 593 pp->flags |= PG_BUSY; /* we 'own' page now */ 594 UVM_PAGE_OWN(pp, "uvn_flush"); 595 pmap_page_protect(pp, VM_PROT_READ); 596 pp_version = pp->version; 597 ReTry: 598 ppsp = pps; 599 npages = sizeof(pps) / sizeof(struct vm_page *); 600 601 /* locked: page queues, uobj */ 602 result = uvm_pager_put(uobj, pp, &ppsp, &npages, 603 flags | PGO_DOACTCLUST, start, stop); 604 /* unlocked: page queues, uobj */ 605 606 /* 607 * at this point nothing is locked. if we did an async I/O 608 * it is remotely possible for the async i/o to complete and 609 * the page "pp" be freed or what not before we get a chance 610 * to relock the object. in order to detect this, we have 611 * saved the version number of the page in "pp_version". 612 */ 613 614 /* relock! */ 615 simple_lock(&uobj->vmobjlock); 616 uvm_lock_pageq(); 617 618 /* 619 * VM_PAGER_AGAIN: given the structure of this pager, this 620 * can only happen when we are doing async I/O and can't 621 * map the pages into kernel memory (pager_map) due to lack 622 * of vm space. if this happens we drop back to sync I/O. 623 */ 624 625 if (result == VM_PAGER_AGAIN) { 626 627 /* 628 * it is unlikely, but page could have been released 629 * while we had the object lock dropped. we ignore 630 * this now and retry the I/O. we will detect and 631 * handle the released page after the syncio I/O 632 * completes. 633 */ 634 #ifdef DIAGNOSTIC 635 if (flags & PGO_SYNCIO) 636 panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)"); 637 #endif 638 flags |= PGO_SYNCIO; 639 goto ReTry; 640 } 641 642 /* 643 * the cleaning operation is now done. finish up. note that 644 * on error (!OK, !PEND) uvm_pager_put drops the cluster for us. 645 * if success (OK, PEND) then uvm_pager_put returns the cluster 646 * to us in ppsp/npages. 647 */ 648 649 /* 650 * for pending async i/o if we are not deactivating/freeing 651 * we can move on to the next page. 652 */ 653 654 if (result == VM_PAGER_PEND && 655 (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { 656 657 /* 658 * no per-page ops: refresh ppnext and continue 659 */ 660 if (by_list) { 661 if (pp->version == pp_version) 662 ppnext = TAILQ_NEXT(pp, listq); 663 else 664 ppnext = TAILQ_FIRST(&uobj->memq); 665 } else { 666 if (curoff < stop) 667 ppnext = uvm_pagelookup(uobj, curoff); 668 } 669 continue; 670 } 671 672 /* 673 * need to look at each page of the I/O operation. we defer 674 * processing "pp" until the last trip through this "for" loop 675 * so that we can load "ppnext" for the main loop after we 676 * play with the cluster pages [thus the "npages + 1" in the 677 * loop below]. 678 */ 679 680 for (lcv = 0 ; lcv < npages + 1 ; lcv++) { 681 682 /* 683 * handle ppnext for outside loop, and saving pp 684 * until the end. 685 */ 686 if (lcv < npages) { 687 if (ppsp[lcv] == pp) 688 continue; /* skip pp until the end */ 689 ptmp = ppsp[lcv]; 690 } else { 691 ptmp = pp; 692 693 /* set up next page for outer loop */ 694 if (by_list) { 695 if (pp->version == pp_version) 696 ppnext = TAILQ_NEXT(pp, listq); 697 else 698 ppnext = TAILQ_FIRST( 699 &uobj->memq); 700 } else { 701 if (curoff < stop) 702 ppnext = uvm_pagelookup(uobj, 703 curoff); 704 } 705 } 706 707 /* 708 * verify the page wasn't moved while obj was 709 * unlocked 710 */ 711 if (result == VM_PAGER_PEND && ptmp->uobject != uobj) 712 continue; 713 714 /* 715 * unbusy the page if I/O is done. note that for 716 * pending I/O it is possible that the I/O op 717 * finished before we relocked the object (in 718 * which case the page is no longer busy). 719 */ 720 721 if (result != VM_PAGER_PEND) { 722 if (ptmp->flags & PG_WANTED) { 723 /* still holding object lock */ 724 wakeup(ptmp); 725 } 726 ptmp->flags &= ~(PG_WANTED|PG_BUSY); 727 UVM_PAGE_OWN(ptmp, NULL); 728 if (ptmp->flags & PG_RELEASED) { 729 uvm_unlock_pageq(); 730 if (!uvn_releasepg(ptmp, NULL)) { 731 UVMHIST_LOG(maphist, 732 "released %p", 733 ptmp, 0,0,0); 734 return (TRUE); 735 } 736 uvm_lock_pageq(); 737 continue; 738 } else { 739 if ((flags & PGO_WEAK) == 0 && 740 !(result == VM_PAGER_ERROR && 741 curproc == uvm.pagedaemon_proc)) { 742 ptmp->flags |= 743 (PG_CLEAN|PG_CLEANCHK); 744 if ((flags & PGO_FREE) == 0) { 745 pmap_clear_modify(ptmp); 746 } 747 } 748 } 749 } 750 751 /* 752 * dispose of page 753 */ 754 755 if (flags & PGO_DEACTIVATE) { 756 if ((pp->pqflags & PQ_INACTIVE) == 0 && 757 (pp->flags & PG_BUSY) == 0 && 758 pp->wire_count == 0) { 759 pmap_page_protect(ptmp, VM_PROT_NONE); 760 uvm_pagedeactivate(ptmp); 761 } 762 } else if (flags & PGO_FREE) { 763 if (result == VM_PAGER_PEND) { 764 if ((ptmp->flags & PG_BUSY) != 0) 765 /* signal for i/o done */ 766 ptmp->flags |= PG_RELEASED; 767 } else { 768 if (result != VM_PAGER_OK) { 769 printf("uvn_flush: obj=%p, " 770 "offset=0x%llx. error %d\n", 771 pp->uobject, 772 (long long)pp->offset, 773 result); 774 printf("uvn_flush: WARNING: " 775 "changes to page may be " 776 "lost!\n"); 777 retval = FALSE; 778 } 779 pmap_page_protect(ptmp, VM_PROT_NONE); 780 uvm_pagefree(ptmp); 781 } 782 } 783 } /* end of "lcv" for loop */ 784 } /* end of "pp" for loop */ 785 786 uvm_unlock_pageq(); 787 if ((flags & PGO_CLEANIT) && all && wasclean && 788 LIST_FIRST(&vp->v_dirtyblkhd) == NULL && 789 (vp->v_flag & VONWORKLST)) { 790 vp->v_flag &= ~VONWORKLST; 791 LIST_REMOVE(vp, v_synclist); 792 } 793 if (need_iosync) { 794 UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); 795 796 /* 797 * XXX this doesn't use the new two-flag scheme, 798 * but to use that, all i/o initiators will have to change. 799 */ 800 801 s = splbio(); 802 while (vp->v_numoutput != 0) { 803 UVMHIST_LOG(ubchist, "waiting for vp %p num %d", 804 vp, vp->v_numoutput,0,0); 805 806 vp->v_flag |= VBWAIT; 807 UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, 808 &uvn->u_obj.vmobjlock, 809 FALSE, "uvn_flush",0); 810 simple_lock(&uvn->u_obj.vmobjlock); 811 } 812 splx(s); 813 } 814 815 /* return, with object locked! */ 816 UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0); 817 return(retval); 818 } 819 820 /* 821 * uvn_cluster 822 * 823 * we are about to do I/O in an object at offset. this function is called 824 * to establish a range of offsets around "offset" in which we can cluster 825 * I/O. 826 * 827 * - currently doesn't matter if obj locked or not. 828 */ 829 830 static void 831 uvn_cluster(uobj, offset, loffset, hoffset) 832 struct uvm_object *uobj; 833 voff_t offset; 834 voff_t *loffset, *hoffset; /* OUT */ 835 { 836 struct uvm_vnode *uvn = (struct uvm_vnode *)uobj; 837 838 *loffset = offset; 839 *hoffset = min(offset + MAXBSIZE, round_page(uvn->u_size)); 840 } 841 842 /* 843 * uvn_put: flush page data to backing store. 844 * 845 * => object must be locked! we will _unlock_ it before starting I/O. 846 * => flags: PGO_SYNCIO -- use sync. I/O 847 * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) 848 */ 849 850 static int 851 uvn_put(uobj, pps, npages, flags) 852 struct uvm_object *uobj; 853 struct vm_page **pps; 854 int npages, flags; 855 { 856 struct vnode *vp = (struct vnode *)uobj; 857 int error; 858 859 error = VOP_PUTPAGES(vp, pps, npages, flags, NULL); 860 return uvm_errno2vmerror(error); 861 } 862 863 864 /* 865 * uvn_get: get pages (synchronously) from backing store 866 * 867 * => prefer map unlocked (not required) 868 * => object must be locked! we will _unlock_ it before starting any I/O. 869 * => flags: PGO_ALLPAGES: get all of the pages 870 * PGO_LOCKED: fault data structures are locked 871 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] 872 * => NOTE: caller must check for released pages!! 873 */ 874 875 static int 876 uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) 877 struct uvm_object *uobj; 878 voff_t offset; 879 struct vm_page **pps; /* IN/OUT */ 880 int *npagesp; /* IN (OUT if PGO_LOCKED) */ 881 int centeridx; 882 vm_prot_t access_type; 883 int advice, flags; 884 { 885 struct vnode *vp = (struct vnode *)uobj; 886 int error; 887 UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist); 888 889 UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0); 890 error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, 891 access_type, advice, flags); 892 return uvm_errno2vmerror(error); 893 } 894 895 896 /* 897 * uvn_findpages: 898 * return the page for the uobj and offset requested, allocating if needed. 899 * => uobj must be locked. 900 * => returned page will be BUSY. 901 */ 902 903 void 904 uvn_findpages(uobj, offset, npagesp, pps, flags) 905 struct uvm_object *uobj; 906 voff_t offset; 907 int *npagesp; 908 struct vm_page **pps; 909 int flags; 910 { 911 int i, rv, npages; 912 913 rv = 0; 914 npages = *npagesp; 915 for (i = 0; i < npages; i++, offset += PAGE_SIZE) { 916 rv += uvn_findpage(uobj, offset, &pps[i], flags); 917 } 918 *npagesp = rv; 919 } 920 921 static int 922 uvn_findpage(uobj, offset, pgp, flags) 923 struct uvm_object *uobj; 924 voff_t offset; 925 struct vm_page **pgp; 926 int flags; 927 { 928 struct vm_page *pg; 929 UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist); 930 UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0); 931 932 if (*pgp != NULL) { 933 UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0); 934 return 0; 935 } 936 for (;;) { 937 /* look for an existing page */ 938 pg = uvm_pagelookup(uobj, offset); 939 940 /* nope? allocate one now */ 941 if (pg == NULL) { 942 if (flags & UFP_NOALLOC) { 943 UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0); 944 return 0; 945 } 946 if (uvmexp.vnodepages > 947 (uvmexp.active + uvmexp.inactive + uvmexp.wired + 948 uvmexp.free) * 7 / 8) { 949 pg = NULL; 950 } else { 951 pg = uvm_pagealloc(uobj, offset, NULL, 0); 952 } 953 if (pg == NULL) { 954 if (flags & UFP_NOWAIT) { 955 UVMHIST_LOG(ubchist, "nowait",0,0,0,0); 956 return 0; 957 } 958 simple_unlock(&uobj->vmobjlock); 959 uvm_wait("uvn_fp1"); 960 simple_lock(&uobj->vmobjlock); 961 continue; 962 } 963 uvmexp.vnodepages++; 964 UVMHIST_LOG(ubchist, "alloced",0,0,0,0); 965 break; 966 } else if (flags & UFP_NOCACHE) { 967 UVMHIST_LOG(ubchist, "nocache",0,0,0,0); 968 return 0; 969 } 970 971 /* page is there, see if we need to wait on it */ 972 if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) { 973 if (flags & UFP_NOWAIT) { 974 UVMHIST_LOG(ubchist, "nowait",0,0,0,0); 975 return 0; 976 } 977 pg->flags |= PG_WANTED; 978 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, 979 "uvn_fp2", 0); 980 simple_lock(&uobj->vmobjlock); 981 continue; 982 } 983 984 /* skip PG_RDONLY pages if requested */ 985 if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) { 986 UVMHIST_LOG(ubchist, "nordonly",0,0,0,0); 987 return 0; 988 } 989 990 /* mark the page BUSY and we're done. */ 991 pg->flags |= PG_BUSY; 992 UVM_PAGE_OWN(pg, "uvn_findpage"); 993 UVMHIST_LOG(ubchist, "found",0,0,0,0); 994 break; 995 } 996 *pgp = pg; 997 return 1; 998 } 999 1000 /* 1001 * uvm_vnp_setsize: grow or shrink a vnode uvn 1002 * 1003 * grow => just update size value 1004 * shrink => toss un-needed pages 1005 * 1006 * => we assume that the caller has a reference of some sort to the 1007 * vnode in question so that it will not be yanked out from under 1008 * us. 1009 * 1010 * called from: 1011 * => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos]) 1012 * => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write) 1013 * => ffs_balloc [XXX: why? doesn't WRITE handle?] 1014 * => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr 1015 * => union fs: union_newsize 1016 */ 1017 1018 void 1019 uvm_vnp_setsize(vp, newsize) 1020 struct vnode *vp; 1021 voff_t newsize; 1022 { 1023 struct uvm_vnode *uvn = &vp->v_uvm; 1024 UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist); 1025 1026 simple_lock(&uvn->u_obj.vmobjlock); 1027 1028 UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", uvn->u_size, newsize, 0,0); 1029 1030 /* 1031 * now check if the size has changed: if we shrink we had better 1032 * toss some pages... 1033 */ 1034 1035 if (uvn->u_size > newsize && uvn->u_size != VSIZENOTSET) { 1036 (void) uvn_flush(&uvn->u_obj, newsize, uvn->u_size, PGO_FREE); 1037 } 1038 uvn->u_size = newsize; 1039 simple_unlock(&uvn->u_obj.vmobjlock); 1040 } 1041 1042 /* 1043 * uvm_vnp_zerorange: set a range of bytes in a file to zero. 1044 */ 1045 1046 void 1047 uvm_vnp_zerorange(vp, off, len) 1048 struct vnode *vp; 1049 off_t off; 1050 size_t len; 1051 { 1052 void *win; 1053 1054 /* 1055 * XXXUBC invent kzero() and use it 1056 */ 1057 1058 while (len) { 1059 vsize_t bytelen = len; 1060 1061 win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_WRITE); 1062 memset(win, 0, bytelen); 1063 ubc_release(win, 0); 1064 1065 off += bytelen; 1066 len -= bytelen; 1067 } 1068 } 1069