1 /* $OpenBSD: uvm_pager.c,v 1.49 2009/04/06 12:02:52 oga Exp $ */ 2 /* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ 3 4 /* 5 * 6 * Copyright (c) 1997 Charles D. Cranor and Washington University. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by Charles D. Cranor and 20 * Washington University. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
34 * 35 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp 36 */ 37 38 /* 39 * uvm_pager.c: generic functions used to assist the pagers. 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/proc.h> 45 #include <sys/malloc.h> 46 #include <sys/pool.h> 47 #include <sys/vnode.h> 48 #include <sys/buf.h> 49 50 #include <uvm/uvm.h> 51 52 struct pool *uvm_aiobuf_pool; 53 54 struct uvm_pagerops *uvmpagerops[] = { 55 &aobj_pager, 56 &uvm_deviceops, 57 &uvm_vnodeops, 58 }; 59 60 /* 61 * the pager map: provides KVA for I/O 62 * 63 * Each uvm_pseg has room for MAX_PAGERMAP_SEGS pager io space of 64 * MAXBSIZE bytes. 65 * 66 * The number of uvm_pseg instances is dynamic using an array segs. 67 * At most UVM_PSEG_COUNT instances can exist. 68 * 69 * psegs[0] always exists (so that the pager can always map in pages). 70 * psegs[0] element 0 is always reserved for the pagedaemon. 71 * 72 * Any other pseg is automatically created when no space is available 73 * and automatically destroyed when it is no longer in use. 74 */ 75 #define MAX_PAGER_SEGS 16 76 #define PSEG_NUMSEGS (PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE) 77 struct uvm_pseg { 78 /* Start of virtual space; 0 if not inited. */ 79 vaddr_t start; 80 /* Bitmap of the segments in use in this pseg. 
*/ 81 int use; 82 }; 83 struct mutex uvm_pseg_lck; 84 struct uvm_pseg psegs[PSEG_NUMSEGS]; 85 86 #define UVM_PSEG_FULL(pseg) ((pseg)->use == (1 << MAX_PAGER_SEGS) - 1) 87 #define UVM_PSEG_EMPTY(pseg) ((pseg)->use == 0) 88 #define UVM_PSEG_INUSE(pseg,id) (((pseg)->use & (1 << (id))) != 0) 89 90 void uvm_pseg_init(struct uvm_pseg *); 91 void uvm_pseg_destroy(struct uvm_pseg *); 92 vaddr_t uvm_pseg_get(int); 93 void uvm_pseg_release(vaddr_t); 94 95 struct vm_page *uvm_pageratop(vaddr_t); 96 97 /* 98 * uvm_pager_init: init pagers (at boot time) 99 */ 100 101 void 102 uvm_pager_init(void) 103 { 104 int lcv; 105 106 /* 107 * init pager map 108 */ 109 110 uvm_pseg_init(&psegs[0]); 111 mtx_init(&uvm_pseg_lck, IPL_VM); 112 113 /* 114 * init ASYNC I/O queue 115 */ 116 117 TAILQ_INIT(&uvm.aio_done); 118 119 /* 120 * call pager init functions 121 */ 122 for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *); 123 lcv++) { 124 if (uvmpagerops[lcv]->pgo_init) 125 uvmpagerops[lcv]->pgo_init(); 126 } 127 } 128 129 /* 130 * Initialize a uvm_pseg. 131 * 132 * May fail, in which case seg->start == 0. 133 * 134 * Caller locks uvm_pseg_lck. 135 */ 136 void 137 uvm_pseg_init(struct uvm_pseg *pseg) 138 { 139 KASSERT(pseg->start == 0); 140 KASSERT(pseg->use == 0); 141 pseg->start = uvm_km_valloc(kernel_map, MAX_PAGER_SEGS * MAXBSIZE); 142 } 143 144 /* 145 * Destroy a uvm_pseg. 146 * 147 * Never fails. 148 * 149 * Requires that seg != &psegs[0] 150 * 151 * Caller locks uvm_pseg_lck. 152 */ 153 void 154 uvm_pseg_destroy(struct uvm_pseg *pseg) 155 { 156 KASSERT(pseg != &psegs[0]); 157 KASSERT(pseg->start != 0); 158 KASSERT(pseg->use == 0); 159 uvm_km_free(kernel_map, pseg->start, MAX_PAGER_SEGS * MAXBSIZE); 160 pseg->start = 0; 161 } 162 163 /* 164 * Acquire a pager map segment. 165 * 166 * Returns a vaddr for paging. 0 on failure. 167 * 168 * Caller does not lock. 
169 */ 170 vaddr_t 171 uvm_pseg_get(int flags) 172 { 173 int i; 174 struct uvm_pseg *pseg; 175 176 mtx_enter(&uvm_pseg_lck); 177 178 pager_seg_restart: 179 /* Find first pseg that has room. */ 180 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 181 if (UVM_PSEG_FULL(pseg)) 182 continue; 183 184 if (pseg->start == 0) { 185 /* Need initialization. */ 186 uvm_pseg_init(pseg); 187 if (pseg->start == 0) 188 goto pager_seg_fail; 189 } 190 191 /* Keep index 0 reserved for pagedaemon. */ 192 if (pseg == &psegs[0] && curproc != uvm.pagedaemon_proc) 193 i = 1; 194 else 195 i = 0; 196 197 for (; i < MAX_PAGER_SEGS; i++) { 198 if (!UVM_PSEG_INUSE(pseg, i)) { 199 pseg->use |= 1 << i; 200 mtx_leave(&uvm_pseg_lck); 201 return pseg->start + i * MAXBSIZE; 202 } 203 } 204 } 205 206 pager_seg_fail: 207 if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) { 208 msleep(&psegs, &uvm_pseg_lck, PVM, "pagerseg", 0); 209 goto pager_seg_restart; 210 } 211 212 mtx_leave(&uvm_pseg_lck); 213 return 0; 214 } 215 216 /* 217 * Release a pager map segment. 218 * 219 * Caller does not lock. 220 * 221 * Deallocates pseg if it is no longer in use. 
222 */ 223 void 224 uvm_pseg_release(vaddr_t segaddr) 225 { 226 int id; 227 struct uvm_pseg *pseg; 228 229 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 230 if (pseg->start <= segaddr && 231 segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE) 232 break; 233 } 234 KASSERT(pseg != &psegs[PSEG_NUMSEGS]); 235 236 id = (segaddr - pseg->start) / MAXBSIZE; 237 KASSERT(id >= 0 && id < MAX_PAGER_SEGS); 238 239 /* test for no remainder */ 240 KDASSERT(segaddr == pseg->start + id * MAXBSIZE); 241 242 mtx_enter(&uvm_pseg_lck); 243 244 KASSERT(UVM_PSEG_INUSE(pseg, id)); 245 246 pseg->use &= ~(1 << id); 247 wakeup(&psegs); 248 249 if (pseg != &psegs[0] && UVM_PSEG_EMPTY(pseg)) 250 uvm_pseg_destroy(pseg); 251 252 mtx_leave(&uvm_pseg_lck); 253 } 254 255 /* 256 * uvm_pagermapin: map pages into KVA for I/O that needs mappings 257 * 258 * We basically just km_valloc a blank map entry to reserve the space in the 259 * kernel map and then use pmap_enter() to put the mappings in by hand. 260 */ 261 vaddr_t 262 uvm_pagermapin(struct vm_page **pps, int npages, int flags) 263 { 264 vaddr_t kva, cva; 265 vm_prot_t prot; 266 vsize_t size; 267 struct vm_page *pp; 268 269 UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist); 270 271 UVMHIST_LOG(maphist,"(pps=%p, npages=%ld, flags=%d)", 272 pps, npages, flags,0); 273 274 prot = VM_PROT_READ; 275 if (flags & UVMPAGER_MAPIN_READ) 276 prot |= VM_PROT_WRITE; 277 size = ptoa(npages); 278 279 KASSERT(size <= MAXBSIZE); 280 281 kva = uvm_pseg_get(flags); 282 if (kva == 0) { 283 UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0); 284 return 0; 285 } 286 287 for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { 288 pp = *pps++; 289 KASSERT(pp); 290 KASSERT(pp->pg_flags & PG_BUSY); 291 /* Allow pmap_enter to fail. 
*/ 292 if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp), 293 prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) { 294 pmap_remove(pmap_kernel(), kva, cva); 295 pmap_update(pmap_kernel()); 296 uvm_pseg_release(kva); 297 UVMHIST_LOG(maphist,"<- pmap_enter failed", 0,0,0,0); 298 return 0; 299 } 300 } 301 pmap_update(pmap_kernel()); 302 UVMHIST_LOG(maphist, "<- done (KVA=0x%lx)", kva,0,0,0); 303 return kva; 304 } 305 306 /* 307 * uvm_pagermapout: remove KVA mapping 308 * 309 * We remove our mappings by hand and then remove the mapping. 310 */ 311 void 312 uvm_pagermapout(vaddr_t kva, int npages) 313 { 314 UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist); 315 316 UVMHIST_LOG(maphist, " (kva=0x%lx, npages=%ld)", kva, npages,0,0); 317 318 pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT)); 319 pmap_update(pmap_kernel()); 320 uvm_pseg_release(kva); 321 322 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 323 } 324 325 /* 326 * uvm_mk_pcluster 327 * 328 * generic "make 'pager put' cluster" function. a pager can either 329 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this 330 * generic function, or [3] set it to a pager specific function. 331 * 332 * => caller must lock object _and_ pagequeues (since we need to look 333 * at active vs. inactive bits, etc.) 334 * => caller must make center page busy and write-protect it 335 * => we mark all cluster pages busy for the caller 336 * => the caller must unbusy all pages (and check wanted/released 337 * status if it drops the object lock) 338 * => flags: 339 * PGO_ALLPAGES: all pages in object are valid targets 340 * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster 341 * PGO_DOACTCLUST: include active pages in cluster. 342 * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. 343 * PG_CLEANCHK is only a hint, but clearing will help reduce 344 * the number of calls we make to the pmap layer. 
345 */ 346 347 struct vm_page ** 348 uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, 349 struct vm_page *center, int flags, voff_t mlo, voff_t mhi) 350 { 351 struct vm_page **ppsp, *pclust; 352 voff_t lo, hi, curoff; 353 int center_idx, forward, incr; 354 UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist); 355 356 /* 357 * center page should already be busy and write protected. XXX: 358 * suppose page is wired? if we lock, then a process could 359 * fault/block on it. if we don't lock, a process could write the 360 * pages in the middle of an I/O. (consider an msync()). let's 361 * lock it for now (better to delay than corrupt data?). 362 */ 363 364 /* 365 * get cluster boundaries, check sanity, and apply our limits as well. 366 */ 367 368 uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi); 369 if ((flags & PGO_ALLPAGES) == 0) { 370 if (lo < mlo) 371 lo = mlo; 372 if (hi > mhi) 373 hi = mhi; 374 } 375 if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ 376 pps[0] = center; 377 *npages = 1; 378 return(pps); 379 } 380 381 /* 382 * now determine the center and attempt to cluster around the 383 * edges 384 */ 385 386 center_idx = (center->offset - lo) >> PAGE_SHIFT; 387 pps[center_idx] = center; /* plug in the center page */ 388 ppsp = &pps[center_idx]; 389 *npages = 1; 390 391 /* 392 * attempt to cluster around the left [backward], and then 393 * the right side [forward]. 394 * 395 * note that for inactive pages (pages that have been deactivated) 396 * there are no valid mappings and PG_CLEAN should be up to date. 397 * [i.e. there is no need to query the pmap with pmap_is_modified 398 * since there are no mappings]. 399 */ 400 401 for (forward = 0 ; forward <= 1 ; forward++) { 402 incr = forward ? 
PAGE_SIZE : -PAGE_SIZE; 403 curoff = center->offset + incr; 404 for ( ;(forward == 0 && curoff >= lo) || 405 (forward && curoff < hi); 406 curoff += incr) { 407 408 pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ 409 if (pclust == NULL) { 410 break; /* no page */ 411 } 412 /* handle active pages */ 413 /* NOTE: inactive pages don't have pmap mappings */ 414 if ((pclust->pg_flags & PQ_INACTIVE) == 0) { 415 if ((flags & PGO_DOACTCLUST) == 0) { 416 /* dont want mapped pages at all */ 417 break; 418 } 419 420 /* make sure "clean" bit is sync'd */ 421 if ((pclust->pg_flags & PG_CLEANCHK) == 0) { 422 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) 423 == PG_CLEAN && 424 pmap_is_modified(pclust)) 425 atomic_clearbits_int( 426 &pclust->pg_flags, 427 PG_CLEAN); 428 /* now checked */ 429 atomic_setbits_int(&pclust->pg_flags, 430 PG_CLEANCHK); 431 } 432 } 433 434 /* is page available for cleaning and does it need it */ 435 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) { 436 break; /* page is already clean or is busy */ 437 } 438 439 /* yes! enroll the page in our array */ 440 atomic_setbits_int(&pclust->pg_flags, PG_BUSY); 441 UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); 442 443 /* XXX: protect wired page? see above comment. */ 444 pmap_page_protect(pclust, VM_PROT_READ); 445 if (!forward) { 446 ppsp--; /* back up one page */ 447 *ppsp = pclust; 448 } else { 449 /* move forward one page */ 450 ppsp[*npages] = pclust; 451 } 452 (*npages)++; 453 } 454 } 455 456 /* 457 * done! return the cluster array to the caller!!! 458 */ 459 460 UVMHIST_LOG(maphist, "<- done",0,0,0,0); 461 return(ppsp); 462 } 463 464 /* 465 * uvm_pager_put: high level pageout routine 466 * 467 * we want to pageout page "pg" to backing store, clustering if 468 * possible. 469 * 470 * => page queues must be locked by caller 471 * => if page is not swap-backed, then "uobj" points to the object 472 * backing it. this object should be locked by the caller. 
473 * => if page is swap-backed, then "uobj" should be NULL. 474 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN 475 * for swap-backed memory, "pg" can be NULL if there is no page 476 * of interest [sometimes the case for the pagedaemon] 477 * => "ppsp_ptr" should point to an array of npages vm_page pointers 478 * for possible cluster building 479 * => flags (first two for non-swap-backed pages) 480 * PGO_ALLPAGES: all pages in uobj are valid targets 481 * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets 482 * PGO_SYNCIO: do SYNC I/O (no async) 483 * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O 484 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range 485 * if (!uobj) start is the (daddr64_t) of the starting swapblk 486 * => return state: 487 * 1. we return the VM_PAGER status code of the pageout 488 * 2. we return with the page queues unlocked 489 * 3. if (uobj != NULL) [!swap_backed] we return with 490 * uobj locked _only_ if PGO_PDFREECLUST is set 491 * AND result != VM_PAGER_PEND. in all other cases 492 * we return with uobj unlocked. [this is a hack 493 * that allows the pagedaemon to save one lock/unlock 494 * pair in the !swap_backed case since we have to 495 * lock the uobj to drop the cluster anyway] 496 * 4. on errors we always drop the cluster. thus, if we return 497 * !PEND, !OK, then the caller only has to worry about 498 * un-busying the main page (not the cluster pages). 499 * 5. on success, if !PGO_PDFREECLUST, we return the cluster 500 * with all pages busy (caller must un-busy and check 501 * wanted/released flags). 502 */ /* overview of the body: (1) build the cluster: pgo_mk_pcluster for an object if it has one, else just "pg"; swap clusters arrive pre-built. (2) unlock the page queues. (3) issue the put via pgo_put or uvm_swap_put. (4) on VM_PAGER_OK or VM_PAGER_PEND return, dropping the cluster first when OK and PGO_PDFREECLUST. (5) on any other result, if clustered or pg == NULL, drop the cluster with PGO_REALLOCSWAP, fix up swap slots, and for VM_PAGER_AGAIN with a pg retry the put with "pg" alone. */ /* NOTE(review): on the error path with uobj != NULL and a cluster, uobj->vmobjlock is taken before uvm_pager_dropcluster() and no statement visible here releases it on the no-retry path before the final PGO_PDFREECLUST simple_lock(); confirm this agrees with return-state item 3. */ 503 504 int 505 uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg, 506 struct vm_page ***ppsp_ptr, int *npages, int flags, 507 voff_t start, voff_t stop) 508 { 509 int result; 510 daddr64_t swblk; 511 struct vm_page **ppsp = *ppsp_ptr; 512 UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(pdhist); 513 514 /* 515 * note that uobj is null if we are doing a swap-backed pageout. 
516 * note that uobj is !null if we are doing normal object pageout. 517 * note that the page queues must be locked to cluster. 518 */ 519 520 if (uobj) { /* if !swap-backed */ 521 522 /* 523 * attempt to build a cluster for pageout using its 524 * make-put-cluster function (if it has one). 525 */ 526 527 if (uobj->pgops->pgo_mk_pcluster) { 528 ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp, 529 npages, pg, flags, start, stop); 530 *ppsp_ptr = ppsp; /* update caller's pointer */ 531 } else { /* no cluster function: the "cluster" is just pg */ 532 ppsp[0] = pg; 533 *npages = 1; 534 } 535 536 swblk = 0; /* XXX: keep gcc happy */ 537 538 } else { 539 540 /* 541 * for swap-backed pageout, the caller (the pagedaemon) has 542 * already built the cluster for us. the starting swap 543 * block we are writing to has been passed in as "start." 544 * "pg" could be NULL if there is no page we are especially 545 * interested in (in which case the whole cluster gets dropped 546 * in the event of an error or a sync "done"). 547 */ 548 swblk = (daddr64_t) start; 549 /* ppsp and npages should be ok */ 550 } 551 552 /* now that we've clustered we can unlock the page queues */ 553 uvm_unlock_pageq(); 554 555 /* 556 * now attempt the I/O. if we have a failure and we are 557 * clustered, we will drop the cluster and try again. 558 */ 559 560 ReTry: 561 if (uobj) { 562 /* object is locked */ 563 result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags); 564 UVMHIST_LOG(pdhist, "put -> %ld", result, 0,0,0); 565 /* object is now unlocked */ 566 } else { 567 /* nothing locked */ 568 /* XXX daddr64_t -> int */ 569 result = uvm_swap_put(swblk, ppsp, *npages, flags); 570 /* nothing locked */ 571 } 572 573 /* 574 * we have attempted the I/O. 575 * 576 * if the I/O was a success then: 577 * if !PGO_PDFREECLUST, we return the cluster to the 578 * caller (who must un-busy all pages) 579 * else we un-busy cluster pages for the pagedaemon 580 * 581 * if I/O is pending (async i/o) then we return the pending code. 
582 * [in this case the async i/o done function must clean up when 583 * i/o is done...] 584 */ 585 586 if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { 587 if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { 588 /* 589 * drop cluster and relock object (only if I/O is 590 * not pending) 591 */ 592 if (uobj) 593 /* required for dropcluster */ 594 simple_lock(&uobj->vmobjlock); 595 if (*npages > 1 || pg == NULL) 596 uvm_pager_dropcluster(uobj, pg, ppsp, npages, 597 PGO_PDFREECLUST); 598 /* if (uobj): object still locked, as per 599 * return-state item #3 */ 600 } 601 return (result); 602 } 603 604 /* 605 * the pager returned an error. if we were clustered (or there 606 * is no page of interest), drop the cluster and, for a transient 607 * error with a "pg", retry the put with "pg" alone. 608 */ 609 610 if (*npages > 1 || pg == NULL) { 611 if (uobj) { 612 simple_lock(&uobj->vmobjlock); 613 } 614 uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); 615 616 /* 617 * for failed swap-backed pageouts with a "pg", 618 * we need to reset pg's swslot to either: 619 * "swblk" (for transient errors, so we can retry), 620 * or 0 (for hard errors). 621 */ 622 623 if (uobj == NULL && pg != NULL) { 624 /* XXX daddr64_t -> int */ 625 int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; 626 if (pg->pg_flags & PQ_ANON) { 627 simple_lock(&pg->uanon->an_lock); 628 pg->uanon->an_swslot = nswblk; 629 simple_unlock(&pg->uanon->an_lock); 630 } else { 631 simple_lock(&pg->uobject->vmobjlock); 632 uao_set_swslot(pg->uobject, 633 pg->offset >> PAGE_SHIFT, 634 nswblk); 635 simple_unlock(&pg->uobject->vmobjlock); 636 } 637 } 638 if (result == VM_PAGER_AGAIN) { 639 640 /* 641 * for transient failures, free all the swslots that 642 * we're not going to retry with. 
643 */ 644 645 if (uobj == NULL) { 646 if (pg) { /* keep swblk itself: it is the slot pg was reset to above */ 647 /* XXX daddr64_t -> int */ 648 uvm_swap_free(swblk + 1, *npages - 1); 649 } else { 650 /* XXX daddr64_t -> int */ 651 uvm_swap_free(swblk, *npages); 652 } 653 } 654 if (pg) { /* shrink the cluster to "pg" alone and retry */ 655 ppsp[0] = pg; 656 *npages = 1; 657 goto ReTry; 658 } 659 } else if (uobj == NULL) { 660 661 /* 662 * for hard errors on swap-backed pageouts, 663 * mark the swslots as bad. note that we do not 664 * free swslots that we mark bad. 665 */ 666 667 /* XXX daddr64_t -> int */ 668 uvm_swap_markbad(swblk, *npages); 669 } 670 } 671 672 /* 673 * a pager error occurred (even after dropping the cluster, if there 674 * was one). give up! the caller only has one page ("pg") 675 * to worry about. 676 */ /* for the pagedaemon (PGO_PDFREECLUST) lock uobj before returning, see return-state item 3 in the header comment */ 677 678 if (uobj && (flags & PGO_PDFREECLUST) != 0) 679 simple_lock(&uobj->vmobjlock); 680 return(result); 681 } 682 683 /* 684 * uvm_pager_dropcluster: drop a cluster we have built (because we 685 * got an error, or, if PGO_PDFREECLUST we are un-busying the 686 * cluster pages on behalf of the pagedaemon). 687 * 688 * => uobj, if non-null, is a non-swap-backed object that is 689 * locked by the caller. we return with this object still 690 * locked. 691 * => page queues are not locked 692 * => pg is our page of interest (the one we clustered around, can be null) 693 * => ppsp/npages is our current cluster 694 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster 695 * pages on behalf of the pagedaemon. 696 * PGO_REALLOCSWAP: drop previously allocated swap slots for 697 * clustered swap-backed pages (except for "pg" if !NULL) 698 * "swblk" is the start of swap alloc (e.g. 
for ppsp[0]) 699 * [only meaningful if swap-backed (uobj == NULL)] 700 */ 701 702 void 703 uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, 704 struct vm_page **ppsp, int *npages, int flags) 705 { 706 int lcv; 707 boolean_t obj_is_alive; 708 struct uvm_object *saved_uobj; 709 710 /* 711 * drop all pages but "pg" 712 */ 713 714 for (lcv = 0 ; lcv < *npages ; lcv++) { 715 716 /* skip "pg" or empty slot */ 717 if (ppsp[lcv] == pg || ppsp[lcv] == NULL) 718 continue; 719 720 /* 721 * if swap-backed, gain lock on object that owns page. note 722 * that PQ_ANON bit can't change as long as we are holding 723 * the PG_BUSY bit (so there is no need to lock the page 724 * queues to test it). 725 * 726 * once we have the lock, dispose of the pointer to swap, if 727 * requested 728 */ 729 if (!uobj) { 730 if (ppsp[lcv]->pg_flags & PQ_ANON) { 731 simple_lock(&ppsp[lcv]->uanon->an_lock); 732 if (flags & PGO_REALLOCSWAP) 733 /* zap swap block */ 734 ppsp[lcv]->uanon->an_swslot = 0; 735 } else { 736 simple_lock(&ppsp[lcv]->uobject->vmobjlock); 737 if (flags & PGO_REALLOCSWAP) 738 uao_set_swslot(ppsp[lcv]->uobject, 739 ppsp[lcv]->offset >> PAGE_SHIFT, 0); 740 } 741 } 742 743 /* did someone want the page while we had it busy-locked? */ 744 if (ppsp[lcv]->pg_flags & PG_WANTED) { 745 /* still holding obj lock */ 746 wakeup(ppsp[lcv]); 747 } 748 749 /* if page was released, release it. 
otherwise un-busy it */ 750 if (ppsp[lcv]->pg_flags & PG_RELEASED) { 751 752 if (ppsp[lcv]->pg_flags & PQ_ANON) { 753 /* so that anfree will free */ 754 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 755 PG_BUSY); 756 UVM_PAGE_OWN(ppsp[lcv], NULL); 757 758 pmap_page_protect(ppsp[lcv], VM_PROT_NONE); 759 simple_unlock(&ppsp[lcv]->uanon->an_lock); 760 /* kills anon and frees pg */ 761 uvm_anfree(ppsp[lcv]->uanon); 762 763 continue; 764 } 765 766 /* 767 * pgo_releasepg will dump the page for us 768 */ 769 770 saved_uobj = ppsp[lcv]->uobject; 771 obj_is_alive = 772 saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL); 773 774 /* for normal objects, "pg" is still PG_BUSY by us, 775 * so obj can't die */ 776 KASSERT(!uobj || obj_is_alive); 777 778 /* only unlock the object if it is still alive... */ 779 if (obj_is_alive && saved_uobj != uobj) 780 simple_unlock(&saved_uobj->vmobjlock); 781 782 /* 783 * XXXCDC: suppose uobj died in the pgo_releasepg? 784 * how pass that 785 * info up to caller. we are currently ignoring it... 786 */ 787 788 continue; /* next page */ 789 } else { 790 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 791 PG_BUSY|PG_WANTED|PG_FAKE); 792 UVM_PAGE_OWN(ppsp[lcv], NULL); 793 } 794 795 /* 796 * if we are operating on behalf of the pagedaemon and we 797 * had a successful pageout update the page! 798 */ 799 if (flags & PGO_PDFREECLUST) { 800 pmap_clear_reference(ppsp[lcv]); 801 pmap_clear_modify(ppsp[lcv]); 802 atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN); 803 } 804 805 /* if anonymous cluster, unlock object and move on */ 806 if (!uobj) { 807 if (ppsp[lcv]->pg_flags & PQ_ANON) 808 simple_unlock(&ppsp[lcv]->uanon->an_lock); 809 else 810 simple_unlock(&ppsp[lcv]->uobject->vmobjlock); 811 } 812 } 813 } 814 815 #ifdef UBC 816 /* 817 * interrupt-context iodone handler for nested i/o bufs. 818 * 819 * => must be at splbio(). 
820 */ 821 822 void 823 uvm_aio_biodone1(struct buf *bp) 824 { 825 struct buf *mbp = bp->b_private; 826 827 splassert(IPL_BIO); 828 829 KASSERT(mbp != bp); 830 if (bp->b_flags & B_ERROR) { 831 mbp->b_flags |= B_ERROR; 832 mbp->b_error = bp->b_error; 833 } 834 mbp->b_resid -= bp->b_bcount; 835 pool_put(&bufpool, bp); 836 if (mbp->b_resid == 0) { 837 biodone(mbp); 838 } 839 } 840 #endif 841 842 /* 843 * interrupt-context iodone handler for single-buf i/os 844 * or the top-level buf of a nested-buf i/o. 845 * 846 * => must be at splbio(). 847 */ 848 849 void 850 uvm_aio_biodone(struct buf *bp) 851 { 852 splassert(IPL_BIO); 853 854 /* reset b_iodone for when this is a single-buf i/o. */ 855 bp->b_iodone = uvm_aio_aiodone; 856 857 mtx_enter(&uvm.aiodoned_lock); /* locks uvm.aio_done */ 858 TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist); 859 wakeup(&uvm.aiodoned); 860 mtx_leave(&uvm.aiodoned_lock); 861 } 862 863 /* 864 * uvm_aio_aiodone: do iodone processing for async i/os. 865 * this should be called in thread context, not interrupt context. 866 */ 867 868 void 869 uvm_aio_aiodone(struct buf *bp) 870 { 871 int npages = bp->b_bufsize >> PAGE_SHIFT; 872 struct vm_page *pg, *pgs[MAXPHYS >> PAGE_SHIFT]; 873 struct uvm_object *uobj; 874 int i, error; 875 boolean_t write, swap; 876 UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(pdhist); 877 UVMHIST_LOG(pdhist, "bp %p", bp, 0,0,0); 878 879 KASSERT(npages <= MAXPHYS >> PAGE_SHIFT); 880 splassert(IPL_BIO); 881 882 error = (bp->b_flags & B_ERROR) ? (bp->b_error ? 
bp->b_error : EIO) : 0; 883 write = (bp->b_flags & B_READ) == 0; 884 #ifdef UBC 885 /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ 886 if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) { 887 (*bioops.io_pageiodone)(bp); 888 } 889 #endif 890 891 uobj = NULL; 892 for (i = 0; i < npages; i++) { 893 pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); 894 UVMHIST_LOG(pdhist, "pgs[%ld] = %p", i, pgs[i],0,0); 895 } 896 uvm_pagermapout((vaddr_t)bp->b_data, npages); 897 #ifdef UVM_SWAP_ENCRYPT 898 /* 899 * XXX - assumes that we only get ASYNC writes. used to be above. 900 */ 901 if (pgs[0]->pg_flags & PQ_ENCRYPT) { 902 uvm_swap_freepages(pgs, npages); 903 goto freed; 904 } 905 #endif /* UVM_SWAP_ENCRYPT */ 906 for (i = 0; i < npages; i++) { 907 pg = pgs[i]; 908 909 if (i == 0) { 910 swap = (pg->pg_flags & PQ_SWAPBACKED) != 0; 911 if (!swap) { 912 uobj = pg->uobject; 913 simple_lock(&uobj->vmobjlock); 914 } 915 } 916 KASSERT(swap || pg->uobject == uobj); 917 if (swap) { 918 if (pg->pg_flags & PQ_ANON) { 919 simple_lock(&pg->uanon->an_lock); 920 } else { 921 simple_lock(&pg->uobject->vmobjlock); 922 } 923 } 924 925 /* 926 * if this is a read and we got an error, mark the pages 927 * PG_RELEASED so that uvm_page_unbusy() will free them. 928 */ 929 if (!write && error) { 930 atomic_setbits_int(&pg->pg_flags, PG_RELEASED); 931 continue; 932 } 933 KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0); 934 935 /* 936 * if this is a read and the page is PG_FAKE, 937 * or this was a successful write, 938 * mark the page PG_CLEAN and not PG_FAKE. 
939 */ 940 941 if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) { 942 pmap_clear_reference(pgs[i]); 943 pmap_clear_modify(pgs[i]); 944 atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN); 945 atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE); 946 } 947 if (swap) { 948 if (pg->pg_flags & PQ_ANON) { 949 simple_unlock(&pg->uanon->an_lock); 950 } else { 951 simple_unlock(&pg->uobject->vmobjlock); 952 } 953 } 954 } 955 uvm_page_unbusy(pgs, npages); 956 if (!swap) { 957 simple_unlock(&uobj->vmobjlock); 958 } 959 960 #ifdef UVM_SWAP_ENCRYPT 961 freed: 962 #endif 963 if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) { 964 vwakeup(bp->b_vp); 965 } 966 pool_put(&bufpool, bp); 967 } 968 969 /* 970 * uvm_pageratop: convert KVAs in the pager map back to their page 971 * structures. 972 */ 973 struct vm_page * 974 uvm_pageratop(vaddr_t kva) 975 { 976 struct vm_page *pg; 977 paddr_t pa; 978 boolean_t rv; 979 980 rv = pmap_extract(pmap_kernel(), kva, &pa); 981 KASSERT(rv); 982 pg = PHYS_TO_VM_PAGE(pa); 983 KASSERT(pg != NULL); 984 return (pg); 985 } 986