1 /* $OpenBSD: uvm_pager.c,v 1.78 2022/02/18 09:04:38 kettenis Exp $ */ 2 /* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp 29 */ 30 31 /* 32 * uvm_pager.c: generic functions used to assist the pagers. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/malloc.h> 38 #include <sys/pool.h> 39 #include <sys/buf.h> 40 #include <sys/atomic.h> 41 42 #include <uvm/uvm.h> 43 44 struct pool *uvm_aiobuf_pool; 45 46 const struct uvm_pagerops *uvmpagerops[] = { 47 &aobj_pager, 48 &uvm_deviceops, 49 &uvm_vnodeops, 50 }; 51 52 /* 53 * the pager map: provides KVA for I/O 54 * 55 * Each uvm_pseg has room for MAX_PAGERMAP_SEGS pager io space of 56 * MAXBSIZE bytes. 57 * 58 * The number of uvm_pseg instances is dynamic using an array segs. 59 * At most UVM_PSEG_COUNT instances can exist. 60 * 61 * psegs[0] always exists (so that the pager can always map in pages). 62 * psegs[0] element 0 is always reserved for the pagedaemon. 63 * 64 * Any other pseg is automatically created when no space is available 65 * and automatically destroyed when it is no longer in use. 66 */ 67 #define MAX_PAGER_SEGS 16 68 #define PSEG_NUMSEGS (PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE) 69 struct uvm_pseg { 70 /* Start of virtual space; 0 if not inited. */ 71 vaddr_t start; 72 /* Bitmap of the segments in use in this pseg. */ 73 int use; 74 }; 75 struct mutex uvm_pseg_lck; 76 struct uvm_pseg psegs[PSEG_NUMSEGS]; 77 78 #define UVM_PSEG_FULL(pseg) ((pseg)->use == (1 << MAX_PAGER_SEGS) - 1) 79 #define UVM_PSEG_EMPTY(pseg) ((pseg)->use == 0) 80 #define UVM_PSEG_INUSE(pseg,id) (((pseg)->use & (1 << (id))) != 0) 81 82 void uvm_pseg_init(struct uvm_pseg *); 83 vaddr_t uvm_pseg_get(int); 84 void uvm_pseg_release(vaddr_t); 85 86 /* 87 * uvm_pager_init: init pagers (at boot time) 88 */ 89 void 90 uvm_pager_init(void) 91 { 92 int lcv; 93 94 /* init pager map */ 95 uvm_pseg_init(&psegs[0]); 96 mtx_init(&uvm_pseg_lck, IPL_VM); 97 98 /* init ASYNC I/O queue */ 99 TAILQ_INIT(&uvm.aio_done); 100 101 /* call pager init functions */ 102 for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *); 103 lcv++) { 104 if (uvmpagerops[lcv]->pgo_init) 105 uvmpagerops[lcv]->pgo_init(); 106 } 107 } 108 109 /* 110 * Initialize a uvm_pseg. 111 * 112 * May fail, in which case seg->start == 0. 113 * 114 * Caller locks uvm_pseg_lck. 115 */ 116 void 117 uvm_pseg_init(struct uvm_pseg *pseg) 118 { 119 KASSERT(pseg->start == 0); 120 KASSERT(pseg->use == 0); 121 pseg->start = (vaddr_t)km_alloc(MAX_PAGER_SEGS * MAXBSIZE, 122 &kv_any, &kp_none, &kd_trylock); 123 } 124 125 /* 126 * Acquire a pager map segment. 127 * 128 * Returns a vaddr for paging. 0 on failure. 129 * 130 * Caller does not lock. 131 */ 132 vaddr_t 133 uvm_pseg_get(int flags) 134 { 135 int i; 136 struct uvm_pseg *pseg; 137 138 /* 139 * XXX Prevent lock ordering issue in uvm_unmap_detach(). A real 140 * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach(). 141 * 142 * witness_checkorder() at witness_checkorder+0xba0 143 * __mp_lock() at __mp_lock+0x5f 144 * uvm_unmap_detach() at uvm_unmap_detach+0xc5 145 * uvm_map() at uvm_map+0x857 146 * uvm_km_valloc_try() at uvm_km_valloc_try+0x65 147 * uvm_pseg_get() at uvm_pseg_get+0x6f 148 * uvm_pagermapin() at uvm_pagermapin+0x45 149 * uvn_io() at uvn_io+0xcf 150 * uvn_get() at uvn_get+0x156 151 * uvm_fault_lower() at uvm_fault_lower+0x28a 152 * uvm_fault() at uvm_fault+0x1b3 153 * upageflttrap() at upageflttrap+0x62 154 */ 155 KERNEL_LOCK(); 156 mtx_enter(&uvm_pseg_lck); 157 158 pager_seg_restart: 159 /* Find first pseg that has room. */ 160 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 161 if (UVM_PSEG_FULL(pseg)) 162 continue; 163 164 if (pseg->start == 0) { 165 /* Need initialization. */ 166 uvm_pseg_init(pseg); 167 if (pseg->start == 0) 168 goto pager_seg_fail; 169 } 170 171 /* Keep index 0 reserved for pagedaemon. */ 172 if (pseg == &psegs[0] && curproc != uvm.pagedaemon_proc) 173 i = 1; 174 else 175 i = 0; 176 177 for (; i < MAX_PAGER_SEGS; i++) { 178 if (!UVM_PSEG_INUSE(pseg, i)) { 179 pseg->use |= 1 << i; 180 mtx_leave(&uvm_pseg_lck); 181 KERNEL_UNLOCK(); 182 return pseg->start + i * MAXBSIZE; 183 } 184 } 185 } 186 187 pager_seg_fail: 188 if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) { 189 msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP); 190 goto pager_seg_restart; 191 } 192 193 mtx_leave(&uvm_pseg_lck); 194 KERNEL_UNLOCK(); 195 return 0; 196 } 197 198 /* 199 * Release a pager map segment. 200 * 201 * Caller does not lock. 202 * 203 * Deallocates pseg if it is no longer in use. 204 */ 205 void 206 uvm_pseg_release(vaddr_t segaddr) 207 { 208 int id; 209 struct uvm_pseg *pseg; 210 vaddr_t va = 0; 211 212 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 213 if (pseg->start <= segaddr && 214 segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE) 215 break; 216 } 217 KASSERT(pseg != &psegs[PSEG_NUMSEGS]); 218 219 id = (segaddr - pseg->start) / MAXBSIZE; 220 KASSERT(id >= 0 && id < MAX_PAGER_SEGS); 221 222 /* test for no remainder */ 223 KDASSERT(segaddr == pseg->start + id * MAXBSIZE); 224 225 mtx_enter(&uvm_pseg_lck); 226 227 KASSERT(UVM_PSEG_INUSE(pseg, id)); 228 229 pseg->use &= ~(1 << id); 230 wakeup(&psegs); 231 232 if (pseg != &psegs[0] && UVM_PSEG_EMPTY(pseg)) { 233 va = pseg->start; 234 pseg->start = 0; 235 } 236 237 mtx_leave(&uvm_pseg_lck); 238 239 if (va) { 240 km_free((void *)va, MAX_PAGER_SEGS * MAXBSIZE, 241 &kv_any, &kp_none); 242 } 243 } 244 245 /* 246 * uvm_pagermapin: map pages into KVA for I/O that needs mappings 247 * 248 * We basically just km_valloc a blank map entry to reserve the space in the 249 * kernel map and then use pmap_enter() to put the mappings in by hand. 250 */ 251 vaddr_t 252 uvm_pagermapin(struct vm_page **pps, int npages, int flags) 253 { 254 vaddr_t kva, cva; 255 vm_prot_t prot; 256 vsize_t size; 257 struct vm_page *pp; 258 259 prot = PROT_READ; 260 if (flags & UVMPAGER_MAPIN_READ) 261 prot |= PROT_WRITE; 262 size = ptoa(npages); 263 264 KASSERT(size <= MAXBSIZE); 265 266 kva = uvm_pseg_get(flags); 267 if (kva == 0) 268 return 0; 269 270 for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { 271 pp = *pps++; 272 KASSERT(pp); 273 KASSERT(pp->pg_flags & PG_BUSY); 274 /* Allow pmap_enter to fail. */ 275 if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp), 276 prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) { 277 pmap_remove(pmap_kernel(), kva, cva); 278 pmap_update(pmap_kernel()); 279 uvm_pseg_release(kva); 280 return 0; 281 } 282 } 283 pmap_update(pmap_kernel()); 284 return kva; 285 } 286 287 /* 288 * uvm_pagermapout: remove KVA mapping 289 * 290 * We remove our mappings by hand and then remove the mapping. 291 */ 292 void 293 uvm_pagermapout(vaddr_t kva, int npages) 294 { 295 296 pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT)); 297 pmap_update(pmap_kernel()); 298 uvm_pseg_release(kva); 299 300 } 301 302 /* 303 * uvm_mk_pcluster 304 * 305 * generic "make 'pager put' cluster" function. a pager can either 306 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this 307 * generic function, or [3] set it to a pager specific function. 308 * 309 * => caller must lock object _and_ pagequeues (since we need to look 310 * at active vs. inactive bits, etc.) 311 * => caller must make center page busy and write-protect it 312 * => we mark all cluster pages busy for the caller 313 * => the caller must unbusy all pages (and check wanted/released 314 * status if it drops the object lock) 315 * => flags: 316 * PGO_ALLPAGES: all pages in object are valid targets 317 * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster 318 * PGO_DOACTCLUST: include active pages in cluster. 319 * PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed 320 * in async io (caller must clean on error). 321 * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. 322 * PG_CLEANCHK is only a hint, but clearing will help reduce 323 * the number of calls we make to the pmap layer. 324 */ 325 326 struct vm_page ** 327 uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, 328 struct vm_page *center, int flags, voff_t mlo, voff_t mhi) 329 { 330 struct vm_page **ppsp, *pclust; 331 voff_t lo, hi, curoff; 332 int center_idx, forward, incr; 333 334 /* 335 * center page should already be busy and write protected. XXX: 336 * suppose page is wired? if we lock, then a process could 337 * fault/block on it. if we don't lock, a process could write the 338 * pages in the middle of an I/O. (consider an msync()). let's 339 * lock it for now (better to delay than corrupt data?). 340 */ 341 /* get cluster boundaries, check sanity, and apply our limits as well.*/ 342 uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi); 343 if ((flags & PGO_ALLPAGES) == 0) { 344 if (lo < mlo) 345 lo = mlo; 346 if (hi > mhi) 347 hi = mhi; 348 } 349 if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ 350 pps[0] = center; 351 *npages = 1; 352 return pps; 353 } 354 355 /* now determine the center and attempt to cluster around the edges */ 356 center_idx = (center->offset - lo) >> PAGE_SHIFT; 357 pps[center_idx] = center; /* plug in the center page */ 358 ppsp = &pps[center_idx]; 359 *npages = 1; 360 361 /* 362 * attempt to cluster around the left [backward], and then 363 * the right side [forward]. 364 * 365 * note that for inactive pages (pages that have been deactivated) 366 * there are no valid mappings and PG_CLEAN should be up to date. 367 * [i.e. there is no need to query the pmap with pmap_is_modified 368 * since there are no mappings]. 369 */ 370 for (forward = 0 ; forward <= 1 ; forward++) { 371 incr = forward ? PAGE_SIZE : -PAGE_SIZE; 372 curoff = center->offset + incr; 373 for ( ;(forward == 0 && curoff >= lo) || 374 (forward && curoff < hi); 375 curoff += incr) { 376 377 pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ 378 if (pclust == NULL) { 379 break; /* no page */ 380 } 381 /* handle active pages */ 382 /* NOTE: inactive pages don't have pmap mappings */ 383 if ((pclust->pg_flags & PQ_INACTIVE) == 0) { 384 if ((flags & PGO_DOACTCLUST) == 0) { 385 /* dont want mapped pages at all */ 386 break; 387 } 388 389 /* make sure "clean" bit is sync'd */ 390 if ((pclust->pg_flags & PG_CLEANCHK) == 0) { 391 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) 392 == PG_CLEAN && 393 pmap_is_modified(pclust)) 394 atomic_clearbits_int( 395 &pclust->pg_flags, 396 PG_CLEAN); 397 /* now checked */ 398 atomic_setbits_int(&pclust->pg_flags, 399 PG_CLEANCHK); 400 } 401 } 402 403 /* is page available for cleaning and does it need it */ 404 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) { 405 break; /* page is already clean or is busy */ 406 } 407 408 /* yes! enroll the page in our array */ 409 atomic_setbits_int(&pclust->pg_flags, PG_BUSY); 410 UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); 411 412 /* 413 * If we want to free after io is done, and we're 414 * async, set the released flag 415 */ 416 if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE) 417 atomic_setbits_int(&pclust->pg_flags, 418 PG_RELEASED); 419 420 /* XXX: protect wired page? see above comment. */ 421 pmap_page_protect(pclust, PROT_READ); 422 if (!forward) { 423 ppsp--; /* back up one page */ 424 *ppsp = pclust; 425 } else { 426 /* move forward one page */ 427 ppsp[*npages] = pclust; 428 } 429 (*npages)++; 430 } 431 } 432 433 /* 434 * done! return the cluster array to the caller!!! 435 */ 436 return ppsp; 437 } 438 439 /* 440 * uvm_pager_put: high level pageout routine 441 * 442 * we want to pageout page "pg" to backing store, clustering if 443 * possible. 444 * 445 * => page queues must be locked by caller 446 * => if page is not swap-backed, then "uobj" points to the object 447 * backing it. 448 * => if page is swap-backed, then "uobj" should be NULL. 449 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN 450 * for swap-backed memory, "pg" can be NULL if there is no page 451 * of interest [sometimes the case for the pagedaemon] 452 * => "ppsp_ptr" should point to an array of npages vm_page pointers 453 * for possible cluster building 454 * => flags (first two for non-swap-backed pages) 455 * PGO_ALLPAGES: all pages in uobj are valid targets 456 * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets 457 * PGO_SYNCIO: do SYNC I/O (no async) 458 * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O 459 * PGO_FREE: tell the aio daemon to free pages in the async case. 460 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range 461 * if (!uobj) start is the (daddr_t) of the starting swapblk 462 * => return state: 463 * 1. we return the VM_PAGER status code of the pageout 464 * 2. we return with the page queues unlocked 465 * 3. on errors we always drop the cluster. thus, if we return 466 * !PEND, !OK, then the caller only has to worry about 467 * un-busying the main page (not the cluster pages). 468 * 4. on success, if !PGO_PDFREECLUST, we return the cluster 469 * with all pages busy (caller must un-busy and check 470 * wanted/released flags). 471 */ 472 int 473 uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg, 474 struct vm_page ***ppsp_ptr, int *npages, int flags, 475 voff_t start, voff_t stop) 476 { 477 int result; 478 daddr_t swblk; 479 struct vm_page **ppsp = *ppsp_ptr; 480 481 /* 482 * note that uobj is null if we are doing a swap-backed pageout. 483 * note that uobj is !null if we are doing normal object pageout. 484 * note that the page queues must be locked to cluster. 485 */ 486 if (uobj) { /* if !swap-backed */ 487 /* 488 * attempt to build a cluster for pageout using its 489 * make-put-cluster function (if it has one). 490 */ 491 if (uobj->pgops->pgo_mk_pcluster) { 492 ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp, 493 npages, pg, flags, start, stop); 494 *ppsp_ptr = ppsp; /* update caller's pointer */ 495 } else { 496 ppsp[0] = pg; 497 *npages = 1; 498 } 499 500 swblk = 0; /* XXX: keep gcc happy */ 501 } else { 502 /* 503 * for swap-backed pageout, the caller (the pagedaemon) has 504 * already built the cluster for us. the starting swap 505 * block we are writing to has been passed in as "start." 506 * "pg" could be NULL if there is no page we are especially 507 * interested in (in which case the whole cluster gets dropped 508 * in the event of an error or a sync "done"). 509 */ 510 swblk = start; 511 /* ppsp and npages should be ok */ 512 } 513 514 /* now that we've clustered we can unlock the page queues */ 515 uvm_unlock_pageq(); 516 517 /* 518 * now attempt the I/O. if we have a failure and we are 519 * clustered, we will drop the cluster and try again. 520 */ 521 ReTry: 522 if (uobj) { 523 result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags); 524 } else { 525 /* XXX daddr_t -> int */ 526 result = uvm_swap_put(swblk, ppsp, *npages, flags); 527 } 528 529 /* 530 * we have attempted the I/O. 531 * 532 * if the I/O was a success then: 533 * if !PGO_PDFREECLUST, we return the cluster to the 534 * caller (who must un-busy all pages) 535 * else we un-busy cluster pages for the pagedaemon 536 * 537 * if I/O is pending (async i/o) then we return the pending code. 538 * [in this case the async i/o done function must clean up when 539 * i/o is done...] 540 */ 541 if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { 542 if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { 543 /* drop cluster */ 544 if (*npages > 1 || pg == NULL) 545 uvm_pager_dropcluster(uobj, pg, ppsp, npages, 546 PGO_PDFREECLUST); 547 } 548 return (result); 549 } 550 551 /* 552 * a pager error occurred (even after dropping the cluster, if there 553 * was one). give up! the caller only has one page ("pg") 554 * to worry about. 555 */ 556 if (*npages > 1 || pg == NULL) { 557 uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); 558 559 /* 560 * for failed swap-backed pageouts with a "pg", 561 * we need to reset pg's swslot to either: 562 * "swblk" (for transient errors, so we can retry), 563 * or 0 (for hard errors). 564 */ 565 if (uobj == NULL && pg != NULL) { 566 /* XXX daddr_t -> int */ 567 int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; 568 if (pg->pg_flags & PQ_ANON) { 569 rw_enter(pg->uanon->an_lock, RW_WRITE); 570 pg->uanon->an_swslot = nswblk; 571 rw_exit(pg->uanon->an_lock); 572 } else { 573 rw_enter(pg->uobject->vmobjlock, RW_WRITE); 574 uao_set_swslot(pg->uobject, 575 pg->offset >> PAGE_SHIFT, 576 nswblk); 577 rw_exit(pg->uobject->vmobjlock); 578 } 579 } 580 if (result == VM_PAGER_AGAIN) { 581 /* 582 * for transient failures, free all the swslots that 583 * we're not going to retry with. 584 */ 585 if (uobj == NULL) { 586 if (pg) { 587 /* XXX daddr_t -> int */ 588 uvm_swap_free(swblk + 1, *npages - 1); 589 } else { 590 /* XXX daddr_t -> int */ 591 uvm_swap_free(swblk, *npages); 592 } 593 } 594 if (pg) { 595 ppsp[0] = pg; 596 *npages = 1; 597 goto ReTry; 598 } 599 } else if (uobj == NULL) { 600 /* 601 * for hard errors on swap-backed pageouts, 602 * mark the swslots as bad. note that we do not 603 * free swslots that we mark bad. 604 */ 605 /* XXX daddr_t -> int */ 606 uvm_swap_markbad(swblk, *npages); 607 } 608 } 609 610 /* 611 * a pager error occurred (even after dropping the cluster, if there 612 * was one). give up! the caller only has one page ("pg") 613 * to worry about. 614 */ 615 616 return result; 617 } 618 619 /* 620 * uvm_pager_dropcluster: drop a cluster we have built (because we 621 * got an error, or, if PGO_PDFREECLUST we are un-busying the 622 * cluster pages on behalf of the pagedaemon). 623 * 624 * => uobj, if non-null, is a non-swap-backed object 625 * => page queues are not locked 626 * => pg is our page of interest (the one we clustered around, can be null) 627 * => ppsp/npages is our current cluster 628 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster 629 * pages on behalf of the pagedaemon. 630 * PGO_REALLOCSWAP: drop previously allocated swap slots for 631 * clustered swap-backed pages (except for "pg" if !NULL) 632 * "swblk" is the start of swap alloc (e.g. for ppsp[0]) 633 * [only meaningful if swap-backed (uobj == NULL)] 634 */ 635 636 void 637 uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, 638 struct vm_page **ppsp, int *npages, int flags) 639 { 640 int lcv; 641 642 KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); 643 644 /* drop all pages but "pg" */ 645 for (lcv = 0 ; lcv < *npages ; lcv++) { 646 /* skip "pg" or empty slot */ 647 if (ppsp[lcv] == pg || ppsp[lcv] == NULL) 648 continue; 649 650 /* 651 * Note that PQ_ANON bit can't change as long as we are holding 652 * the PG_BUSY bit (so there is no need to lock the page 653 * queues to test it). 654 */ 655 if (!uobj) { 656 if (ppsp[lcv]->pg_flags & PQ_ANON) { 657 rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); 658 if (flags & PGO_REALLOCSWAP) 659 /* zap swap block */ 660 ppsp[lcv]->uanon->an_swslot = 0; 661 } else { 662 rw_enter(ppsp[lcv]->uobject->vmobjlock, 663 RW_WRITE); 664 if (flags & PGO_REALLOCSWAP) 665 uao_set_swslot(ppsp[lcv]->uobject, 666 ppsp[lcv]->offset >> PAGE_SHIFT, 0); 667 } 668 } 669 670 /* did someone want the page while we had it busy-locked? */ 671 if (ppsp[lcv]->pg_flags & PG_WANTED) { 672 wakeup(ppsp[lcv]); 673 } 674 675 /* if page was released, release it. otherwise un-busy it */ 676 if (ppsp[lcv]->pg_flags & PG_RELEASED && 677 ppsp[lcv]->pg_flags & PQ_ANON) { 678 /* so that anfree will free */ 679 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 680 PG_BUSY); 681 UVM_PAGE_OWN(ppsp[lcv], NULL); 682 683 /* kills anon and frees pg */ 684 uvm_anon_release(ppsp[lcv]->uanon); 685 686 continue; 687 } else { 688 /* 689 * if we were planning on async io then we would 690 * have PG_RELEASED set, clear that with the others. 691 */ 692 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 693 PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED); 694 UVM_PAGE_OWN(ppsp[lcv], NULL); 695 } 696 697 /* 698 * if we are operating on behalf of the pagedaemon and we 699 * had a successful pageout update the page! 700 */ 701 if (flags & PGO_PDFREECLUST) { 702 pmap_clear_reference(ppsp[lcv]); 703 pmap_clear_modify(ppsp[lcv]); 704 atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN); 705 } 706 707 /* if anonymous cluster, unlock object and move on */ 708 if (!uobj) { 709 if (ppsp[lcv]->pg_flags & PQ_ANON) 710 rw_exit(ppsp[lcv]->uanon->an_lock); 711 else 712 rw_exit(ppsp[lcv]->uobject->vmobjlock); 713 } 714 } 715 } 716 717 /* 718 * interrupt-context iodone handler for single-buf i/os 719 * or the top-level buf of a nested-buf i/o. 720 * 721 * => must be at splbio(). 722 */ 723 724 void 725 uvm_aio_biodone(struct buf *bp) 726 { 727 splassert(IPL_BIO); 728 729 /* reset b_iodone for when this is a single-buf i/o. */ 730 bp->b_iodone = uvm_aio_aiodone; 731 732 mtx_enter(&uvm.aiodoned_lock); 733 TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist); 734 wakeup(&uvm.aiodoned); 735 mtx_leave(&uvm.aiodoned_lock); 736 } 737 738 /* 739 * uvm_aio_aiodone: do iodone processing for async i/os. 740 * this should be called in thread context, not interrupt context. 741 */ 742 void 743 uvm_aio_aiodone(struct buf *bp) 744 { 745 int npages = bp->b_bufsize >> PAGE_SHIFT; 746 struct vm_page *pg, *pgs[MAXPHYS >> PAGE_SHIFT]; 747 struct uvm_object *uobj; 748 int i, error; 749 boolean_t write, swap; 750 751 KASSERT(npages <= MAXPHYS >> PAGE_SHIFT); 752 splassert(IPL_BIO); 753 754 error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0; 755 write = (bp->b_flags & B_READ) == 0; 756 757 uobj = NULL; 758 for (i = 0; i < npages; i++) 759 pgs[i] = uvm_atopg((vaddr_t)bp->b_data + 760 ((vsize_t)i << PAGE_SHIFT)); 761 uvm_pagermapout((vaddr_t)bp->b_data, npages); 762 #ifdef UVM_SWAP_ENCRYPT 763 /* 764 * XXX - assumes that we only get ASYNC writes. used to be above. 765 */ 766 if (pgs[0]->pg_flags & PQ_ENCRYPT) { 767 uvm_swap_freepages(pgs, npages); 768 goto freed; 769 } 770 #endif /* UVM_SWAP_ENCRYPT */ 771 for (i = 0; i < npages; i++) { 772 pg = pgs[i]; 773 774 if (i == 0) { 775 swap = (pg->pg_flags & PQ_SWAPBACKED) != 0; 776 if (!swap) { 777 uobj = pg->uobject; 778 rw_enter(uobj->vmobjlock, RW_WRITE); 779 } 780 } 781 KASSERT(swap || pg->uobject == uobj); 782 783 /* 784 * if this is a read and we got an error, mark the pages 785 * PG_RELEASED so that uvm_page_unbusy() will free them. 786 */ 787 if (!write && error) { 788 atomic_setbits_int(&pg->pg_flags, PG_RELEASED); 789 continue; 790 } 791 KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0); 792 793 /* 794 * if this is a read and the page is PG_FAKE, 795 * or this was a successful write, 796 * mark the page PG_CLEAN and not PG_FAKE. 797 */ 798 if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) { 799 pmap_clear_reference(pgs[i]); 800 pmap_clear_modify(pgs[i]); 801 atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN); 802 atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE); 803 } 804 } 805 uvm_page_unbusy(pgs, npages); 806 if (!swap) { 807 rw_exit(uobj->vmobjlock); 808 } 809 810 #ifdef UVM_SWAP_ENCRYPT 811 freed: 812 #endif 813 pool_put(&bufpool, bp); 814 } 815