/* $OpenBSD: uvm_pager.c,v 1.84 2022/07/17 17:59:35 kettenis Exp $ */
/* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/malloc.h> 38 #include <sys/pool.h> 39 #include <sys/buf.h> 40 #include <sys/atomic.h> 41 42 #include <uvm/uvm.h> 43 44 struct pool *uvm_aiobuf_pool; 45 46 const struct uvm_pagerops *uvmpagerops[] = { 47 &aobj_pager, 48 &uvm_deviceops, 49 &uvm_vnodeops, 50 }; 51 52 /* 53 * the pager map: provides KVA for I/O 54 * 55 * Each uvm_pseg has room for MAX_PAGERMAP_SEGS pager io space of 56 * MAXBSIZE bytes. 57 * 58 * The number of uvm_pseg instances is dynamic using an array segs. 59 * At most UVM_PSEG_COUNT instances can exist. 60 * 61 * psegs[0/1] always exist (so that the pager can always map in pages). 62 * psegs[0/1] element 0 are always reserved for the pagedaemon. 63 * 64 * Any other pseg is automatically created when no space is available 65 * and automatically destroyed when it is no longer in use. 66 */ 67 #define MAX_PAGER_SEGS 16 68 #define PSEG_NUMSEGS (PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE) 69 struct uvm_pseg { 70 /* Start of virtual space; 0 if not inited. */ 71 vaddr_t start; 72 /* Bitmap of the segments in use in this pseg. 
*/ 73 int use; 74 }; 75 struct mutex uvm_pseg_lck; 76 struct uvm_pseg psegs[PSEG_NUMSEGS]; 77 78 #define UVM_PSEG_FULL(pseg) ((pseg)->use == (1 << MAX_PAGER_SEGS) - 1) 79 #define UVM_PSEG_EMPTY(pseg) ((pseg)->use == 0) 80 #define UVM_PSEG_INUSE(pseg,id) (((pseg)->use & (1 << (id))) != 0) 81 82 void uvm_pseg_init(struct uvm_pseg *); 83 vaddr_t uvm_pseg_get(int); 84 void uvm_pseg_release(vaddr_t); 85 86 /* 87 * uvm_pager_init: init pagers (at boot time) 88 */ 89 void 90 uvm_pager_init(void) 91 { 92 int lcv; 93 94 /* init pager map */ 95 uvm_pseg_init(&psegs[0]); 96 uvm_pseg_init(&psegs[1]); 97 mtx_init(&uvm_pseg_lck, IPL_VM); 98 99 /* init ASYNC I/O queue */ 100 TAILQ_INIT(&uvm.aio_done); 101 102 /* call pager init functions */ 103 for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *); 104 lcv++) { 105 if (uvmpagerops[lcv]->pgo_init) 106 uvmpagerops[lcv]->pgo_init(); 107 } 108 } 109 110 /* 111 * Initialize a uvm_pseg. 112 * 113 * May fail, in which case seg->start == 0. 114 * 115 * Caller locks uvm_pseg_lck. 116 */ 117 void 118 uvm_pseg_init(struct uvm_pseg *pseg) 119 { 120 KASSERT(pseg->start == 0); 121 KASSERT(pseg->use == 0); 122 pseg->start = (vaddr_t)km_alloc(MAX_PAGER_SEGS * MAXBSIZE, 123 &kv_any, &kp_none, &kd_trylock); 124 } 125 126 /* 127 * Acquire a pager map segment. 128 * 129 * Returns a vaddr for paging. 0 on failure. 130 * 131 * Caller does not lock. 132 */ 133 vaddr_t 134 uvm_pseg_get(int flags) 135 { 136 int i; 137 struct uvm_pseg *pseg; 138 139 /* 140 * XXX Prevent lock ordering issue in uvm_unmap_detach(). A real 141 * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach(). 
142 * 143 * witness_checkorder() at witness_checkorder+0xba0 144 * __mp_lock() at __mp_lock+0x5f 145 * uvm_unmap_detach() at uvm_unmap_detach+0xc5 146 * uvm_map() at uvm_map+0x857 147 * uvm_km_valloc_try() at uvm_km_valloc_try+0x65 148 * uvm_pseg_get() at uvm_pseg_get+0x6f 149 * uvm_pagermapin() at uvm_pagermapin+0x45 150 * uvn_io() at uvn_io+0xcf 151 * uvn_get() at uvn_get+0x156 152 * uvm_fault_lower() at uvm_fault_lower+0x28a 153 * uvm_fault() at uvm_fault+0x1b3 154 * upageflttrap() at upageflttrap+0x62 155 */ 156 KERNEL_LOCK(); 157 mtx_enter(&uvm_pseg_lck); 158 159 pager_seg_restart: 160 /* Find first pseg that has room. */ 161 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 162 if (UVM_PSEG_FULL(pseg)) 163 continue; 164 165 if (pseg->start == 0) { 166 /* Need initialization. */ 167 uvm_pseg_init(pseg); 168 if (pseg->start == 0) 169 goto pager_seg_fail; 170 } 171 172 /* Keep indexes 0,1 reserved for pagedaemon. */ 173 if ((pseg == &psegs[0] || pseg == &psegs[1]) && 174 (curproc != uvm.pagedaemon_proc)) 175 i = 2; 176 else 177 i = 0; 178 179 for (; i < MAX_PAGER_SEGS; i++) { 180 if (!UVM_PSEG_INUSE(pseg, i)) { 181 pseg->use |= 1 << i; 182 mtx_leave(&uvm_pseg_lck); 183 KERNEL_UNLOCK(); 184 return pseg->start + i * MAXBSIZE; 185 } 186 } 187 } 188 189 pager_seg_fail: 190 if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) { 191 msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP); 192 goto pager_seg_restart; 193 } 194 195 mtx_leave(&uvm_pseg_lck); 196 KERNEL_UNLOCK(); 197 return 0; 198 } 199 200 /* 201 * Release a pager map segment. 202 * 203 * Caller does not lock. 204 * 205 * Deallocates pseg if it is no longer in use. 
206 */ 207 void 208 uvm_pseg_release(vaddr_t segaddr) 209 { 210 int id; 211 struct uvm_pseg *pseg; 212 vaddr_t va = 0; 213 214 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 215 if (pseg->start <= segaddr && 216 segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE) 217 break; 218 } 219 KASSERT(pseg != &psegs[PSEG_NUMSEGS]); 220 221 id = (segaddr - pseg->start) / MAXBSIZE; 222 KASSERT(id >= 0 && id < MAX_PAGER_SEGS); 223 224 /* test for no remainder */ 225 KDASSERT(segaddr == pseg->start + id * MAXBSIZE); 226 227 mtx_enter(&uvm_pseg_lck); 228 229 KASSERT(UVM_PSEG_INUSE(pseg, id)); 230 231 pseg->use &= ~(1 << id); 232 wakeup(&psegs); 233 234 if ((pseg != &psegs[0] && pseg != &psegs[1]) && UVM_PSEG_EMPTY(pseg)) { 235 va = pseg->start; 236 pseg->start = 0; 237 } 238 239 mtx_leave(&uvm_pseg_lck); 240 241 if (va) { 242 km_free((void *)va, MAX_PAGER_SEGS * MAXBSIZE, 243 &kv_any, &kp_none); 244 } 245 } 246 247 /* 248 * uvm_pagermapin: map pages into KVA for I/O that needs mappings 249 * 250 * We basically just km_valloc a blank map entry to reserve the space in the 251 * kernel map and then use pmap_enter() to put the mappings in by hand. 252 */ 253 vaddr_t 254 uvm_pagermapin(struct vm_page **pps, int npages, int flags) 255 { 256 vaddr_t kva, cva; 257 vm_prot_t prot; 258 vsize_t size; 259 struct vm_page *pp; 260 261 prot = PROT_READ; 262 if (flags & UVMPAGER_MAPIN_READ) 263 prot |= PROT_WRITE; 264 size = ptoa(npages); 265 266 KASSERT(size <= MAXBSIZE); 267 268 kva = uvm_pseg_get(flags); 269 if (kva == 0) 270 return 0; 271 272 for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { 273 pp = *pps++; 274 KASSERT(pp); 275 KASSERT(pp->pg_flags & PG_BUSY); 276 /* Allow pmap_enter to fail. 
*/ 277 if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp), 278 prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) { 279 pmap_remove(pmap_kernel(), kva, cva); 280 pmap_update(pmap_kernel()); 281 uvm_pseg_release(kva); 282 return 0; 283 } 284 } 285 pmap_update(pmap_kernel()); 286 return kva; 287 } 288 289 /* 290 * uvm_pagermapout: remove KVA mapping 291 * 292 * We remove our mappings by hand and then remove the mapping. 293 */ 294 void 295 uvm_pagermapout(vaddr_t kva, int npages) 296 { 297 298 pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT)); 299 pmap_update(pmap_kernel()); 300 uvm_pseg_release(kva); 301 302 } 303 304 /* 305 * uvm_mk_pcluster 306 * 307 * generic "make 'pager put' cluster" function. a pager can either 308 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this 309 * generic function, or [3] set it to a pager specific function. 310 * 311 * => caller must lock object _and_ pagequeues (since we need to look 312 * at active vs. inactive bits, etc.) 313 * => caller must make center page busy and write-protect it 314 * => we mark all cluster pages busy for the caller 315 * => the caller must unbusy all pages (and check wanted/released 316 * status if it drops the object lock) 317 * => flags: 318 * PGO_ALLPAGES: all pages in object are valid targets 319 * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster 320 * PGO_DOACTCLUST: include active pages in cluster. 321 * PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed 322 * in async io (caller must clean on error). 323 * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. 324 * PG_CLEANCHK is only a hint, but clearing will help reduce 325 * the number of calls we make to the pmap layer. 
326 */ 327 328 struct vm_page ** 329 uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, 330 struct vm_page *center, int flags, voff_t mlo, voff_t mhi) 331 { 332 struct vm_page **ppsp, *pclust; 333 voff_t lo, hi, curoff; 334 int center_idx, forward, incr; 335 336 /* 337 * center page should already be busy and write protected. XXX: 338 * suppose page is wired? if we lock, then a process could 339 * fault/block on it. if we don't lock, a process could write the 340 * pages in the middle of an I/O. (consider an msync()). let's 341 * lock it for now (better to delay than corrupt data?). 342 */ 343 /* get cluster boundaries, check sanity, and apply our limits as well.*/ 344 uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi); 345 if ((flags & PGO_ALLPAGES) == 0) { 346 if (lo < mlo) 347 lo = mlo; 348 if (hi > mhi) 349 hi = mhi; 350 } 351 if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ 352 pps[0] = center; 353 *npages = 1; 354 return pps; 355 } 356 357 /* now determine the center and attempt to cluster around the edges */ 358 center_idx = (center->offset - lo) >> PAGE_SHIFT; 359 pps[center_idx] = center; /* plug in the center page */ 360 ppsp = &pps[center_idx]; 361 *npages = 1; 362 363 /* 364 * attempt to cluster around the left [backward], and then 365 * the right side [forward]. 366 * 367 * note that for inactive pages (pages that have been deactivated) 368 * there are no valid mappings and PG_CLEAN should be up to date. 369 * [i.e. there is no need to query the pmap with pmap_is_modified 370 * since there are no mappings]. 371 */ 372 for (forward = 0 ; forward <= 1 ; forward++) { 373 incr = forward ? 
PAGE_SIZE : -PAGE_SIZE; 374 curoff = center->offset + incr; 375 for ( ;(forward == 0 && curoff >= lo) || 376 (forward && curoff < hi); 377 curoff += incr) { 378 379 pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ 380 if (pclust == NULL) { 381 break; /* no page */ 382 } 383 /* handle active pages */ 384 /* NOTE: inactive pages don't have pmap mappings */ 385 if ((pclust->pg_flags & PQ_INACTIVE) == 0) { 386 if ((flags & PGO_DOACTCLUST) == 0) { 387 /* dont want mapped pages at all */ 388 break; 389 } 390 391 /* make sure "clean" bit is sync'd */ 392 if ((pclust->pg_flags & PG_CLEANCHK) == 0) { 393 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) 394 == PG_CLEAN && 395 pmap_is_modified(pclust)) 396 atomic_clearbits_int( 397 &pclust->pg_flags, 398 PG_CLEAN); 399 /* now checked */ 400 atomic_setbits_int(&pclust->pg_flags, 401 PG_CLEANCHK); 402 } 403 } 404 405 /* is page available for cleaning and does it need it */ 406 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) { 407 break; /* page is already clean or is busy */ 408 } 409 410 /* yes! enroll the page in our array */ 411 atomic_setbits_int(&pclust->pg_flags, PG_BUSY); 412 UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); 413 414 /* 415 * If we want to free after io is done, and we're 416 * async, set the released flag 417 */ 418 if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE) 419 atomic_setbits_int(&pclust->pg_flags, 420 PG_RELEASED); 421 422 /* XXX: protect wired page? see above comment. */ 423 pmap_page_protect(pclust, PROT_READ); 424 if (!forward) { 425 ppsp--; /* back up one page */ 426 *ppsp = pclust; 427 } else { 428 /* move forward one page */ 429 ppsp[*npages] = pclust; 430 } 431 (*npages)++; 432 } 433 } 434 435 /* 436 * done! return the cluster array to the caller!!! 437 */ 438 return ppsp; 439 } 440 441 /* 442 * uvm_pager_put: high level pageout routine 443 * 444 * we want to pageout page "pg" to backing store, clustering if 445 * possible. 
446 * 447 * => page queues must be locked by caller 448 * => if page is not swap-backed, then "uobj" points to the object 449 * backing it. 450 * => if page is swap-backed, then "uobj" should be NULL. 451 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN 452 * for swap-backed memory, "pg" can be NULL if there is no page 453 * of interest [sometimes the case for the pagedaemon] 454 * => "ppsp_ptr" should point to an array of npages vm_page pointers 455 * for possible cluster building 456 * => flags (first two for non-swap-backed pages) 457 * PGO_ALLPAGES: all pages in uobj are valid targets 458 * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets 459 * PGO_SYNCIO: do SYNC I/O (no async) 460 * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O 461 * PGO_FREE: tell the aio daemon to free pages in the async case. 462 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range 463 * if (!uobj) start is the (daddr_t) of the starting swapblk 464 * => return state: 465 * 1. we return the VM_PAGER status code of the pageout 466 * 2. we return with the page queues unlocked 467 * 3. on errors we always drop the cluster. thus, if we return 468 * !PEND, !OK, then the caller only has to worry about 469 * un-busying the main page (not the cluster pages). 470 * 4. on success, if !PGO_PDFREECLUST, we return the cluster 471 * with all pages busy (caller must un-busy and check 472 * wanted/released flags). 473 */ 474 int 475 uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg, 476 struct vm_page ***ppsp_ptr, int *npages, int flags, 477 voff_t start, voff_t stop) 478 { 479 int result; 480 daddr_t swblk; 481 struct vm_page **ppsp = *ppsp_ptr; 482 483 /* 484 * note that uobj is null if we are doing a swap-backed pageout. 485 * note that uobj is !null if we are doing normal object pageout. 486 * note that the page queues must be locked to cluster. 
487 */ 488 if (uobj) { /* if !swap-backed */ 489 /* 490 * attempt to build a cluster for pageout using its 491 * make-put-cluster function (if it has one). 492 */ 493 if (uobj->pgops->pgo_mk_pcluster) { 494 ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp, 495 npages, pg, flags, start, stop); 496 *ppsp_ptr = ppsp; /* update caller's pointer */ 497 } else { 498 ppsp[0] = pg; 499 *npages = 1; 500 } 501 502 swblk = 0; /* XXX: keep gcc happy */ 503 } else { 504 /* 505 * for swap-backed pageout, the caller (the pagedaemon) has 506 * already built the cluster for us. the starting swap 507 * block we are writing to has been passed in as "start." 508 * "pg" could be NULL if there is no page we are especially 509 * interested in (in which case the whole cluster gets dropped 510 * in the event of an error or a sync "done"). 511 */ 512 swblk = start; 513 /* ppsp and npages should be ok */ 514 } 515 516 /* now that we've clustered we can unlock the page queues */ 517 uvm_unlock_pageq(); 518 519 /* 520 * now attempt the I/O. if we have a failure and we are 521 * clustered, we will drop the cluster and try again. 522 */ 523 ReTry: 524 if (uobj) { 525 result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags); 526 } else { 527 /* XXX daddr_t -> int */ 528 result = uvm_swap_put(swblk, ppsp, *npages, flags); 529 } 530 531 /* 532 * we have attempted the I/O. 533 * 534 * if the I/O was a success then: 535 * if !PGO_PDFREECLUST, we return the cluster to the 536 * caller (who must un-busy all pages) 537 * else we un-busy cluster pages for the pagedaemon 538 * 539 * if I/O is pending (async i/o) then we return the pending code. 540 * [in this case the async i/o done function must clean up when 541 * i/o is done...] 
542 */ 543 if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { 544 if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { 545 /* drop cluster */ 546 if (*npages > 1 || pg == NULL) 547 uvm_pager_dropcluster(uobj, pg, ppsp, npages, 548 PGO_PDFREECLUST); 549 } 550 return (result); 551 } 552 553 /* 554 * a pager error occurred (even after dropping the cluster, if there 555 * was one). give up! the caller only has one page ("pg") 556 * to worry about. 557 */ 558 if (*npages > 1 || pg == NULL) { 559 uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); 560 561 /* 562 * for failed swap-backed pageouts with a "pg", 563 * we need to reset pg's swslot to either: 564 * "swblk" (for transient errors, so we can retry), 565 * or 0 (for hard errors). 566 */ 567 if (uobj == NULL && pg != NULL) { 568 /* XXX daddr_t -> int */ 569 int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; 570 if (pg->pg_flags & PQ_ANON) { 571 rw_enter(pg->uanon->an_lock, RW_WRITE); 572 pg->uanon->an_swslot = nswblk; 573 rw_exit(pg->uanon->an_lock); 574 } else { 575 rw_enter(pg->uobject->vmobjlock, RW_WRITE); 576 uao_set_swslot(pg->uobject, 577 pg->offset >> PAGE_SHIFT, 578 nswblk); 579 rw_exit(pg->uobject->vmobjlock); 580 } 581 } 582 if (result == VM_PAGER_AGAIN) { 583 /* 584 * for transient failures, free all the swslots that 585 * we're not going to retry with. 586 */ 587 if (uobj == NULL) { 588 if (pg) { 589 /* XXX daddr_t -> int */ 590 uvm_swap_free(swblk + 1, *npages - 1); 591 } else { 592 /* XXX daddr_t -> int */ 593 uvm_swap_free(swblk, *npages); 594 } 595 } 596 if (pg) { 597 ppsp[0] = pg; 598 *npages = 1; 599 goto ReTry; 600 } 601 } else if (uobj == NULL) { 602 /* 603 * for hard errors on swap-backed pageouts, 604 * mark the swslots as bad. note that we do not 605 * free swslots that we mark bad. 606 */ 607 /* XXX daddr_t -> int */ 608 uvm_swap_markbad(swblk, *npages); 609 } 610 } 611 612 /* 613 * a pager error occurred (even after dropping the cluster, if there 614 * was one). 
give up! the caller only has one page ("pg") 615 * to worry about. 616 */ 617 618 return result; 619 } 620 621 /* 622 * uvm_pager_dropcluster: drop a cluster we have built (because we 623 * got an error, or, if PGO_PDFREECLUST we are un-busying the 624 * cluster pages on behalf of the pagedaemon). 625 * 626 * => uobj, if non-null, is a non-swap-backed object 627 * => page queues are not locked 628 * => pg is our page of interest (the one we clustered around, can be null) 629 * => ppsp/npages is our current cluster 630 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster 631 * pages on behalf of the pagedaemon. 632 * PGO_REALLOCSWAP: drop previously allocated swap slots for 633 * clustered swap-backed pages (except for "pg" if !NULL) 634 * "swblk" is the start of swap alloc (e.g. for ppsp[0]) 635 * [only meaningful if swap-backed (uobj == NULL)] 636 */ 637 638 void 639 uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, 640 struct vm_page **ppsp, int *npages, int flags) 641 { 642 int lcv; 643 644 KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); 645 646 /* drop all pages but "pg" */ 647 for (lcv = 0 ; lcv < *npages ; lcv++) { 648 /* skip "pg" or empty slot */ 649 if (ppsp[lcv] == pg || ppsp[lcv] == NULL) 650 continue; 651 652 /* 653 * Note that PQ_ANON bit can't change as long as we are holding 654 * the PG_BUSY bit (so there is no need to lock the page 655 * queues to test it). 656 */ 657 if (!uobj) { 658 if (ppsp[lcv]->pg_flags & PQ_ANON) { 659 rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); 660 if (flags & PGO_REALLOCSWAP) 661 /* zap swap block */ 662 ppsp[lcv]->uanon->an_swslot = 0; 663 } else { 664 rw_enter(ppsp[lcv]->uobject->vmobjlock, 665 RW_WRITE); 666 if (flags & PGO_REALLOCSWAP) 667 uao_set_swslot(ppsp[lcv]->uobject, 668 ppsp[lcv]->offset >> PAGE_SHIFT, 0); 669 } 670 } 671 672 /* did someone want the page while we had it busy-locked? 
*/ 673 if (ppsp[lcv]->pg_flags & PG_WANTED) { 674 wakeup(ppsp[lcv]); 675 } 676 677 /* if page was released, release it. otherwise un-busy it */ 678 if (ppsp[lcv]->pg_flags & PG_RELEASED && 679 ppsp[lcv]->pg_flags & PQ_ANON) { 680 /* kills anon and frees pg */ 681 uvm_anon_release(ppsp[lcv]->uanon); 682 continue; 683 } else { 684 /* 685 * if we were planning on async io then we would 686 * have PG_RELEASED set, clear that with the others. 687 */ 688 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 689 PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED); 690 UVM_PAGE_OWN(ppsp[lcv], NULL); 691 } 692 693 /* 694 * if we are operating on behalf of the pagedaemon and we 695 * had a successful pageout update the page! 696 */ 697 if (flags & PGO_PDFREECLUST) { 698 pmap_clear_reference(ppsp[lcv]); 699 pmap_clear_modify(ppsp[lcv]); 700 atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN); 701 } 702 703 /* if anonymous cluster, unlock object and move on */ 704 if (!uobj) { 705 if (ppsp[lcv]->pg_flags & PQ_ANON) 706 rw_exit(ppsp[lcv]->uanon->an_lock); 707 else 708 rw_exit(ppsp[lcv]->uobject->vmobjlock); 709 } 710 } 711 } 712 713 /* 714 * interrupt-context iodone handler for single-buf i/os 715 * or the top-level buf of a nested-buf i/o. 716 * 717 * => must be at splbio(). 718 */ 719 720 void 721 uvm_aio_biodone(struct buf *bp) 722 { 723 splassert(IPL_BIO); 724 725 /* reset b_iodone for when this is a single-buf i/o. 
*/ 726 bp->b_iodone = uvm_aio_aiodone; 727 728 mtx_enter(&uvm.aiodoned_lock); 729 TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist); 730 wakeup(&uvm.aiodoned); 731 mtx_leave(&uvm.aiodoned_lock); 732 } 733 734 void 735 uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write, 736 int error) 737 { 738 struct uvm_object *uobj; 739 struct vm_page *pg; 740 struct rwlock *slock; 741 boolean_t swap; 742 int i, swslot; 743 744 slock = NULL; 745 uobj = NULL; 746 pg = pgs[0]; 747 swap = (pg->uanon != NULL && pg->uobject == NULL) || 748 (pg->pg_flags & PQ_AOBJ) != 0; 749 750 KASSERT(swap); 751 KASSERT(write); 752 753 if (error) { 754 if (pg->uobject != NULL) { 755 swslot = uao_find_swslot(pg->uobject, 756 pg->offset >> PAGE_SHIFT); 757 } else { 758 swslot = pg->uanon->an_swslot; 759 } 760 KASSERT(swslot); 761 } 762 763 for (i = 0; i < npages; i++) { 764 int anon_disposed = 0; 765 766 pg = pgs[i]; 767 KASSERT((pg->pg_flags & PG_FAKE) == 0); 768 769 /* 770 * lock each page's object (or anon) individually since 771 * each page may need a different lock. 772 */ 773 if (pg->uobject != NULL) { 774 slock = pg->uobject->vmobjlock; 775 } else { 776 slock = pg->uanon->an_lock; 777 } 778 rw_enter(slock, RW_WRITE); 779 anon_disposed = (pg->pg_flags & PG_RELEASED) != 0; 780 KASSERT(!anon_disposed || pg->uobject != NULL || 781 pg->uanon->an_ref == 0); 782 uvm_lock_pageq(); 783 784 /* 785 * if this was a successful write, 786 * mark the page PG_CLEAN. 787 */ 788 if (!error) { 789 pmap_clear_reference(pg); 790 pmap_clear_modify(pg); 791 atomic_setbits_int(&pg->pg_flags, PG_CLEAN); 792 } 793 794 /* 795 * unlock everything for this page now. 
796 */ 797 if (pg->uobject == NULL && anon_disposed) { 798 uvm_unlock_pageq(); 799 uvm_anon_release(pg->uanon); 800 } else { 801 uvm_page_unbusy(&pg, 1); 802 uvm_unlock_pageq(); 803 rw_exit(slock); 804 } 805 } 806 807 if (error) { 808 uvm_swap_markbad(swslot, npages); 809 } 810 } 811 812 /* 813 * uvm_aio_aiodone: do iodone processing for async i/os. 814 * this should be called in thread context, not interrupt context. 815 */ 816 void 817 uvm_aio_aiodone(struct buf *bp) 818 { 819 int npages = bp->b_bufsize >> PAGE_SHIFT; 820 struct vm_page *pgs[MAXPHYS >> PAGE_SHIFT]; 821 int i, error; 822 boolean_t write; 823 824 KASSERT(npages <= MAXPHYS >> PAGE_SHIFT); 825 splassert(IPL_BIO); 826 827 error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0; 828 write = (bp->b_flags & B_READ) == 0; 829 830 for (i = 0; i < npages; i++) 831 pgs[i] = uvm_atopg((vaddr_t)bp->b_data + 832 ((vsize_t)i << PAGE_SHIFT)); 833 uvm_pagermapout((vaddr_t)bp->b_data, npages); 834 #ifdef UVM_SWAP_ENCRYPT 835 /* 836 * XXX - assumes that we only get ASYNC writes. used to be above. 837 */ 838 if (pgs[0]->pg_flags & PQ_ENCRYPT) { 839 uvm_swap_freepages(pgs, npages); 840 goto freed; 841 } 842 #endif /* UVM_SWAP_ENCRYPT */ 843 844 uvm_aio_aiodone_pages(pgs, npages, write, error); 845 846 #ifdef UVM_SWAP_ENCRYPT 847 freed: 848 #endif 849 pool_put(&bufpool, bp); 850 } 851