/*	$NetBSD: uvm_pager.c,v 1.128 2020/07/09 05:57:15 skrll Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.128 2020/07/09 05:57:15 skrll Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"
#include "opt_pagermap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

/*
 * XXX
 * this is needed until the device strategy interface
 * is changed to do physically-addressed i/o.
 */

#ifndef PAGER_MAP_DEFAULT_SIZE
#define PAGER_MAP_DEFAULT_SIZE	(16 * 1024 * 1024)
#endif

#ifndef PAGER_MAP_SIZE
#define PAGER_MAP_SIZE	PAGER_MAP_DEFAULT_SIZE
#endif

size_t pager_map_size = PAGER_MAP_SIZE;

/*
 * list of uvm pagers in the system
 */

const struct uvm_pagerops * const uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
	&ubc_pager,
};

/*
 * the pager map: provides KVA for I/O
 */

struct vm_map *pager_map;		/* XXX */
kmutex_t pager_map_wanted_lock __cacheline_aligned;
bool pager_map_wanted;	/* locked by pager map */
static vaddr_t emergva;
static int emerg_ncolors;
static bool emerginuse;

void
uvm_pager_realloc_emerg(void)
{
	vaddr_t new_emergva, old_emergva;
	int old_emerg_ncolors;

	if (__predict_true(emergva != 0 && emerg_ncolors >= uvmexp.ncolors))
		return;

	KASSERT(!emerginuse);

	new_emergva = uvm_km_alloc(kernel_map,
	    round_page(MAXPHYS) + ptoa(uvmexp.ncolors), ptoa(uvmexp.ncolors),
	    UVM_KMF_VAONLY);

	KASSERT(new_emergva != 0);

	old_emergva = emergva;
	old_emerg_ncolors = emerg_ncolors;

	/*
	 * don't support re-color in late boot anyway.
	 */
	if (0) /* XXX */
		mutex_enter(&pager_map_wanted_lock);

	emergva = new_emergva;
	emerg_ncolors = uvmexp.ncolors;
	wakeup(&old_emergva);

	if (0) /* XXX */
		mutex_exit(&pager_map_wanted_lock);

	if (old_emergva)
		uvm_km_free(kernel_map, old_emergva,
		    round_page(MAXPHYS) + ptoa(old_emerg_ncolors),
		    UVM_KMF_VAONLY);
}
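
/*
 * note on sizing: the emergency area is round_page(MAXPHYS) bytes plus
 * one page of slack per color.  uvm_pagermapin() starts its emergency
 * mapping at emergva + ptoa() of the first page's color, so the slack
 * lets a full MAXPHYS transfer begin at any color while keeping the
 * kva and the physical pages on matching colors.
 */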

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init(void)
{
	u_int lcv;
	vaddr_t sva, eva;

	/*
	 * init pager map
	 */

	sva = 0;
	pager_map = uvm_km_suballoc(kernel_map, &sva, &eva, pager_map_size, 0,
	    false, NULL);
	mutex_init(&pager_map_wanted_lock, MUTEX_DEFAULT, IPL_NONE);
	pager_map_wanted = false;

	uvm_pager_realloc_emerg();

	/*
	 * call pager init functions
	 */
	for (lcv = 0 ; lcv < __arraycount(uvmpagerops); lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

#ifdef PMAP_DIRECT
/*
 * uvm_pagermapdirect: map a single page via the pmap's direct segment
 *
 * this is an abuse of pmap_direct_process(), since the kva is being grabbed
 * and no processing is taking place, but for now..
 */

static int
uvm_pagermapdirect(void *kva, size_t sz, void *cookie)
{

	KASSERT(sz == PAGE_SIZE);
	*(vaddr_t *)cookie = (vaddr_t)kva;
	return 0;
}
#endif

/*
 * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
 *
 * we basically just map in a blank map entry to reserve the space in the
 * map and then use pmap_enter() to put the mappings in by hand.
 */

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vsize_t size;
	vaddr_t kva;
	vaddr_t cva;
	struct vm_page *pp;
	vm_prot_t prot;
	const bool pdaemon = (curlwp == uvm.pagedaemon_lwp);
	const u_int first_color = VM_PGCOLOR(*pps);
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(pps=%#jx, npages=%jd, first_color=%ju)",
	    (uintptr_t)pps, npages, first_color, 0);

#ifdef PMAP_DIRECT
	/*
	 * for a single page the direct mapped segment can be used.
	 */

	if (npages == 1) {
		int error __diagused;
		KASSERT((pps[0]->flags & PG_BUSY) != 0);
		error = pmap_direct_process(VM_PAGE_TO_PHYS(pps[0]), 0,
		    PAGE_SIZE, uvm_pagermapdirect, &kva);
		KASSERT(error == 0);
		UVMHIST_LOG(maphist, "<- done, direct (KVA=%#jx)", kva,0,0,0);
		return kva;
	}
#endif

	/*
	 * compute protection.  outgoing I/O only needs read
	 * access to the page, whereas incoming needs read/write.
	 */

	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;

ReStart:
	size = ptoa(npages);
	kva = 0;			/* let system choose VA */

	if (uvm_map(pager_map, &kva, size, NULL, UVM_UNKNOWN_OFFSET,
	    first_color, UVM_FLAG_COLORMATCH | UVM_FLAG_NOMERGE
	    | (pdaemon ? UVM_FLAG_NOWAIT : 0)) != 0) {
		if (pdaemon) {
			mutex_enter(&pager_map_wanted_lock);
			if (emerginuse) {
				UVM_UNLOCK_AND_WAIT(&emergva,
				    &pager_map_wanted_lock, false,
				    "emergva", 0);
				goto ReStart;
			}
			emerginuse = true;
			mutex_exit(&pager_map_wanted_lock);
			kva = emergva + ptoa(first_color);
			/* The shift implicitly truncates to PAGE_SIZE */
			KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
			goto enter;
		}
		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
			UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
			return(0);
		}
		mutex_enter(&pager_map_wanted_lock);
		pager_map_wanted = true;
		UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
		    "pager_map", 0);
		goto ReStart;
	}

enter:
	/* got it */
	for (cva = kva; npages != 0; npages--, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		// KASSERT(!((VM_PAGE_TO_PHYS(pp) ^ cva) & uvmexp.colormask));
		KASSERT(pp->flags & PG_BUSY);
		pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot, 0);
	}
	pmap_update(vm_map_pmap(pager_map));

	UVMHIST_LOG(maphist, "<- done (KVA=%#jx)", kva,0,0,0);
	return(kva);
}
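
/*
 * illustrative sketch (not compiled): the usual pattern a caller of the
 * routine above follows.  "example_pager_read" and "start_transfer" are
 * hypothetical names; the real pagers live elsewhere.
 */
#if 0
static void
example_pager_read(struct vm_page **pps, int npages)
{
	vaddr_t kva;

	/*
	 * reserve space in pager_map and enter mappings for the busy
	 * pages.  UVMPAGER_MAPIN_READ asks for a writable mapping since
	 * the device will store into the pages; UVMPAGER_MAPIN_WAITOK
	 * allows sleeping for pager_map space.
	 */
	kva = uvm_pagermapin(pps, npages,
	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);

	/* hypothetical: do the device i/o against kva and wait for it */
	start_transfer(kva, ptoa(npages));

	/* tear down the mappings and wake anyone waiting for space */
	uvm_pagermapout(kva, npages);
}
#endif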

/*
 * uvm_pagermapout: remove pager_map mapping
 *
 * we remove our mappings by hand and then remove the mapping (waking
 * up anyone wanting space).
 */

void
uvm_pagermapout(vaddr_t kva, int npages)
{
	vsize_t size = ptoa(npages);
	struct vm_map_entry *entries;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, " (kva=%#jx, npages=%jd)", kva, npages,0,0);

#ifdef PMAP_DIRECT
	/*
	 * solitary pages are mapped directly.
	 */

	if (npages == 1) {
		UVMHIST_LOG(maphist,"<- done, direct", 0,0,0,0);
		return;
	}
#endif

	/*
	 * duplicate uvm_unmap, but add in pager_map_wanted handling.
	 */

	pmap_kremove(kva, size);
	pmap_update(pmap_kernel());

	if ((kva & ~ptoa(uvmexp.colormask)) == emergva) {
		mutex_enter(&pager_map_wanted_lock);
		KASSERT(emerginuse);
		emerginuse = false;
		wakeup(&emergva);
		mutex_exit(&pager_map_wanted_lock);
		return;
	}

	vm_map_lock(pager_map);
	uvm_unmap_remove(pager_map, kva, kva + size, &entries, 0);
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);
	vm_map_unlock(pager_map);
	if (entries)
		uvm_unmap_detach(entries, 0);
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}
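
/*
 * note: the check against emergva above masks off the per-color offset
 * that uvm_pagermapin() added, so mappings in the emergency area are
 * recognized no matter which color they started on.  they have no
 * pager_map entry to remove; releasing one is just a matter of clearing
 * emerginuse and waking any pagedaemon waiting for it.
 */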

void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, bool write, int error)
{
	struct uvm_object *uobj;
	struct vm_page *pg;
	krwlock_t *slock;
	int pageout_done;	/* number of PG_PAGEOUT pages processed */
	int swslot;
	int i;
	bool swap;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);

	swslot = 0;
	pageout_done = 0;
	slock = NULL;
	uobj = NULL;
	pg = pgs[0];
	swap = (pg->uanon != NULL && pg->uobject == NULL) ||
	    (pg->flags & PG_AOBJ) != 0;
	if (!swap) {
		uobj = pg->uobject;
		slock = uobj->vmobjlock;
		rw_enter(slock, RW_WRITER);
	} else {
#if defined(VMSWAP)
		if (error) {
			if (pg->uobject != NULL) {
				swslot = uao_find_swslot(pg->uobject,
				    pg->offset >> PAGE_SHIFT);
			} else {
				KASSERT(pg->uanon != NULL);
				swslot = pg->uanon->an_swslot;
			}
			KASSERT(swslot);
		}
#else /* defined(VMSWAP) */
		panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
	}
	for (i = 0; i < npages; i++) {
#if defined(VMSWAP)
		bool anon_disposed = false; /* XXX gcc */
#endif /* defined(VMSWAP) */

		pg = pgs[i];
		KASSERT(swap || pg->uobject == uobj);
		UVMHIST_LOG(ubchist, "pg %#jx", (uintptr_t)pg, 0,0,0);

#if defined(VMSWAP)
		/*
		 * for swap i/os, lock each page's object (or anon)
		 * individually since each page may need a different lock.
		 */

		if (swap) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			rw_enter(slock, RW_WRITER);
			anon_disposed = (pg->flags & PG_RELEASED) != 0;
			KASSERT(!anon_disposed || pg->uobject != NULL ||
			    pg->uanon->an_ref == 0);
		}
#endif /* defined(VMSWAP) */

		if (write && uobj != NULL) {
			KASSERT(radix_tree_get_tag(&uobj->uo_pages,
			    pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG));
			radix_tree_clear_tag(&uobj->uo_pages,
			    pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG);
		}

		/*
		 * process errors.  for reads, just mark the page to be freed.
		 * for writes, if the error was ENOMEM, we assume this was
		 * a transient failure so we mark the page dirty so that
		 * we'll try to write it again later.  for all other write
		 * errors, we assume the error is permanent, thus the data
		 * in the page is lost.  bummer.
		 */

		if (error) {
			int slot;
			if (!write) {
				pg->flags |= PG_RELEASED;
				continue;
			} else if (error == ENOMEM) {
				if (pg->flags & PG_PAGEOUT) {
					pg->flags &= ~PG_PAGEOUT;
					pageout_done++;
				}
				uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
				uvm_pagelock(pg);
				uvm_pageactivate(pg);
				uvm_pageunlock(pg);
				slot = 0;
			} else
				slot = SWSLOT_BAD;

#if defined(VMSWAP)
			if (swap) {
				if (pg->uobject != NULL) {
					int oldslot __diagused;
					oldslot = uao_set_swslot(pg->uobject,
					    pg->offset >> PAGE_SHIFT, slot);
					KASSERT(oldslot == swslot + i);
				} else {
					KASSERT(pg->uanon->an_swslot ==
					    swslot + i);
					pg->uanon->an_swslot = slot;
				}
			}
#endif /* defined(VMSWAP) */
		}

		/*
		 * if the page is PG_FAKE, this must have been a read to
		 * initialize the page.  clear PG_FAKE and activate the page.
		 */

		if (pg->flags & PG_FAKE) {
			KASSERT(!write);
			pg->flags &= ~PG_FAKE;
#if defined(READAHEAD_STATS)
			pg->flags |= PG_READAHEAD;
			uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
			KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
			uvm_pagelock(pg);
			uvm_pageenqueue(pg);
			uvm_pageunlock(pg);
		}

		/*
		 * do accounting for pagedaemon i/o and arrange to free
		 * the pages instead of just unbusying them.
		 */

		if (pg->flags & PG_PAGEOUT) {
			pg->flags &= ~PG_PAGEOUT;
			pageout_done++;
			atomic_inc_uint(&uvmexp.pdfreed);
			pg->flags |= PG_RELEASED;
		}

#if defined(VMSWAP)
		/*
		 * for swap pages, unlock everything for this page now.
		 */

		if (swap) {
			if (pg->uobject == NULL && anon_disposed) {
				uvm_anon_release(pg->uanon);
			} else {
				uvm_page_unbusy(&pg, 1);
				rw_exit(slock);
			}
		}
#endif /* defined(VMSWAP) */
	}
	if (pageout_done != 0) {
		uvm_pageout_done(pageout_done);
	}
	if (!swap) {
		uvm_page_unbusy(pgs, npages);
		rw_exit(slock);
	} else {
#if defined(VMSWAP)
		KASSERT(write);

		/* these pages are now only in swap. */
		if (error != ENOMEM) {
			atomic_add_int(&uvmexp.swpgonly, npages);
		}
		if (error) {
			if (error != ENOMEM)
				uvm_swap_markbad(swslot, npages);
			else
				uvm_swap_free(swslot, npages);
		}
		atomic_dec_uint(&uvmexp.pdpending);
#endif /* defined(VMSWAP) */
	}
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */
void
uvm_aio_aiodone(struct buf *bp)
{
	const int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pgs[howmany(MAXPHYS, MIN_PAGE_SIZE)];
	int i, error;
	bool write;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(ubchist, "bp %#jx", (uintptr_t)bp, 0,0,0);

	KASSERT(bp->b_bufsize <= MAXPHYS);
	KASSERT(npages <= __arraycount(pgs));

	error = bp->b_error;
	write = (bp->b_flags & B_READ) == 0;

	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		UVMHIST_LOG(ubchist, "pgs[%jd] = %#jx", i,
		    (uintptr_t)pgs[i], 0, 0);
	}
	uvm_pagermapout((vaddr_t)bp->b_data, npages);

	uvm_aio_aiodone_pages(pgs, npages, write, error);

	if (write && (bp->b_cflags & BC_AGE) != 0) {
		mutex_enter(bp->b_objlock);
		vwakeup(bp);
		mutex_exit(bp->b_objlock);
	}
	putiobuf(bp);
}
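
/*
 * note: uvm_aio_aiodone() consumes the buf.  it expects b_data to be
 * the pager_map kva that uvm_pagermapin() returned for the pages being
 * transferred, b_bufsize to cover exactly those pages, b_error to hold
 * the i/o status and B_READ in b_flags to distinguish reads from
 * writes; the buf itself is released with putiobuf().
 */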

/*
 * uvm_pageratop: convert KVAs in the pager map back to their page
 * structures.
 */

struct vm_page *
uvm_pageratop(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	bool rv __diagused;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}