/*	$NetBSD: uvm_pager.c,v 1.123 2020/02/24 12:38:57 rin Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.123 2020/02/24 12:38:57 rin Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"
#include "opt_pagermap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

/*
 * XXX
 * this is needed until the device strategy interface
 * is changed to do physically-addressed i/o.
 */

#ifndef PAGER_MAP_DEFAULT_SIZE
#define PAGER_MAP_DEFAULT_SIZE	(16 * 1024 * 1024)
#endif

#ifndef PAGER_MAP_SIZE
#define PAGER_MAP_SIZE	PAGER_MAP_DEFAULT_SIZE
#endif

size_t pager_map_size = PAGER_MAP_SIZE;

/*
 * list of uvm pagers in the system
 */

const struct uvm_pagerops * const uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
	&ubc_pager,
};

/*
 * the pager map: provides KVA for I/O
 */

struct vm_map *pager_map;		/* XXX */
kmutex_t pager_map_wanted_lock __cacheline_aligned;
bool pager_map_wanted;			/* locked by pager map */
static vaddr_t emergva;
static int emerg_ncolors;
static bool emerginuse;

void
uvm_pager_realloc_emerg(void)
{
	vaddr_t new_emergva, old_emergva;
	int old_emerg_ncolors;

	if (__predict_true(emergva != 0 && emerg_ncolors >= uvmexp.ncolors))
		return;

	KASSERT(!emerginuse);

	new_emergva = uvm_km_alloc(kernel_map,
	    round_page(MAXPHYS) + ptoa(uvmexp.ncolors), ptoa(uvmexp.ncolors),
	    UVM_KMF_VAONLY);

	KASSERT(new_emergva != 0);

	old_emergva = emergva;
	old_emerg_ncolors = emerg_ncolors;

	/*
	 * don't support re-color in late boot anyway.
	 */
	if (0) /* XXX */
		mutex_enter(&pager_map_wanted_lock);

	emergva = new_emergva;
	emerg_ncolors = uvmexp.ncolors;
	wakeup(&old_emergva);

	if (0) /* XXX */
		mutex_exit(&pager_map_wanted_lock);

	if (old_emergva)
		uvm_km_free(kernel_map, old_emergva,
		    round_page(MAXPHYS) + ptoa(old_emerg_ncolors),
		    UVM_KMF_VAONLY);
}
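
/*
 * note on the sizing above (illustrative, with hypothetical numbers): the
 * emergency area is round_page(MAXPHYS) + ptoa(uvmexp.ncolors) bytes and
 * aligned to ptoa(uvmexp.ncolors), so uvm_pagermapin() can start its
 * mapping at emergva + ptoa(first_color) for any page color and still have
 * MAXPHYS worth of KVA, and uvm_pagermapout() can recover the base with
 * (kva & ~ptoa(uvmexp.colormask)) == emergva.  for example, with 4 KB
 * pages, 4 colors and a 64 KB MAXPHYS, the reservation is 64 KB + 16 KB
 * of VA.
 */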

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init(void)
{
	u_int lcv;
	vaddr_t sva, eva;

	/*
	 * init pager map
	 */

	sva = 0;
	pager_map = uvm_km_suballoc(kernel_map, &sva, &eva, pager_map_size, 0,
	    false, NULL);
	mutex_init(&pager_map_wanted_lock, MUTEX_DEFAULT, IPL_NONE);
	pager_map_wanted = false;

	uvm_pager_realloc_emerg();

	/*
	 * call pager init functions
	 */
	for (lcv = 0 ; lcv < __arraycount(uvmpagerops); lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

/*
 * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
 *
 * we basically just map in a blank map entry to reserve the space in the
 * map and then use pmap_kenter_pa() to put the mappings in by hand.
 */

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vsize_t size;
	vaddr_t kva;
	vaddr_t cva;
	struct vm_page *pp;
	vm_prot_t prot;
	const bool pdaemon = (curlwp == uvm.pagedaemon_lwp);
	const u_int first_color = VM_PGCOLOR(*pps);
	UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(pps=%#jx, npages=%jd, first_color=%ju)",
	    (uintptr_t)pps, npages, first_color, 0);

	/*
	 * compute protection.  outgoing I/O only needs read
	 * access to the page, whereas incoming needs read/write.
	 */

	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;

ReStart:
	size = ptoa(npages);
	kva = 0;			/* let system choose VA */

	if (uvm_map(pager_map, &kva, size, NULL, UVM_UNKNOWN_OFFSET,
	    first_color, UVM_FLAG_COLORMATCH | UVM_FLAG_NOMERGE
	    | (pdaemon ? UVM_FLAG_NOWAIT : 0)) != 0) {
		if (pdaemon) {
			mutex_enter(&pager_map_wanted_lock);
			if (emerginuse) {
				UVM_UNLOCK_AND_WAIT(&emergva,
				    &pager_map_wanted_lock, false,
				    "emergva", 0);
				goto ReStart;
			}
			emerginuse = true;
			mutex_exit(&pager_map_wanted_lock);
			kva = emergva + ptoa(first_color);
			/* The shift implicitly truncates to PAGE_SIZE */
			KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
			goto enter;
		}
		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
			UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
			return(0);
		}
		mutex_enter(&pager_map_wanted_lock);
		pager_map_wanted = true;
		UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
		    "pager_map", 0);
		goto ReStart;
	}

enter:
	/* got it */
	for (cva = kva; npages != 0; npages--, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		// KASSERT(!((VM_PAGE_TO_PHYS(pp) ^ cva) & uvmexp.colormask));
		KASSERT(pp->flags & PG_BUSY);
		pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot, 0);
	}
	pmap_update(vm_map_pmap(pager_map));

	UVMHIST_LOG(maphist, "<- done (KVA=%#jx)", kva,0,0,0);
	return(kva);
}

/*
 * uvm_pagermapout: remove pager_map mapping
 *
 * we remove our mappings by hand and then remove the map entry (waking
 * up anyone waiting for space).
 */

void
uvm_pagermapout(vaddr_t kva, int npages)
{
	vsize_t size = ptoa(npages);
	struct vm_map_entry *entries;
	UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (kva=%#jx, npages=%jd)", kva, npages,0,0);

	/*
	 * duplicate uvm_unmap, but add in pager_map_wanted handling.
	 */

	pmap_kremove(kva, size);
	pmap_update(pmap_kernel());

	if ((kva & ~ptoa(uvmexp.colormask)) == emergva) {
		mutex_enter(&pager_map_wanted_lock);
		KASSERT(emerginuse);
		emerginuse = false;
		wakeup(&emergva);
		mutex_exit(&pager_map_wanted_lock);
		return;
	}

	vm_map_lock(pager_map);
	uvm_unmap_remove(pager_map, kva, kva + size, &entries, 0);
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);
	vm_map_unlock(pager_map);
	if (entries)
		uvm_unmap_detach(entries, 0);
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}
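
/*
 * A minimal usage sketch (illustration only, not compiled): callers with a
 * set of PG_BUSY pages bracket the transfer with uvm_pagermapin() and
 * uvm_pagermapout().  The transfer routine (example_transfer) is
 * hypothetical.
 */
#if 0
static int
example_pager_io(struct vm_page **pgs, int npages, bool is_read)
{
	vaddr_t kva;
	int error;

	/*
	 * a read fills the pages from the device, so the mapping must be
	 * writable; with UVMPAGER_MAPIN_WAITOK the call sleeps for KVA
	 * instead of failing.
	 */
	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    (is_read ? UVMPAGER_MAPIN_READ : 0));

	error = example_transfer(kva, ptoa(npages), is_read);

	uvm_pagermapout(kva, npages);
	return error;
}
#endif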

void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, bool write, int error)
{
	struct uvm_object *uobj;
	struct vm_page *pg;
	krwlock_t *slock;
	int pageout_done;	/* number of PG_PAGEOUT pages processed */
	int swslot;
	int i;
	bool swap;
	UVMHIST_FUNC("uvm_aio_aiodone_pages"); UVMHIST_CALLED(ubchist);

	swslot = 0;
	pageout_done = 0;
	slock = NULL;
	uobj = NULL;
	pg = pgs[0];
	swap = (pg->uanon != NULL && pg->uobject == NULL) ||
		(pg->flags & PG_AOBJ) != 0;
	if (!swap) {
		uobj = pg->uobject;
		slock = uobj->vmobjlock;
		rw_enter(slock, RW_WRITER);
	} else {
#if defined(VMSWAP)
		if (error) {
			if (pg->uobject != NULL) {
				swslot = uao_find_swslot(pg->uobject,
				    pg->offset >> PAGE_SHIFT);
			} else {
				KASSERT(pg->uanon != NULL);
				swslot = pg->uanon->an_swslot;
			}
			KASSERT(swslot);
		}
#else /* defined(VMSWAP) */
		panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
	}
	for (i = 0; i < npages; i++) {
#if defined(VMSWAP)
		bool anon_disposed = false; /* XXX gcc */
#endif /* defined(VMSWAP) */

		pg = pgs[i];
		KASSERT(swap || pg->uobject == uobj);
		UVMHIST_LOG(ubchist, "pg %#jx", (uintptr_t)pg, 0,0,0);

#if defined(VMSWAP)
		/*
		 * for swap i/os, lock each page's object (or anon)
		 * individually since each page may need a different lock.
		 */

		if (swap) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			rw_enter(slock, RW_WRITER);
			anon_disposed = (pg->flags & PG_RELEASED) != 0;
			KASSERT(!anon_disposed || pg->uobject != NULL ||
			    pg->uanon->an_ref == 0);
		}
#endif /* defined(VMSWAP) */

		if (write && uobj != NULL) {
			KASSERT(radix_tree_get_tag(&uobj->uo_pages,
			    pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG));
			radix_tree_clear_tag(&uobj->uo_pages,
			    pg->offset >> PAGE_SHIFT, UVM_PAGE_WRITEBACK_TAG);
		}

		/*
		 * process errors.  for reads, just mark the page to be freed.
		 * for writes, if the error was ENOMEM, we assume this was
		 * a transient failure so we mark the page dirty so that
		 * we'll try to write it again later.  for all other write
		 * errors, we assume the error is permanent, thus the data
		 * in the page is lost.  bummer.
		 */

		if (error) {
			int slot;
			if (!write) {
				pg->flags |= PG_RELEASED;
				continue;
			} else if (error == ENOMEM) {
				if (pg->flags & PG_PAGEOUT) {
					pg->flags &= ~PG_PAGEOUT;
					pageout_done++;
				}
				uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
				uvm_pagelock(pg);
				uvm_pageactivate(pg);
				uvm_pageunlock(pg);
				slot = 0;
			} else
				slot = SWSLOT_BAD;

#if defined(VMSWAP)
			if (swap) {
				if (pg->uobject != NULL) {
					int oldslot __diagused;
					oldslot = uao_set_swslot(pg->uobject,
					    pg->offset >> PAGE_SHIFT, slot);
					KASSERT(oldslot == swslot + i);
				} else {
					KASSERT(pg->uanon->an_swslot ==
					    swslot + i);
					pg->uanon->an_swslot = slot;
				}
			}
#endif /* defined(VMSWAP) */
		}

		/*
		 * if the page is PG_FAKE, this must have been a read to
		 * initialize the page.  clear PG_FAKE and activate the page.
		 */

		if (pg->flags & PG_FAKE) {
			KASSERT(!write);
			pg->flags &= ~PG_FAKE;
#if defined(READAHEAD_STATS)
			pg->flags |= PG_READAHEAD;
			uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
			KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
			uvm_pagelock(pg);
			uvm_pageenqueue(pg);
			uvm_pageunlock(pg);
		}

		/*
		 * do accounting for pagedaemon i/o and arrange to free
		 * the pages instead of just unbusying them.
		 */

		if (pg->flags & PG_PAGEOUT) {
			pg->flags &= ~PG_PAGEOUT;
			pageout_done++;
			atomic_inc_uint(&uvmexp.pdfreed);
			pg->flags |= PG_RELEASED;
		}

#if defined(VMSWAP)
		/*
		 * for swap pages, unlock everything for this page now.
		 */

		if (swap) {
			if (pg->uobject == NULL && anon_disposed) {
				uvm_anon_release(pg->uanon);
			} else {
				uvm_page_unbusy(&pg, 1);
				rw_exit(slock);
			}
		}
#endif /* defined(VMSWAP) */
	}
	uvm_pageout_done(pageout_done);
	if (!swap) {
		uvm_page_unbusy(pgs, npages);
		rw_exit(slock);
	} else {
#if defined(VMSWAP)
		KASSERT(write);

		/* these pages are now only in swap. */
		if (error != ENOMEM) {
			atomic_add_int(&uvmexp.swpgonly, npages);
		}
		if (error) {
			if (error != ENOMEM)
				uvm_swap_markbad(swslot, npages);
			else
				uvm_swap_free(swslot, npages);
		}
		atomic_dec_uint(&uvmexp.pdpending);
#endif /* defined(VMSWAP) */
	}
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */

void
uvm_aio_aiodone(struct buf *bp)
{
	int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pgs[npages];
	int i, error;
	bool write;
	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "bp %#jx", (uintptr_t)bp, 0,0,0);

	error = bp->b_error;
	write = (bp->b_flags & B_READ) == 0;

	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		UVMHIST_LOG(ubchist, "pgs[%jd] = %#jx", i,
		    (uintptr_t)pgs[i], 0, 0);
	}
	uvm_pagermapout((vaddr_t)bp->b_data, npages);

	uvm_aio_aiodone_pages(pgs, npages, write, error);

	if (write && (bp->b_cflags & BC_AGE) != 0) {
		mutex_enter(bp->b_objlock);
		vwakeup(bp);
		mutex_exit(bp->b_objlock);
	}
	putiobuf(bp);
}
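
/*
 * A sketch of how uvm_aio_aiodone() above is expected to be reached
 * (illustration only, not compiled): a pager maps the busy pages, starts
 * an async transfer, and arranges for uvm_aio_aiodone() to run later in
 * thread context.  The strategy and deferral helpers (example_strategy,
 * example_defer_to_thread) are hypothetical.
 */
#if 0
static void
example_start_async_write(struct vnode *vp, struct vm_page **pgs, int npages)
{
	struct buf *bp;
	vaddr_t kva;

	/* the pages must already be PG_BUSY */
	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK);

	bp = getiobuf(vp, true);
	bp->b_data = (void *)kva;
	bp->b_bufsize = bp->b_bcount = ptoa(npages);
	bp->b_flags = B_ASYNC;		/* a write: B_READ is not set */

	example_strategy(bp);		/* start the transfer */

	/*
	 * once the transfer completes, uvm_aio_aiodone(bp) must run in
	 * thread context; it unmaps the pages and disposes of them via
	 * uvm_aio_aiodone_pages().
	 */
	example_defer_to_thread(bp, uvm_aio_aiodone);
}
#endif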

/*
 * uvm_pageratop: convert KVAs in the pager map back to their page
 * structures.
 */

struct vm_page *
uvm_pageratop(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	bool rv __diagused;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}