/* $NetBSD: uvm_pager.c,v 1.110 2014/03/01 18:32:01 christos Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.110 2014/03/01 18:32:01 christos Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"
#include "opt_pagermap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

/*
 * XXX
 * this is needed until the device strategy interface
 * is changed to do physically-addressed i/o.
 */

#ifndef PAGER_MAP_DEFAULT_SIZE
#define PAGER_MAP_DEFAULT_SIZE	(16 * 1024 * 1024)
#endif

#ifndef PAGER_MAP_SIZE
#define PAGER_MAP_SIZE	PAGER_MAP_DEFAULT_SIZE
#endif

size_t pager_map_size = PAGER_MAP_SIZE;

/*
 * list of uvm pagers in the system
 */

const struct uvm_pagerops * const uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
	&ubc_pager,
};

/*
 * the pager map: provides KVA for I/O
 */

struct vm_map *pager_map;	/* XXX */
kmutex_t pager_map_wanted_lock;
bool pager_map_wanted;		/* locked by pager map */
static vaddr_t emergva;
static int emerg_ncolors;
static bool emerginuse;
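
/*
 * uvm_pager_realloc_emerg: allocate, or grow after the number of page
 * colors has increased, the emergency VA range that the pagedaemon falls
 * back on in uvm_pagermapin() when pager_map is full.  the range is
 * MAXPHYS long plus one page per color so that a color-matched start
 * address can always be picked.
 */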

void
uvm_pager_realloc_emerg(void)
{
	vaddr_t new_emergva, old_emergva;
	int old_emerg_ncolors;

	if (__predict_true(emergva != 0 && emerg_ncolors >= uvmexp.ncolors))
		return;

	KASSERT(!emerginuse);

	new_emergva = uvm_km_alloc(kernel_map,
	    round_page(MAXPHYS) + ptoa(uvmexp.ncolors), ptoa(uvmexp.ncolors),
	    UVM_KMF_VAONLY);

	KASSERT(new_emergva != 0);

	old_emergva = emergva;
	old_emerg_ncolors = emerg_ncolors;

	/*
	 * don't support re-color in late boot anyway.
	 */
	if (0) /* XXX */
		mutex_enter(&pager_map_wanted_lock);

	emergva = new_emergva;
	emerg_ncolors = uvmexp.ncolors;
	wakeup(&old_emergva);

	if (0) /* XXX */
		mutex_exit(&pager_map_wanted_lock);

	if (old_emergva)
		uvm_km_free(kernel_map, old_emergva,
		    round_page(MAXPHYS) + ptoa(old_emerg_ncolors),
		    UVM_KMF_VAONLY);
}

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init(void)
{
	u_int lcv;
	vaddr_t sva, eva;

	/*
	 * init pager map
	 */

	sva = 0;
	pager_map = uvm_km_suballoc(kernel_map, &sva, &eva, pager_map_size, 0,
	    false, NULL);
	mutex_init(&pager_map_wanted_lock, MUTEX_DEFAULT, IPL_NONE);
	pager_map_wanted = false;

	uvm_pager_realloc_emerg();

	/*
	 * init ASYNC I/O queue
	 */

	TAILQ_INIT(&uvm.aio_done);

	/*
	 * call pager init functions
	 */
	for (lcv = 0 ; lcv < __arraycount(uvmpagerops); lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

/*
 * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
 *
 * we basically just map in a blank map entry to reserve the space in the
 * map and then use pmap_enter() to put the mappings in by hand.
 */

vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vsize_t size;
	vaddr_t kva;
	vaddr_t cva;
	struct vm_page *pp;
	vm_prot_t prot;
	const bool pdaemon = (curlwp == uvm.pagedaemon_lwp);
	const u_int first_color = VM_PGCOLOR_BUCKET(*pps);
	UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, first_color=%u)",
	    pps, npages, first_color, 0);

	/*
	 * compute protection.  outgoing I/O only needs read
	 * access to the page, whereas incoming needs read/write.
	 */

	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;

 ReStart:
	size = ptoa(npages);
	kva = 0;			/* let system choose VA */

	if (uvm_map(pager_map, &kva, size, NULL, UVM_UNKNOWN_OFFSET,
	    first_color, UVM_FLAG_COLORMATCH | UVM_FLAG_NOMERGE
	    | (pdaemon ? UVM_FLAG_NOWAIT : 0)) != 0) {
		if (pdaemon) {
			mutex_enter(&pager_map_wanted_lock);
			if (emerginuse) {
				UVM_UNLOCK_AND_WAIT(&emergva,
				    &pager_map_wanted_lock, false,
				    "emergva", 0);
				goto ReStart;
			}
			emerginuse = true;
			mutex_exit(&pager_map_wanted_lock);
			kva = emergva + ptoa(first_color);
			/* The shift implicitly truncates to PAGE_SIZE */
			KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
			goto enter;
		}
		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
			UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
			return(0);
		}
		mutex_enter(&pager_map_wanted_lock);
		pager_map_wanted = true;
		UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
		    "pager_map", 0);
		goto ReStart;
	}

 enter:
	/* got it */
	for (cva = kva; npages != 0; npages--, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		// KASSERT(!((VM_PAGE_TO_PHYS(pp) ^ cva) & uvmexp.colormask));
		KASSERT(pp->flags & PG_BUSY);
		pmap_kenter_pa(cva, VM_PAGE_TO_PHYS(pp), prot, 0);
	}
	pmap_update(vm_map_pmap(pager_map));

	UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0);
	return(kva);
}
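
/*
 * informal usage sketch (not lifted from any particular caller): a pager
 * that needs a kernel mapping for a set of busy pages brackets its i/o
 * with uvm_pagermapin() above and uvm_pagermapout() below, e.g.
 *
 *	kva = uvm_pagermapin(pgs, npages,
 *	    UVMPAGER_MAPIN_WAITOK | UVMPAGER_MAPIN_READ);
 *	... issue the i/o against [kva, kva + ptoa(npages)) ...
 *	uvm_pagermapout(kva, npages);
 *
 * without UVMPAGER_MAPIN_WAITOK the mapin returns 0 instead of sleeping
 * when pager_map is full.  async callers leave the range mapped and let
 * uvm_aio_aiodone() below unmap it when the i/o completes.
 */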

/*
 * uvm_pagermapout: remove pager_map mapping
 *
 * we remove our mappings by hand and then remove the mapping (waking
 * up anyone wanting space).
 */

void
uvm_pagermapout(vaddr_t kva, int npages)
{
	vsize_t size = ptoa(npages);
	struct vm_map_entry *entries;
	UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0);

	/*
	 * duplicate uvm_unmap, but add in pager_map_wanted handling.
	 */

	pmap_kremove(kva, size);
	pmap_update(pmap_kernel());

	if ((kva & ~ptoa(uvmexp.colormask)) == emergva) {
		mutex_enter(&pager_map_wanted_lock);
		KASSERT(emerginuse);
		emerginuse = false;
		wakeup(&emergva);
		mutex_exit(&pager_map_wanted_lock);
		return;
	}

	vm_map_lock(pager_map);
	uvm_unmap_remove(pager_map, kva, kva + size, &entries, 0);
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);
	vm_map_unlock(pager_map);
	if (entries)
		uvm_unmap_detach(entries, 0);
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}

/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 */

void
uvm_aio_biodone(struct buf *bp)
{
	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	workqueue_enqueue(uvm.aiodone_queue, &bp->b_work, NULL);
}
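
/*
 * uvm_aio_aiodone_pages: finish i/o on a set of pages in thread context.
 * handle any i/o error, clear PG_FAKE on pages that were just read in,
 * arrange for PG_PAGEOUT pages to be freed, and unbusy and unlock
 * everything.  for swap i/o each page may be protected by a different
 * object or anon lock; for ordinary object i/o all pages share the
 * object's lock.
 */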

void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, bool write, int error)
{
	struct uvm_object *uobj;
	struct vm_page *pg;
	kmutex_t *slock;
	int pageout_done;	/* number of PG_PAGEOUT pages processed */
	int swslot;
	int i;
	bool swap;
	UVMHIST_FUNC("uvm_aio_aiodone_pages"); UVMHIST_CALLED(ubchist);

	swslot = 0;
	pageout_done = 0;
	slock = NULL;
	uobj = NULL;
	pg = pgs[0];
	swap = (pg->uanon != NULL && pg->uobject == NULL) ||
		(pg->pqflags & PQ_AOBJ) != 0;
	if (!swap) {
		uobj = pg->uobject;
		slock = uobj->vmobjlock;
		mutex_enter(slock);
		mutex_enter(&uvm_pageqlock);
	} else {
#if defined(VMSWAP)
		if (error) {
			if (pg->uobject != NULL) {
				swslot = uao_find_swslot(pg->uobject,
				    pg->offset >> PAGE_SHIFT);
			} else {
				KASSERT(pg->uanon != NULL);
				swslot = pg->uanon->an_swslot;
			}
			KASSERT(swslot);
		}
#else /* defined(VMSWAP) */
		panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
	}
	for (i = 0; i < npages; i++) {
#if defined(VMSWAP)
		bool anon_disposed = false; /* XXX gcc */
#endif /* defined(VMSWAP) */

		pg = pgs[i];
		KASSERT(swap || pg->uobject == uobj);
		UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);

#if defined(VMSWAP)
		/*
		 * for swap i/os, lock each page's object (or anon)
		 * individually since each page may need a different lock.
		 */

		if (swap) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			mutex_enter(slock);
			mutex_enter(&uvm_pageqlock);
			anon_disposed = (pg->flags & PG_RELEASED) != 0;
			KASSERT(!anon_disposed || pg->uobject != NULL ||
			    pg->uanon->an_ref == 0);
		}
#endif /* defined(VMSWAP) */

		/*
		 * process errors.  for reads, just mark the page to be freed.
		 * for writes, if the error was ENOMEM, we assume this was
		 * a transient failure so we mark the page dirty so that
		 * we'll try to write it again later.  for all other write
		 * errors, we assume the error is permanent, thus the data
		 * in the page is lost.  bummer.
		 */

		if (error) {
			int slot;
			if (!write) {
				pg->flags |= PG_RELEASED;
				continue;
			} else if (error == ENOMEM) {
				if (pg->flags & PG_PAGEOUT) {
					pg->flags &= ~PG_PAGEOUT;
					pageout_done++;
				}
				pg->flags &= ~PG_CLEAN;
				uvm_pageactivate(pg);
				slot = 0;
			} else
				slot = SWSLOT_BAD;

#if defined(VMSWAP)
			if (swap) {
				if (pg->uobject != NULL) {
					int oldslot __diagused;
					oldslot = uao_set_swslot(pg->uobject,
					    pg->offset >> PAGE_SHIFT, slot);
					KASSERT(oldslot == swslot + i);
				} else {
					KASSERT(pg->uanon->an_swslot ==
					    swslot + i);
					pg->uanon->an_swslot = slot;
				}
			}
#endif /* defined(VMSWAP) */
		}

		/*
		 * if the page is PG_FAKE, this must have been a read to
		 * initialize the page.  clear PG_FAKE and activate the page.
		 * we must also clear the pmap "modified" flag since it may
		 * still be set from the page's previous identity.
		 */

		if (pg->flags & PG_FAKE) {
			KASSERT(!write);
			pg->flags &= ~PG_FAKE;
#if defined(READAHEAD_STATS)
			pg->pqflags |= PQ_READAHEAD;
			uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
			KASSERT((pg->flags & PG_CLEAN) != 0);
			uvm_pageenqueue(pg);
			pmap_clear_modify(pg);
		}

		/*
		 * do accounting for pagedaemon i/o and arrange to free
		 * the pages instead of just unbusying them.
		 */

		if (pg->flags & PG_PAGEOUT) {
			pg->flags &= ~PG_PAGEOUT;
			pageout_done++;
			uvmexp.pdfreed++;
			pg->flags |= PG_RELEASED;
		}

#if defined(VMSWAP)
		/*
		 * for swap pages, unlock everything for this page now.
		 */

		if (swap) {
			if (pg->uobject == NULL && anon_disposed) {
				mutex_exit(&uvm_pageqlock);
				uvm_anon_release(pg->uanon);
			} else {
				uvm_page_unbusy(&pg, 1);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(slock);
			}
		}
#endif /* defined(VMSWAP) */
	}
	uvm_pageout_done(pageout_done);
	if (!swap) {
		uvm_page_unbusy(pgs, npages);
		mutex_exit(&uvm_pageqlock);
		mutex_exit(slock);
	} else {
#if defined(VMSWAP)
		KASSERT(write);

		/* these pages are now only in swap. */
		mutex_enter(&uvm_swap_data_lock);
		if (error != ENOMEM) {
			KASSERT(uvmexp.swpgonly + npages <= uvmexp.swpginuse);
			uvmexp.swpgonly += npages;
		}
		mutex_exit(&uvm_swap_data_lock);
		if (error) {
			if (error != ENOMEM)
				uvm_swap_markbad(swslot, npages);
			else
				uvm_swap_free(swslot, npages);
		}
		uvmexp.pdpending--;
#endif /* defined(VMSWAP) */
	}
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */

void
uvm_aio_aiodone(struct buf *bp)
{
	int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pgs[npages];
	int i, error;
	bool write;
	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);

	error = bp->b_error;
	write = (bp->b_flags & B_READ) == 0;

	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
	}
	uvm_pagermapout((vaddr_t)bp->b_data, npages);

	uvm_aio_aiodone_pages(pgs, npages, write, error);

	if (write && (bp->b_cflags & BC_AGE) != 0) {
		mutex_enter(bp->b_objlock);
		vwakeup(bp);
		mutex_exit(bp->b_objlock);
	}
	putiobuf(bp);
}

/*
 * uvm_pageratop: convert KVAs in the pager map back to their page
 * structures.
 */

struct vm_page *
uvm_pageratop(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	bool rv __diagused;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}
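
/*
 * informal summary of the async path implemented above: the pages are
 * mapped with uvm_pagermapin() and the i/o is issued against that KVA;
 * at device interrupt time uvm_aio_biodone() points b_iodone at
 * uvm_aio_aiodone() and queues the buf on uvm.aiodone_queue; later, in
 * thread context, uvm_aio_aiodone() recovers the vm_pages with
 * uvm_pageratop(), unmaps the KVA with uvm_pagermapout() and completes
 * the pages with uvm_aio_aiodone_pages().
 */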