/*	$OpenBSD: uvm_amap.c,v 1.77 2016/09/15 02:00:18 dlg Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 *
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pools for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;
struct pool uvm_small_amap_pool[UVM_AMAP_CHUNK];
struct pool uvm_amap_chunk_pool;

LIST_HEAD(, vm_amap) amap_list;

static char amap_small_pool_names[UVM_AMAP_CHUNK][9];

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int, int);
void amap_chunk_free(struct vm_amap *, struct vm_amap_chunk *);
void amap_wiperange_chunk(struct vm_amap *, struct vm_amap_chunk *, int, int);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

/*
 * amap_chunk_get: lookup a chunk for slot. if create is non-zero,
 * the chunk is created if it does not yet exist.
 *
 * => returns the chunk on success or NULL on error
 */
struct vm_amap_chunk *
amap_chunk_get(struct vm_amap *amap, int slot, int create, int waitf)
{
	int bucket = UVM_AMAP_BUCKET(amap, slot);
	int baseslot = AMAP_BASE_SLOT(slot);
	int n;
	struct vm_amap_chunk *chunk, *newchunk, *pchunk = NULL;

	if (UVM_AMAP_SMALL(amap))
		return &amap->am_small;

	for (chunk = amap->am_buckets[bucket]; chunk != NULL;
	    chunk = TAILQ_NEXT(chunk, ac_list)) {
		if (UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) != bucket)
			break;
		if (chunk->ac_baseslot == baseslot)
			return chunk;
		pchunk = chunk;
	}
	if (!create)
		return NULL;

	if (amap->am_nslot - baseslot >= UVM_AMAP_CHUNK)
		n = UVM_AMAP_CHUNK;
	else
		n = amap->am_nslot - baseslot;

	newchunk = pool_get(&uvm_amap_chunk_pool, waitf | PR_ZERO);
	if (newchunk == NULL)
		return NULL;

	if (pchunk == NULL) {
		TAILQ_INSERT_TAIL(&amap->am_chunks, newchunk, ac_list);
		KASSERT(amap->am_buckets[bucket] == NULL);
		amap->am_buckets[bucket] = newchunk;
	} else
		TAILQ_INSERT_AFTER(&amap->am_chunks, pchunk, newchunk,
		    ac_list);

	amap->am_ncused++;
	newchunk->ac_baseslot = baseslot;
	newchunk->ac_nslot = n;
	return newchunk;
}

void
amap_chunk_free(struct vm_amap *amap, struct vm_amap_chunk *chunk)
{
	int bucket = UVM_AMAP_BUCKET(amap, chunk->ac_baseslot);
	struct vm_amap_chunk *nchunk;

	if (UVM_AMAP_SMALL(amap))
		return;

	nchunk = TAILQ_NEXT(chunk, ac_list);
	TAILQ_REMOVE(&amap->am_chunks, chunk, ac_list);
	if (amap->am_buckets[bucket] == chunk) {
		if (nchunk != NULL &&
		    UVM_AMAP_BUCKET(amap, nchunk->ac_baseslot) == bucket)
			amap->am_buckets[bucket] = nchunk;
		else
			amap->am_buckets[bucket] = NULL;

	}
	pool_put(&uvm_amap_chunk_pool, chunk);
	amap->am_ncused--;
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."  for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.  [the negative value indicates that the length is
 * greater than one.]  the second slot of the chunk contains the length
 * of the chunk.  here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows us to allow one int to contain the ref count for the whole
 * chunk.  note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (need a way to tell
 * if we've got one zero or a bunch of them).
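 *
 * a worked example of the encoding (an illustrative sketch using the
 * helpers below): a run of 4 slots that each have 2 references is
 * stored by pp_setreflen(ppref, off, 2, 4) as ppref[off] = -(2 + 1) = -3
 * and ppref[off + 1] = 4, while a run of length 1 with a reference
 * count of 0 is stored by pp_setreflen(ppref, off, 0, 1) as
 * ppref[off] = 0 + 1 = 1.  pp_getreflen() reverses both encodings.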
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	int i;
	size_t size;

	/* Initialize the vm_amap pool. */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap),
	    0, IPL_NONE, PR_WAITOK, "amappl", NULL);
	pool_sethiwat(&uvm_amap_pool, 4096);

	/* initialize small amap pools */
	for (i = 0; i < nitems(uvm_small_amap_pool); i++) {
		snprintf(amap_small_pool_names[i],
		    sizeof(amap_small_pool_names[0]), "amappl%d", i + 1);
		size = offsetof(struct vm_amap, am_small.ac_anon) +
		    (i + 1) * sizeof(struct vm_anon *);
		pool_init(&uvm_small_amap_pool[i], size, 0,
		    IPL_NONE, 0, amap_small_pool_names[i], NULL);
	}

	pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) +
	    UVM_AMAP_CHUNK * sizeof(struct vm_anon *),
	    0, IPL_NONE, 0, "amapchunkpl", NULL);
	pool_sethiwat(&uvm_amap_chunk_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 */
static inline struct vm_amap *
amap_alloc1(int slots, int waitf, int lazyalloc)
{
	struct vm_amap *amap;
	struct vm_amap_chunk *chunk, *tmp;
	int chunks, chunkperbucket = 1, hashshift = 0;
	int buckets, i, n;
	int pwaitf = (waitf & M_WAITOK) ? PR_WAITOK : PR_NOWAIT;

	KASSERT(slots > 0);

	/*
	 * Cast to unsigned so that rounding up cannot cause integer overflow
	 * if slots is large.
	 */
	chunks = roundup((unsigned int)slots, UVM_AMAP_CHUNK) / UVM_AMAP_CHUNK;

	if (lazyalloc) {
		/*
		 * Basically, the amap is a hash map where the number of
		 * buckets is fixed. We select the number of buckets using the
		 * following strategy:
		 *
		 * 1. The maximal number of entries to search in a bucket upon
		 * a collision should be less than or equal to
		 * log2(slots / UVM_AMAP_CHUNK). This is the worst-case number
		 * of lookups we would have if we could chunk the amap. The
		 * log2(n) comes from the fact that amaps are chunked by
		 * splitting up their vm_map_entries and organizing those
		 * in a binary search tree.
		 *
		 * 2. The maximal number of entries in a bucket must be a
		 * power of two.
		 *
		 * The maximal number of entries per bucket is used to hash
		 * a slot to a bucket.
		 *
		 * In the future, this strategy could be refined to make it
		 * even harder/impossible that the total amount of KVA needed
		 * for the hash buckets of all amaps to exceed the maximal
		 * amount of KVA memory reserved for amaps.
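		 *
		 * As an illustrative trace of the sizing loop below: an
		 * amap covering 32 chunks ends up with hashshift == 3 and
		 * chunkperbucket == 8, the smallest power of two that is
		 * not below log2(32) == 5, so howmany(32, 8) == 4 buckets
		 * get allocated for it further down.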
		 */
		chunkperbucket = 1 << hashshift;
		while ((1 << chunkperbucket) * 2 <= chunks) {
			hashshift++;
			chunkperbucket = 1 << hashshift;
		}
	}

	if (slots > UVM_AMAP_CHUNK)
		amap = pool_get(&uvm_amap_pool, pwaitf);
	else
		amap = pool_get(&uvm_small_amap_pool[slots - 1],
		    pwaitf | PR_ZERO);
	if (amap == NULL)
		return(NULL);

	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_nslot = slots;
	amap->am_nused = 0;

	if (UVM_AMAP_SMALL(amap)) {
		amap->am_small.ac_nslot = slots;
		return (amap);
	}

	amap->am_ncused = 0;
	TAILQ_INIT(&amap->am_chunks);
	amap->am_hashshift = hashshift;
	amap->am_buckets = NULL;

	buckets = howmany(chunks, chunkperbucket);
	amap->am_buckets = mallocarray(buckets, sizeof(*amap->am_buckets),
	    M_UVMAMAP, waitf | (lazyalloc ? M_ZERO : 0));
	if (amap->am_buckets == NULL)
		goto fail1;

	if (!lazyalloc) {
		for (i = 0; i < buckets; i++) {
			if (i == buckets - 1) {
				n = slots % UVM_AMAP_CHUNK;
				if (n == 0)
					n = UVM_AMAP_CHUNK;
			} else
				n = UVM_AMAP_CHUNK;

			chunk = pool_get(&uvm_amap_chunk_pool,
			    PR_ZERO | pwaitf);
			if (chunk == NULL)
				goto fail1;

			amap->am_buckets[i] = chunk;
			amap->am_ncused++;
			chunk->ac_baseslot = i * UVM_AMAP_CHUNK;
			chunk->ac_nslot = n;
			TAILQ_INSERT_TAIL(&amap->am_chunks, chunk, ac_list);
		}
	}

	return(amap);

fail1:
	free(amap->am_buckets, M_UVMAMAP, 0);
	TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
		pool_put(&uvm_amap_chunk_pool, chunk);
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 */
struct vm_amap *
amap_alloc(vaddr_t sz, int waitf, int lazyalloc)
{
	struct vm_amap *amap;
	size_t slots;

	AMAP_B2SLOT(slots, sz);		/* load slots */
	if (slots > INT_MAX)
		return (NULL);

	amap = amap_alloc1(slots, waitf, lazyalloc);
	if (amap)
		amap_list_insert(amap);

	return(amap);
}


/*
 * amap_free: free an amap
 *
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	struct vm_amap_chunk *chunk, *tmp;

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP, 0);
#endif

	if (UVM_AMAP_SMALL(amap))
		pool_put(&uvm_small_amap_pool[amap->am_nslot - 1], amap);
	else {
		TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
			pool_put(&uvm_amap_chunk_pool, chunk);
		free(amap->am_buckets, M_UVMAMAP, 0);
		pool_put(&uvm_amap_pool, amap);
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *	discarded (i.e. when reference count == 1)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int slot;
	struct vm_anon *anon;
	struct vm_amap_chunk *chunk;

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/* amap_swap_off will call us again. */
		return;
	}
	amap_list_remove(amap);

	AMAP_CHUNK_FOREACH(chunk, amap) {
		int i, refs, map = chunk->ac_usedmap;

		for (i = ffs(map); i != 0; i = ffs(map)) {
			slot = i - 1;
			map ^= 1 << slot;
			anon = chunk->ac_anon[slot];

			if (anon == NULL || anon->an_ref == 0)
				panic("amap_wipeout: corrupt amap");

			refs = --anon->an_ref;
			if (refs == 0) {
				/*
				 * we had the last reference to a vm_anon.
				 * free it.
				 */
				uvm_anfree(anon);
			}
		}
	}

	/* now we free the map */
	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will free amap */
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *	used to limit chunking (e.g. if you have a large space that you
 *	know you are going to need to allocate amaps for, there is no point
 *	in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv, lazyalloc = 0;
	vaddr_t chunksize;
	int i, j, k, n, srcslot;
	struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL;

	/* is there a map to copy?  if not, create one from scratch. */
	if (entry->aref.ar_amap == NULL) {
		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 *
		 * if we cannot chunk the amap, allocate it in a way
		 * that makes it grow or shrink dynamically with
		 * the number of slots.
		 */
		if (atop(entry->end - entry->start) >= UVM_AMAP_LARGE) {
			if (canchunk) {
				/* convert slots to bytes */
				chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
				startva = (startva / chunksize) * chunksize;
				endva = roundup(endva, chunksize);
				UVM_MAP_CLIP_START(map, entry, startva);
				/* watch out for endva wrap-around! */
				if (endva >= startva)
					UVM_MAP_CLIP_END(map, entry, endva);
			} else
				lazyalloc = 1;
		}

		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start,
		    waitf, lazyalloc);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  the value can only
	 * be one if we have the only reference to the amap
	 */
	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/* looks like we need to copy the map. */
	AMAP_B2SLOT(slots, entry->end - entry->start);
	if (!UVM_AMAP_SMALL(entry->aref.ar_amap) &&
	    entry->aref.ar_amap->am_hashshift != 0)
		lazyalloc = 1;
	amap = amap_alloc1(slots, waitf, lazyalloc);
	if (amap == NULL)
		return;
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now.  the reference count
	 * could have changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */
	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/* we must copy it now. */
	for (lcv = 0; lcv < slots; lcv += n) {
		srcslot = entry->aref.ar_pageoff + lcv;
		i = UVM_AMAP_SLOTIDX(lcv);
		j = UVM_AMAP_SLOTIDX(srcslot);
		n = UVM_AMAP_CHUNK;
		if (i > j)
			n -= i;
		else
			n -= j;
		if (lcv + n > slots)
			n = slots - lcv;

		srcchunk = amap_chunk_get(srcamap, srcslot, 0, PR_NOWAIT);
		if (srcchunk == NULL)
			continue;

		chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT);
		if (chunk == NULL) {
			amap->am_ref = 0;
			amap_wipeout(amap);
			return;
		}

		for (k = 0; k < n; i++, j++, k++) {
			chunk->ac_anon[i] = srcchunk->ac_anon[j];
			if (chunk->ac_anon[i] == NULL)
				continue;

			chunk->ac_usedmap |= (1 << i);
			chunk->ac_anon[i]->an_ref++;
			amap->am_nused++;
		}
	}

	/*
	 * drop our reference to the old amap (srcamap).
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */
	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/* install new amap. */
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.  in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => caller passes child's map/entry in to us
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;
	struct vm_amap_chunk *chunk;

	/*
	 * note that if we wait, we must ReStart the "lcv" for loop because
	 * some other process could reorder the anon's in the
	 * am_anon[] array on us.
	 */
ReStart:
	AMAP_CHUNK_FOREACH(chunk, amap) {
		int i, map = chunk->ac_usedmap;

		for (i = ffs(map); i != 0; i = ffs(map)) {
			slot = i - 1;
			map ^= 1 << slot;
			anon = chunk->ac_anon[slot];
			pg = anon->an_page;

			/* page must be resident since parent is wired */
			if (pg == NULL)
				panic("amap_cow_now: non-resident wired page"
				    " in anon %p", anon);

			/*
			 * if the anon ref count is one, we are safe (the child
			 * has exclusive access to the page).
			 */
			if (anon->an_ref <= 1)
				continue;

			/*
			 * if the page is busy then we have to wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_WAIT(pg, FALSE, "cownow", 0);
				goto ReStart;
			}

			/* ok, time to do a copy-on-write to a new anon */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					uvm_anfree(nanon);
				}
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			chunk->ac_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}
	}
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/* now: we have a valid am_mapped array. */
	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/* establish ppref before we add a duplicate reference to the amap */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int),
	    M_UVMAMAP, M_NOWAIT|M_ZERO);

	/* if we fail then we just won't use ppref for this amap */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/* init ppref */
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */
	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv; /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/* Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}

void
amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk,
    int slotoff, int slots)
{
	int curslot, i, map;
	int startbase, endbase;
	struct vm_anon *anon;

	startbase = AMAP_BASE_SLOT(slotoff);
	endbase = AMAP_BASE_SLOT(slotoff + slots - 1);

	map = chunk->ac_usedmap;
	if (startbase == chunk->ac_baseslot)
		map &= ~((1 << (slotoff - startbase)) - 1);
	if (endbase == chunk->ac_baseslot)
		map &= (1 << (slotoff + slots - endbase)) - 1;

	for (i = ffs(map); i != 0; i = ffs(map)) {
		int refs;

		curslot = i - 1;
		map ^= 1 << curslot;
		chunk->ac_usedmap ^= 1 << curslot;
		anon = chunk->ac_anon[curslot];

		/* remove it from the amap */
		chunk->ac_anon[curslot] = NULL;

		amap->am_nused--;

		/* drop anon reference count */
		refs = --anon->an_ref;
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an
			 * anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}
}

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int bucket, startbucket, endbucket;
	struct vm_amap_chunk *chunk, *nchunk;

	startbucket = UVM_AMAP_BUCKET(amap, slotoff);
	endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1);

	/*
	 * we can either traverse the amap by am_chunks or by am_buckets
	 * depending on which is cheaper.  decide now.
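	 * (as an illustrative example: wiping a handful of used chunks
	 * spread across a wide range of buckets is cheaper by walking
	 * the am_chunks list, while wiping a narrow bucket range of a
	 * densely populated amap is cheaper by walking am_buckets.)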
	 */
	if (UVM_AMAP_SMALL(amap))
		amap_wiperange_chunk(amap, &amap->am_small, slotoff, slots);
	else if (endbucket + 1 - startbucket >= amap->am_ncused) {
		TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, nchunk) {
			if (chunk->ac_baseslot + chunk->ac_nslot <= slotoff)
				continue;
			if (chunk->ac_baseslot >= slotoff + slots)
				continue;

			amap_wiperange_chunk(amap, chunk, slotoff, slots);
			if (chunk->ac_usedmap == 0)
				amap_chunk_free(amap, chunk);
		}
	} else {
		for (bucket = startbucket; bucket <= endbucket; bucket++) {
			for (chunk = amap->am_buckets[bucket]; chunk != NULL;
			    chunk = nchunk) {
				nchunk = TAILQ_NEXT(chunk, ac_list);

				if (UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) !=
				    bucket)
					break;
				if (chunk->ac_baseslot + chunk->ac_nslot <=
				    slotoff)
					continue;
				if (chunk->ac_baseslot >= slotoff + slots)
					continue;

				amap_wiperange_chunk(amap, chunk, slotoff,
				    slots);
				if (chunk->ac_usedmap == 0)
					amap_chunk_free(amap, chunk);
			}
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	boolean_t rv = FALSE;

	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i, map;
		struct vm_amap_chunk *chunk;

again:
		AMAP_CHUNK_FOREACH(chunk, am) {
			map = chunk->ac_usedmap;

			for (i = ffs(map); i != 0; i = ffs(map)) {
				int swslot;
				int slot = i - 1;
				struct vm_anon *anon;

				map ^= 1 << slot;
				anon = chunk->ac_anon[slot];

				swslot = anon->an_swslot;
				if (swslot < startslot || endslot <= swslot) {
					continue;
				}

				am->am_flags |= AMAP_SWAPOFF;

				rv = uvm_anon_pagein(anon);

				am->am_flags &= ~AMAP_SWAPOFF;
				if (rv || amap_refs(am) == 0)
					goto nextamap;
				goto again;
			}
		}

nextamap:
		am_next = LIST_NEXT(am, am_list);
		if (amap_refs(am) == 0)
			amap_wipeout(am);
	}

	return rv;
}

/*
 * amap_lookup: look up a page in an amap
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	struct vm_amap_chunk *chunk;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_lookup: offset out of range");

	chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
	if (chunk == NULL)
		return NULL;

	return chunk->ac_anon[UVM_AMAP_SLOTIDX(slot)];
}

/*
 * amap_lookups: look up a range of pages in an amap
 *
 * => XXXCDC: this interface is biased toward array-based amaps.  fix.
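 * => illustrative sketch: a lookup of 2 * UVM_AMAP_CHUNK pages that
 *    starts in the middle of a chunk is satisfied by three per-chunk
 *    copies in the loop below; chunks that do not exist zero-fill
 *    their part of the anons[] array instead.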
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
    struct vm_anon **anons, int npages)
{
	int i, lcv, n, slot;
	struct vm_amap *amap = aref->ar_amap;
	struct vm_amap_chunk *chunk = NULL;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if ((slot + (npages - 1)) >= amap->am_nslot)
		panic("amap_lookups: offset out of range");

	for (i = 0, lcv = slot; lcv < slot + npages; i += n, lcv += n) {
		n = UVM_AMAP_CHUNK - UVM_AMAP_SLOTIDX(lcv);
		if (lcv + n > slot + npages)
			n = slot + npages - lcv;

		chunk = amap_chunk_get(amap, lcv, 0, PR_NOWAIT);
		if (chunk == NULL)
			memset(&anons[i], 0, n * sizeof(*anons));
		else
			memcpy(&anons[i],
			    &chunk->ac_anon[UVM_AMAP_SLOTIDX(lcv)],
			    n * sizeof(*anons));
	}
}

/*
 * amap_populate: ensure that the amap can store an anon for the page at
 * offset. This function can sleep until memory to store the anon is
 * available.
 */
void
amap_populate(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	struct vm_amap_chunk *chunk;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_populate: offset out of range");

	chunk = amap_chunk_get(amap, slot, 1, PR_WAITOK);
	KASSERT(chunk != NULL);
}

/*
 * amap_add: add (or replace) a page to an amap
 *
 * => returns 0 if adding the page was successful or 1 when not.
 */
int
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    boolean_t replace)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	struct vm_amap_chunk *chunk;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_add: offset out of range");
	chunk = amap_chunk_get(amap, slot, 1, PR_NOWAIT);
	if (chunk == NULL)
		return 1;

	slot = UVM_AMAP_SLOTIDX(slot);
	if (replace) {
		if (chunk->ac_anon[slot] == NULL)
			panic("amap_add: replacing null anon");
		if (chunk->ac_anon[slot]->an_page != NULL &&
		    (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(chunk->ac_anon[slot]->an_page,
			    PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {	/* !replace */
		if (chunk->ac_anon[slot] != NULL)
			panic("amap_add: slot in use");

		chunk->ac_usedmap |= 1 << slot;
		amap->am_nused++;
	}
	chunk->ac_anon[slot] = anon;

	return 0;
}

/*
 * amap_unadd: remove a page from an amap
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	struct vm_amap_chunk *chunk;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_unadd: offset out of range");
	chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
	if (chunk == NULL)
		panic("amap_unadd: chunk for slot %d not present", slot);

	slot = UVM_AMAP_SLOTIDX(slot);
	if (chunk->ac_anon[slot] == NULL)
		panic("amap_unadd: nothing there");

	chunk->ac_anon[slot] = NULL;
	chunk->ac_usedmap &= ~(1 << slot);
	amap->am_nused--;

	if (chunk->ac_usedmap == 0)
		amap_chunk_free(amap, chunk);
}

/*
 * amap_ref: gain a reference to an amap
 *
 * => "offset" and "len" are in units of pages
 * => called at fork time to gain the child's reference
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{

	amap->am_ref++;
	if (flags & AMAP_SHARED)
		amap->am_flags |= AMAP_SHARED;
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
	    len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (flags & AMAP_REFALL)
			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
		else
			amap_pp_adjref(amap, offset, len, 1);
	}
#endif
}

/*
 * amap_unref: remove a reference to an amap
 *
 * => caller must remove all pmap-level references to this amap before
 *	dropping the reference
 * => called from uvm_unmap_detach [only]  ... note that entry is no
 *	longer part of a map
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
{

	/* if we are the last reference, free the amap and return. */
	if (amap->am_ref-- == 1) {
		amap_wipeout(amap);	/* drops final ref and frees */
		return;
	}

	/* otherwise just drop the reference count(s) */
	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (all)
			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
		else
			amap_pp_adjref(amap, offset, len, -1);
	}
#endif
}