/*	$OpenBSD: uvm_amap.c,v 1.53 2014/07/12 18:44:01 tedu Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 *
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

LIST_HEAD(, vm_amap) amap_list;

#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:   2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref:  -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *               <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows a single int to contain the ref count for the whole
 * chunk.  note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
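
/*
 * illustrative sketch (not compiled): encoding the first few chunks of
 * the example table above with the helpers.  the array size and values
 * are assumptions chosen for the sketch only.
 *
 *	int ppref[16], ref, len;
 *
 *	pp_setreflen(ppref, 0, 2, 4);	ppref[0] == -3, ppref[1] == 4
 *	pp_setreflen(ppref, 4, 3, 1);	ppref[4] == 4 (chunk of length 1)
 *	pp_setreflen(ppref, 5, 1, 2);	ppref[5] == -2, ppref[6] == 2
 *	pp_setreflen(ppref, 7, 0, 3);	ppref[7] == -1, ppref[8] == 3
 *
 *	pp_getreflen(ppref, 0, &ref, &len);	ref == 2, len == 4
 *	pp_getreflen(ppref, 7, &ref, &len);	ref == 0, len == 3
 */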
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/* Initialize the vm_amap pool. */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}
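
/*
 * illustrative sketch (not compiled): amap_alloc1() below carves am_slots,
 * am_bckptr and am_anon out of a single malloc'd region of
 * totalslots * MALLOC_SLOT_UNIT bytes, laid out back to back:
 *
 *	offset 0                            : am_slots   (totalslots ints)
 *	offset totalslots * sizeof(int)     : am_bckptr  (totalslots ints)
 *	offset 2 * totalslots * sizeof(int) : am_anon    (totalslots pointers)
 *
 * e.g. with totalslots == 8, sizeof(int) == 4 and
 * sizeof(struct vm_anon *) == 8 (assumptions for the example only), the
 * region is 8 * (2 * 4 + 8) == 128 bytes, with am_bckptr at offset 32
 * and am_anon at offset 64.
 */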

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
	    : PR_NOWAIT);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
	    sizeof(int));
	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
	    totalslots * sizeof(int));

	return(amap);

fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 */
struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	return(amap);
}


/*
 * amap_free: free an amap
 *
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP, 0);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP, 0);
#endif
	pool_put(&uvm_amap_pool, amap);

}
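
/*
 * illustrative sketch (not compiled): how amap_alloc() sizes an amap.
 * the numbers assume PAGE_SIZE == 4096 and are for the example only.
 *
 *	struct vm_amap *amap;
 *
 *	amap = amap_alloc(16 * PAGE_SIZE, 0, M_WAITOK);
 *
 * AMAP_B2SLOT() converts the byte size to 16 slots, so on success
 * amap->am_nslot == 16, amap->am_nused == 0, amap->am_ref == 1 and
 * amap->am_maxslot >= 16 (rounded up by malloc_roundup()).
 */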

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */
	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to take account for these slots.
	 */
	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;
		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */
	if (slotneed >= UVM_AMAP_LARGE)
		return E2BIG;

	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP, 0);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP, 0);
		}
#endif
		return (ENOMEM);
	}
	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
	    sizeof(int));
	KASSERT(amap->am_maxslot < slotneed);

	/* now copy everything over to new malloc'd areas... */
	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP, 0);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP, 0);
#endif
	return (0);
}
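
/*
 * illustrative sketch (not compiled): the three amap_extend() cases, using
 * example numbers (assumptions, not taken from any particular workload).
 * suppose ar_pageoff == 0, the entry currently maps 4 pages and we extend
 * by 2 pages, so slotneed == 0 + 4 + 2 == 6:
 *
 *	am_nslot == 8, am_maxslot == 8	-> case 1: enough slots already,
 *					   only ppref (if any) is adjusted
 *	am_nslot == 4, am_maxslot == 8	-> case 2: just set am_nslot = 6
 *	am_nslot == 4, am_maxslot == 4	-> case 3: malloc a larger area,
 *					   copy am_slots/am_bckptr/am_anon
 *					   over, then free the old area
 */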

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *    dropped (i.e. the reference count reaches zero)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/* amap_swap_off will call us again. */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		refs = --anon->an_ref;
		if (refs == 0) {
			/* we had the last reference to a vm_anon.  free it. */
			uvm_anfree(anon);
		}
	}

	/* now we free the map */
	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will free amap */
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *    used to limit chunking (e.g. if you have a large space that you
 *    know you are going to need to allocate amaps for, there is no point
 *    in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;

	/* is there a map to copy?   if not, create one from scratch. */
	if (entry->aref.ar_amap == NULL) {
		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */
		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}

		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  the value can only
	 * be one if we have the only reference to the amap
	 */
	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/* looks like we need to copy the map. */
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL)
		return;
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now.  the reference count
	 * could have changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */
	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/* we must copy it now. */
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap).
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */
	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/* install new amap. */
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);
}
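
/*
 * illustrative sketch (not compiled): the chunking arithmetic above, with
 * example values (assuming PAGE_SIZE == 4096 and UVM_AMAP_CHUNK == 16,
 * i.e. chunksize == 64KB; the addresses are made up for the example).
 *
 *	startva = 0x12345000;
 *	endva   = 0x12489000;
 *
 *	startva = (startva / chunksize) * chunksize;	-> 0x12340000
 *	endva   = roundup(endva, chunksize);		-> 0x12490000
 *
 * the entry is then clipped to [0x12340000, 0x12490000) so that the new
 * amap covers a whole number of chunks instead of the full mapping.
 */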

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => caller passes child's map/entry in to us
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *    currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we wait, we must ReStart the "lcv" for loop because
	 * some other process could reorder the anon's in the
	 * am_anon[] array on us.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		/* get the page */
		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		pg = anon->an_page;

		/* page must be resident since parent is wired */
		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page"
			    " in anon %p", anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */
		if (anon->an_ref > 1 && pg->loan_count == 0) {
			/*
			 * if the page is busy then we have to wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_WAIT(pg, FALSE, "cownow", 0);
				goto ReStart;
			}

			/* ok, time to do a copy-on-write to a new anon */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					uvm_anfree(nanon);
				}
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}
	}
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/* make sure the split point falls within the amap */
	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/* establish ppref before we add a duplicate reference to the amap */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT|M_ZERO);

	/* if we fail then we just won't use ppref for this amap */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/* init ppref */
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */
	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref,
			    len - (curslot - lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/* Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}
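
/*
 * illustrative sketch (not compiled): amap_pp_adjref() fragments chunks at
 * the range boundaries and then adjusts the refs inside.  with an example
 * 8-slot amap whose ppref encodes one chunk of 8 slots at ref 2
 * (ppref[0] == -3, ppref[1] == 8), the call
 *
 *	amap_pp_adjref(amap, 2, 4, -1);
 *
 * splits the chunk at slots 2 and 6 and drops the middle piece to ref 1,
 * leaving:
 *
 *	slots:   0  1  2  3  4  5  6  7
 *	ppref:  -3  2 -2  4  x  x -3  2
 */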

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.    decide now.
	 */
	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/* remove it from the amap */
		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;    /* back ptr. */
		}
		amap->am_nused--;

		/* drop anon reference count */
		refs = --anon->an_ref;
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}
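
/*
 * illustrative sketch (not compiled): the am_slots/am_bckptr bookkeeping
 * used above (and in amap_unadd()).  am_slots is a dense array of the
 * slots in use and am_bckptr[slot] gives that slot's position in am_slots,
 * so a slot can be removed in O(1) by moving the last am_slots entry into
 * the hole.  example values are assumptions for the sketch only:
 *
 *	before removing slot 5 (am_nused == 3):
 *		am_slots = { 2, 5, 9 }
 *		am_bckptr[2] == 0, am_bckptr[5] == 1, am_bckptr[9] == 2
 *
 *	ptr = am_bckptr[5] == 1; since it is not the last entry we set
 *	am_slots[1] = am_slots[2] (slot 9) and am_bckptr[9] = 1, then
 *	am_nused drops to 2:
 *		am_slots = { 2, 9 }
 *		am_bckptr[2] == 0, am_bckptr[9] == 1
 */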

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}

/*
 * amap_lookup: look up a page in an amap
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_lookup: offset out of range");

	return(amap->am_anon[slot]);
}

/*
 * amap_lookups: look up a range of pages in an amap
 *
 * => XXXCDC: this interface is biased toward array-based amaps.  fix.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
    struct vm_anon **anons, int npages)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if ((slot + (npages - 1)) >= amap->am_nslot)
		panic("amap_lookups: offset out of range");

	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

	return;
}

/*
 * amap_add: add (or replace) a page to an amap
 *
 * => the offset passed in is the same one later given to amap_unadd()
 *    to remove the page.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    boolean_t replace)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_add: offset out of range");

	if (replace) {
		if (amap->am_anon[slot] == NULL)
			panic("amap_add: replacing null anon");
		if (amap->am_anon[slot]->an_page != NULL &&
		    (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(amap->am_anon[slot]->an_page,
			    VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {	/* !replace */
		if (amap->am_anon[slot] != NULL)
			panic("amap_add: slot in use");

		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
}
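
/*
 * illustrative sketch (not compiled): the offset-to-slot arithmetic shared
 * by amap_lookup(), amap_add() and amap_unadd().  "offset" is the byte
 * offset of the page relative to the start of the map entry, and
 * ar_pageoff shifts it into the amap.  the values are assumptions for the
 * example only:
 *
 *	aref->ar_pageoff == 3, offset == 2 * PAGE_SIZE
 *
 *	AMAP_B2SLOT(slot, offset);	slot == 2
 *	slot += aref->ar_pageoff;	slot == 5
 *
 * so amap_lookup(aref, 2 * PAGE_SIZE) returns amap->am_anon[5].
 */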

/*
 * amap_unadd: remove a page from an amap
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	int ptr, slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_unadd: offset out of range");

	if (amap->am_anon[slot] == NULL)
		panic("amap_unadd: nothing there");

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
	}
	amap->am_nused--;
}

/*
 * amap_ref: gain a reference to an amap
 *
 * => "offset" and "len" are in units of pages
 * => called at fork time to gain the child's reference
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{

	amap->am_ref++;
	if (flags & AMAP_SHARED)
		amap->am_flags |= AMAP_SHARED;
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
	    len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (flags & AMAP_REFALL)
			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
		else
			amap_pp_adjref(amap, offset, len, 1);
	}
#endif
}

/*
 * amap_unref: remove a reference to an amap
 *
 * => caller must remove all pmap-level references to this amap before
 *    dropping the reference
 * => called from uvm_unmap_detach [only] ... note that entry is no
 *    longer part of a map
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
{

	/* if we are the last reference, free the amap and return. */
	if (amap->am_ref-- == 1) {
		amap_wipeout(amap);	/* drops final ref and frees */
		return;
	}

	/* otherwise just drop the reference count(s) */
	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (all)
			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
		else
			amap_pp_adjref(amap, offset, len, -1);
	}
#endif
}