1 /* $OpenBSD: uvm_pmemrange.c,v 1.40 2014/04/13 23:14:15 tedu Exp $ */ 2 3 /* 4 * Copyright (c) 2009, 2010 Ariane van der Steldt <ariane@stack.nl> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <uvm/uvm.h> 22 #include <sys/malloc.h> 23 #include <sys/proc.h> /* XXX for atomic */ 24 #include <sys/kernel.h> 25 #include <sys/mount.h> 26 27 /* 28 * 2 trees: addr tree and size tree. 29 * 30 * The allocator keeps chunks of free pages (called a range). 31 * Two pages are part of the same range if: 32 * - all pages in between are part of that range, 33 * - they are of the same memory type (zeroed or non-zeroed), 34 * - they are part of the same pmemrange. 35 * A pmemrange is a range of memory which is part of the same vm_physseg 36 * and has a use-count. 37 * 38 * addr tree is vm_page[0].objt 39 * size tree is vm_page[1].objt 40 * 41 * The size tree is not used for memory ranges of 1 page, instead, 42 * single queue is vm_page[0].pageq 43 * 44 * vm_page[0].fpgsz describes the length of a free range. Two adjecent ranges 45 * are joined, unless: 46 * - they have pages in between them which are not free 47 * - they belong to different memtypes (zeroed vs dirty memory) 48 * - they are in different pmemrange areas (ISA vs non-ISA memory for instance) 49 * - they are not a continuation of the same array 50 * The latter issue is caused by vm_physseg ordering and splitting from the 51 * MD initialization machinery. The MD code is dependant on freelists and 52 * happens to split ISA memory from non-ISA memory. 53 * (Note: freelists die die die!) 54 * 55 * uvm_page_init guarantees that every vm_physseg contains an array of 56 * struct vm_page. Also, uvm_page_physload allocates an array of struct 57 * vm_page. This code depends on that array. The array may break across 58 * vm_physsegs boundaries. 59 */ 60 61 /* 62 * Validate the flags of the page. (Used in asserts.) 63 * Any free page must have the PQ_FREE flag set. 64 * Free pages may be zeroed. 65 * Pmap flags are left untouched. 66 * 67 * The PQ_FREE flag is not checked here: by not checking, we can easily use 68 * this check in pages which are freed. 69 */ 70 #define VALID_FLAGS(pg_flags) \ 71 (((pg_flags) & ~(PQ_FREE|PG_ZERO|PG_PMAPMASK)) == 0x0) 72 73 /* Tree comparators. */ 74 int uvm_pmemrange_addr_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *); 75 int uvm_pmemrange_use_cmp(struct uvm_pmemrange *, struct uvm_pmemrange *); 76 int uvm_pmr_addr_cmp(struct vm_page *, struct vm_page *); 77 int uvm_pmr_size_cmp(struct vm_page *, struct vm_page *); 78 int uvm_pmr_pg_to_memtype(struct vm_page *); 79 80 #ifdef DDB 81 void uvm_pmr_print(void); 82 #endif 83 84 /* 85 * Memory types. The page flags are used to derive what the current memory 86 * type of a page is. 87 */ 88 int 89 uvm_pmr_pg_to_memtype(struct vm_page *pg) 90 { 91 if (pg->pg_flags & PG_ZERO) 92 return UVM_PMR_MEMTYPE_ZERO; 93 /* Default: dirty memory. */ 94 return UVM_PMR_MEMTYPE_DIRTY; 95 } 96 97 /* Trees. */ 98 RB_GENERATE(uvm_pmr_addr, vm_page, objt, uvm_pmr_addr_cmp); 99 RB_GENERATE(uvm_pmr_size, vm_page, objt, uvm_pmr_size_cmp); 100 RB_GENERATE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr, 101 uvm_pmemrange_addr_cmp); 102 103 /* Validation. */ 104 #ifdef DEBUG 105 void uvm_pmr_assertvalid(struct uvm_pmemrange *pmr); 106 #else 107 #define uvm_pmr_assertvalid(pmr) do {} while (0) 108 #endif 109 110 int uvm_pmr_get1page(psize_t, int, struct pglist *, 111 paddr_t, paddr_t); 112 113 struct uvm_pmemrange *uvm_pmr_allocpmr(void); 114 struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int); 115 struct vm_page *uvm_pmr_nextsz(struct uvm_pmemrange *, 116 struct vm_page *, int); 117 void uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, 118 struct vm_page *pg, struct vm_page **pg_prev, 119 struct vm_page **pg_next); 120 struct vm_page *uvm_pmr_findnextsegment(struct uvm_pmemrange *, 121 struct vm_page *, paddr_t); 122 psize_t uvm_pmr_remove_1strange(struct pglist *, paddr_t, 123 struct vm_page **, int); 124 void uvm_pmr_split(paddr_t); 125 struct uvm_pmemrange *uvm_pmemrange_find(paddr_t); 126 struct uvm_pmemrange *uvm_pmemrange_use_insert(struct uvm_pmemrange_use *, 127 struct uvm_pmemrange *); 128 psize_t pow2divide(psize_t, psize_t); 129 struct vm_page *uvm_pmr_rootupdate(struct uvm_pmemrange *, 130 struct vm_page *, paddr_t, paddr_t, int); 131 132 /* 133 * Computes num/denom and rounds it up to the next power-of-2. 134 * 135 * This is a division function which calculates an approximation of 136 * num/denom, with result =~ num/denom. It is meant to be fast and doesn't 137 * have to be accurate. 138 * 139 * Providing too large a value makes the allocator slightly faster, at the 140 * risk of hitting the failure case more often. Providing too small a value 141 * makes the allocator a bit slower, but less likely to hit a failure case. 142 */ 143 psize_t 144 pow2divide(psize_t num, psize_t denom) 145 { 146 int rshift; 147 148 for (rshift = 0; num > denom; rshift++, denom <<= 1) 149 ; 150 return (paddr_t)1 << rshift; 151 } 152 153 /* 154 * Predicate: lhs is a subrange or rhs. 155 * 156 * If rhs_low == 0: don't care about lower bound. 157 * If rhs_high == 0: don't care about upper bound. 158 */ 159 #define PMR_IS_SUBRANGE_OF(lhs_low, lhs_high, rhs_low, rhs_high) \ 160 (((rhs_low) == 0 || (lhs_low) >= (rhs_low)) && \ 161 ((rhs_high) == 0 || (lhs_high) <= (rhs_high))) 162 163 /* 164 * Predicate: lhs intersects with rhs. 165 * 166 * If rhs_low == 0: don't care about lower bound. 167 * If rhs_high == 0: don't care about upper bound. 168 * Ranges don't intersect if they don't have any page in common, array 169 * semantics mean that < instead of <= should be used here. 170 */ 171 #define PMR_INTERSECTS_WITH(lhs_low, lhs_high, rhs_low, rhs_high) \ 172 (((rhs_low) == 0 || (rhs_low) < (lhs_high)) && \ 173 ((rhs_high) == 0 || (lhs_low) < (rhs_high))) 174 175 /* 176 * Align to power-of-2 alignment. 177 */ 178 #define PMR_ALIGN(pgno, align) \ 179 (((pgno) + ((align) - 1)) & ~((align) - 1)) 180 181 182 /* 183 * Comparator: sort by address ascending. 184 */ 185 int 186 uvm_pmemrange_addr_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs) 187 { 188 return lhs->low < rhs->low ? -1 : lhs->low > rhs->low; 189 } 190 191 /* 192 * Comparator: sort by use ascending. 193 * 194 * The higher the use value of a range, the more devices need memory in 195 * this range. Therefor allocate from the range with the lowest use first. 196 */ 197 int 198 uvm_pmemrange_use_cmp(struct uvm_pmemrange *lhs, struct uvm_pmemrange *rhs) 199 { 200 int result; 201 202 result = lhs->use < rhs->use ? -1 : lhs->use > rhs->use; 203 if (result == 0) 204 result = uvm_pmemrange_addr_cmp(lhs, rhs); 205 return result; 206 } 207 208 int 209 uvm_pmr_addr_cmp(struct vm_page *lhs, struct vm_page *rhs) 210 { 211 paddr_t lhs_addr, rhs_addr; 212 213 lhs_addr = VM_PAGE_TO_PHYS(lhs); 214 rhs_addr = VM_PAGE_TO_PHYS(rhs); 215 216 return (lhs_addr < rhs_addr ? -1 : lhs_addr > rhs_addr); 217 } 218 219 int 220 uvm_pmr_size_cmp(struct vm_page *lhs, struct vm_page *rhs) 221 { 222 psize_t lhs_size, rhs_size; 223 int cmp; 224 225 /* Using second tree, so we receive pg[1] instead of pg[0]. */ 226 lhs_size = (lhs - 1)->fpgsz; 227 rhs_size = (rhs - 1)->fpgsz; 228 229 cmp = (lhs_size < rhs_size ? -1 : lhs_size > rhs_size); 230 if (cmp == 0) 231 cmp = uvm_pmr_addr_cmp(lhs - 1, rhs - 1); 232 return cmp; 233 } 234 235 /* 236 * Find the first range of free pages that is at least sz pages long. 237 */ 238 struct vm_page * 239 uvm_pmr_nfindsz(struct uvm_pmemrange *pmr, psize_t sz, int mti) 240 { 241 struct vm_page *node, *best; 242 243 KASSERT(sz >= 1); 244 245 if (sz == 1 && !TAILQ_EMPTY(&pmr->single[mti])) 246 return TAILQ_FIRST(&pmr->single[mti]); 247 248 node = RB_ROOT(&pmr->size[mti]); 249 best = NULL; 250 while (node != NULL) { 251 if ((node - 1)->fpgsz >= sz) { 252 best = (node - 1); 253 node = RB_LEFT(node, objt); 254 } else 255 node = RB_RIGHT(node, objt); 256 } 257 return best; 258 } 259 260 /* 261 * Finds the next range. The next range has a size >= pg->fpgsz. 262 * Returns NULL if no more ranges are available. 263 */ 264 struct vm_page * 265 uvm_pmr_nextsz(struct uvm_pmemrange *pmr, struct vm_page *pg, int mt) 266 { 267 struct vm_page *npg; 268 269 KASSERT(pmr != NULL && pg != NULL); 270 if (pg->fpgsz == 1) { 271 if (TAILQ_NEXT(pg, pageq) != NULL) 272 return TAILQ_NEXT(pg, pageq); 273 else 274 npg = RB_MIN(uvm_pmr_size, &pmr->size[mt]); 275 } else 276 npg = RB_NEXT(uvm_pmr_size, &pmr->size[mt], pg + 1); 277 278 return npg == NULL ? NULL : npg - 1; 279 } 280 281 /* 282 * Finds the previous and next ranges relative to the (uninserted) pg range. 283 * 284 * *pg_prev == NULL if no previous range is available, that can join with 285 * pg. 286 * *pg_next == NULL if no next range is available, that can join with 287 * pg. 288 */ 289 void 290 uvm_pmr_pnaddr(struct uvm_pmemrange *pmr, struct vm_page *pg, 291 struct vm_page **pg_prev, struct vm_page **pg_next) 292 { 293 KASSERT(pg_prev != NULL && pg_next != NULL); 294 295 *pg_next = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg); 296 if (*pg_next == NULL) 297 *pg_prev = RB_MAX(uvm_pmr_addr, &pmr->addr); 298 else 299 *pg_prev = RB_PREV(uvm_pmr_addr, &pmr->addr, *pg_next); 300 301 KDASSERT(*pg_next == NULL || 302 VM_PAGE_TO_PHYS(*pg_next) > VM_PAGE_TO_PHYS(pg)); 303 KDASSERT(*pg_prev == NULL || 304 VM_PAGE_TO_PHYS(*pg_prev) < VM_PAGE_TO_PHYS(pg)); 305 306 /* Reset if not contig. */ 307 if (*pg_prev != NULL && 308 (atop(VM_PAGE_TO_PHYS(*pg_prev)) + (*pg_prev)->fpgsz 309 != atop(VM_PAGE_TO_PHYS(pg)) || 310 *pg_prev + (*pg_prev)->fpgsz != pg || /* Array broke. */ 311 uvm_pmr_pg_to_memtype(*pg_prev) != uvm_pmr_pg_to_memtype(pg))) 312 *pg_prev = NULL; 313 if (*pg_next != NULL && 314 (atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz 315 != atop(VM_PAGE_TO_PHYS(*pg_next)) || 316 pg + pg->fpgsz != *pg_next || /* Array broke. */ 317 uvm_pmr_pg_to_memtype(*pg_next) != uvm_pmr_pg_to_memtype(pg))) 318 *pg_next = NULL; 319 return; 320 } 321 322 /* 323 * Remove a range from the address tree. 324 * Address tree maintains pmr counters. 325 */ 326 void 327 uvm_pmr_remove_addr(struct uvm_pmemrange *pmr, struct vm_page *pg) 328 { 329 KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg); 330 KDASSERT(pg->pg_flags & PQ_FREE); 331 RB_REMOVE(uvm_pmr_addr, &pmr->addr, pg); 332 333 pmr->nsegs--; 334 } 335 /* 336 * Remove a range from the size tree. 337 */ 338 void 339 uvm_pmr_remove_size(struct uvm_pmemrange *pmr, struct vm_page *pg) 340 { 341 int memtype; 342 #ifdef DEBUG 343 struct vm_page *i; 344 #endif 345 346 KDASSERT(pg->fpgsz >= 1); 347 KDASSERT(pg->pg_flags & PQ_FREE); 348 memtype = uvm_pmr_pg_to_memtype(pg); 349 350 if (pg->fpgsz == 1) { 351 #ifdef DEBUG 352 TAILQ_FOREACH(i, &pmr->single[memtype], pageq) { 353 if (i == pg) 354 break; 355 } 356 KDASSERT(i == pg); 357 #endif 358 TAILQ_REMOVE(&pmr->single[memtype], pg, pageq); 359 } else { 360 KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[memtype], 361 pg + 1) == pg + 1); 362 RB_REMOVE(uvm_pmr_size, &pmr->size[memtype], pg + 1); 363 } 364 } 365 /* Remove from both trees. */ 366 void 367 uvm_pmr_remove(struct uvm_pmemrange *pmr, struct vm_page *pg) 368 { 369 uvm_pmr_assertvalid(pmr); 370 uvm_pmr_remove_size(pmr, pg); 371 uvm_pmr_remove_addr(pmr, pg); 372 uvm_pmr_assertvalid(pmr); 373 } 374 375 /* 376 * Insert the range described in pg. 377 * Returns the range thus created (which may be joined with the previous and 378 * next ranges). 379 * If no_join, the caller guarantees that the range cannot possibly join 380 * with adjecent ranges. 381 */ 382 struct vm_page * 383 uvm_pmr_insert_addr(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join) 384 { 385 struct vm_page *prev, *next; 386 387 #ifdef DEBUG 388 struct vm_page *i; 389 int mt; 390 #endif 391 392 KDASSERT(pg->pg_flags & PQ_FREE); 393 KDASSERT(pg->fpgsz >= 1); 394 395 #ifdef DEBUG 396 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) { 397 TAILQ_FOREACH(i, &pmr->single[mt], pageq) 398 KDASSERT(i != pg); 399 if (pg->fpgsz > 1) { 400 KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mt], 401 pg + 1) == NULL); 402 } 403 KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == NULL); 404 } 405 #endif 406 407 if (!no_join) { 408 uvm_pmr_pnaddr(pmr, pg, &prev, &next); 409 if (next != NULL) { 410 uvm_pmr_remove_size(pmr, next); 411 uvm_pmr_remove_addr(pmr, next); 412 pg->fpgsz += next->fpgsz; 413 next->fpgsz = 0; 414 } 415 if (prev != NULL) { 416 uvm_pmr_remove_size(pmr, prev); 417 prev->fpgsz += pg->fpgsz; 418 pg->fpgsz = 0; 419 return prev; 420 } 421 } 422 423 RB_INSERT(uvm_pmr_addr, &pmr->addr, pg); 424 425 pmr->nsegs++; 426 427 return pg; 428 } 429 /* 430 * Insert the range described in pg. 431 * Returns the range thus created (which may be joined with the previous and 432 * next ranges). 433 * Page must already be in the address tree. 434 */ 435 void 436 uvm_pmr_insert_size(struct uvm_pmemrange *pmr, struct vm_page *pg) 437 { 438 int memtype; 439 #ifdef DEBUG 440 struct vm_page *i; 441 int mti; 442 #endif 443 444 KDASSERT(pg->fpgsz >= 1); 445 KDASSERT(pg->pg_flags & PQ_FREE); 446 447 memtype = uvm_pmr_pg_to_memtype(pg); 448 #ifdef DEBUG 449 for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) { 450 TAILQ_FOREACH(i, &pmr->single[mti], pageq) 451 KDASSERT(i != pg); 452 if (pg->fpgsz > 1) { 453 KDASSERT(RB_FIND(uvm_pmr_size, &pmr->size[mti], 454 pg + 1) == NULL); 455 } 456 KDASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, pg) == pg); 457 } 458 for (i = pg; i < pg + pg->fpgsz; i++) 459 KASSERT(uvm_pmr_pg_to_memtype(i) == memtype); 460 #endif 461 462 if (pg->fpgsz == 1) 463 TAILQ_INSERT_TAIL(&pmr->single[memtype], pg, pageq); 464 else 465 RB_INSERT(uvm_pmr_size, &pmr->size[memtype], pg + 1); 466 } 467 /* Insert in both trees. */ 468 struct vm_page * 469 uvm_pmr_insert(struct uvm_pmemrange *pmr, struct vm_page *pg, int no_join) 470 { 471 uvm_pmr_assertvalid(pmr); 472 pg = uvm_pmr_insert_addr(pmr, pg, no_join); 473 uvm_pmr_insert_size(pmr, pg); 474 uvm_pmr_assertvalid(pmr); 475 return pg; 476 } 477 478 /* 479 * Find the last page that is part of this segment. 480 * => pg: the range at which to start the search. 481 * => boundary: the page number boundary specification (0 = no boundary). 482 * => pmr: the pmemrange of the page. 483 * 484 * This function returns 1 before the next range, so if you want to have the 485 * next range, you need to run TAILQ_NEXT(result, pageq) after calling. 486 * The reason is that this way, the length of the segment is easily 487 * calculated using: atop(result) - atop(pg) + 1. 488 * Hence this function also never returns NULL. 489 */ 490 struct vm_page * 491 uvm_pmr_findnextsegment(struct uvm_pmemrange *pmr, 492 struct vm_page *pg, paddr_t boundary) 493 { 494 paddr_t first_boundary; 495 struct vm_page *next; 496 struct vm_page *prev; 497 498 KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg)) && 499 pmr->high > atop(VM_PAGE_TO_PHYS(pg))); 500 if (boundary != 0) { 501 first_boundary = 502 PMR_ALIGN(atop(VM_PAGE_TO_PHYS(pg)) + 1, boundary); 503 } else 504 first_boundary = 0; 505 506 /* 507 * Increase next until it hits the first page of the next segment. 508 * 509 * While loop checks the following: 510 * - next != NULL we have not reached the end of pgl 511 * - boundary == 0 || next < first_boundary 512 * we do not cross a boundary 513 * - atop(prev) + 1 == atop(next) 514 * still in the same segment 515 * - low <= last 516 * - high > last still in the same memory range 517 * - memtype is equal allocator is unable to view different memtypes 518 * as part of the same segment 519 * - prev + 1 == next no array breakage occurs 520 */ 521 prev = pg; 522 next = TAILQ_NEXT(prev, pageq); 523 while (next != NULL && 524 (boundary == 0 || atop(VM_PAGE_TO_PHYS(next)) < first_boundary) && 525 atop(VM_PAGE_TO_PHYS(prev)) + 1 == atop(VM_PAGE_TO_PHYS(next)) && 526 pmr->low <= atop(VM_PAGE_TO_PHYS(next)) && 527 pmr->high > atop(VM_PAGE_TO_PHYS(next)) && 528 uvm_pmr_pg_to_memtype(prev) == uvm_pmr_pg_to_memtype(next) && 529 prev + 1 == next) { 530 prev = next; 531 next = TAILQ_NEXT(prev, pageq); 532 } 533 534 /* 535 * End of this segment. 536 */ 537 return prev; 538 } 539 540 /* 541 * Remove the first segment of contiguous pages from pgl. 542 * A segment ends if it crosses boundary (unless boundary = 0) or 543 * if it would enter a different uvm_pmemrange. 544 * 545 * Work: the page range that the caller is currently working with. 546 * May be null. 547 * 548 * If is_desperate is non-zero, the smallest segment is erased. Otherwise, 549 * the first segment is erased (which, if called by uvm_pmr_getpages(), 550 * probably is the smallest or very close to it). 551 */ 552 psize_t 553 uvm_pmr_remove_1strange(struct pglist *pgl, paddr_t boundary, 554 struct vm_page **work, int is_desperate) 555 { 556 struct vm_page *start, *end, *iter, *iter_end, *inserted; 557 psize_t count; 558 struct uvm_pmemrange *pmr, *pmr_iter; 559 560 KASSERT(!TAILQ_EMPTY(pgl)); 561 562 /* 563 * Initialize to first page. 564 * Unless desperate scan finds a better candidate, this is what'll be 565 * erased. 566 */ 567 start = TAILQ_FIRST(pgl); 568 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(start))); 569 end = uvm_pmr_findnextsegment(pmr, start, boundary); 570 571 /* 572 * If we are desperate, we _really_ want to get rid of the smallest 573 * element (rather than a close match to the smallest element). 574 */ 575 if (is_desperate) { 576 /* Linear search for smallest segment. */ 577 pmr_iter = pmr; 578 for (iter = TAILQ_NEXT(end, pageq); 579 iter != NULL && start != end; 580 iter = TAILQ_NEXT(iter_end, pageq)) { 581 /* 582 * Only update pmr if it doesn't match current 583 * iteration. 584 */ 585 if (pmr->low > atop(VM_PAGE_TO_PHYS(iter)) || 586 pmr->high <= atop(VM_PAGE_TO_PHYS(iter))) { 587 pmr_iter = uvm_pmemrange_find(atop( 588 VM_PAGE_TO_PHYS(iter))); 589 } 590 591 iter_end = uvm_pmr_findnextsegment(pmr_iter, iter, 592 boundary); 593 594 /* 595 * Current iteration is smaller than best match so 596 * far; update. 597 */ 598 if (VM_PAGE_TO_PHYS(iter_end) - VM_PAGE_TO_PHYS(iter) < 599 VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start)) { 600 start = iter; 601 end = iter_end; 602 pmr = pmr_iter; 603 } 604 } 605 } 606 607 /* 608 * Calculate count and end of the list. 609 */ 610 count = atop(VM_PAGE_TO_PHYS(end) - VM_PAGE_TO_PHYS(start)) + 1; 611 end = TAILQ_NEXT(end, pageq); 612 613 /* 614 * Actually remove the range of pages. 615 * 616 * Sadly, this cannot be done using pointer iteration: 617 * vm_physseg is not guaranteed to be sorted on address, hence 618 * uvm_page_init() may not have initialized its array sorted by 619 * page number. 620 */ 621 for (iter = start; iter != end; iter = iter_end) { 622 iter_end = TAILQ_NEXT(iter, pageq); 623 TAILQ_REMOVE(pgl, iter, pageq); 624 } 625 626 start->fpgsz = count; 627 inserted = uvm_pmr_insert(pmr, start, 0); 628 629 /* 630 * If the caller was working on a range and this function modified 631 * that range, update the pointer. 632 */ 633 if (work != NULL && *work != NULL && 634 atop(VM_PAGE_TO_PHYS(inserted)) <= atop(VM_PAGE_TO_PHYS(*work)) && 635 atop(VM_PAGE_TO_PHYS(inserted)) + inserted->fpgsz > 636 atop(VM_PAGE_TO_PHYS(*work))) 637 *work = inserted; 638 return count; 639 } 640 641 /* 642 * Extract a number of pages from a segment of free pages. 643 * Called by uvm_pmr_getpages. 644 * 645 * Returns the segment that was created from pages left over at the tail 646 * of the remove set of pages, or NULL if no pages were left at the tail. 647 */ 648 struct vm_page * 649 uvm_pmr_extract_range(struct uvm_pmemrange *pmr, struct vm_page *pg, 650 paddr_t start, paddr_t end, struct pglist *result) 651 { 652 struct vm_page *after, *pg_i; 653 psize_t before_sz, after_sz; 654 #ifdef DEBUG 655 psize_t i; 656 #endif 657 658 KDASSERT(end > start); 659 KDASSERT(pmr->low <= atop(VM_PAGE_TO_PHYS(pg))); 660 KDASSERT(pmr->high >= atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz); 661 KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) <= start); 662 KDASSERT(atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz >= end); 663 664 before_sz = start - atop(VM_PAGE_TO_PHYS(pg)); 665 after_sz = atop(VM_PAGE_TO_PHYS(pg)) + pg->fpgsz - end; 666 KDASSERT(before_sz + after_sz + (end - start) == pg->fpgsz); 667 uvm_pmr_assertvalid(pmr); 668 669 uvm_pmr_remove_size(pmr, pg); 670 if (before_sz == 0) 671 uvm_pmr_remove_addr(pmr, pg); 672 after = pg + before_sz + (end - start); 673 674 /* Add selected pages to result. */ 675 for (pg_i = pg + before_sz; pg_i != after; pg_i++) { 676 KASSERT(pg_i->pg_flags & PQ_FREE); 677 pg_i->fpgsz = 0; 678 TAILQ_INSERT_TAIL(result, pg_i, pageq); 679 } 680 681 /* Before handling. */ 682 if (before_sz > 0) { 683 pg->fpgsz = before_sz; 684 uvm_pmr_insert_size(pmr, pg); 685 } 686 687 /* After handling. */ 688 if (after_sz > 0) { 689 #ifdef DEBUG 690 for (i = 0; i < after_sz; i++) { 691 KASSERT(!uvm_pmr_isfree(after + i)); 692 } 693 #endif 694 KDASSERT(atop(VM_PAGE_TO_PHYS(after)) == end); 695 after->fpgsz = after_sz; 696 after = uvm_pmr_insert_addr(pmr, after, 1); 697 uvm_pmr_insert_size(pmr, after); 698 } 699 700 uvm_pmr_assertvalid(pmr); 701 return (after_sz > 0 ? after : NULL); 702 } 703 704 /* 705 * Acquire a number of pages. 706 * 707 * count: the number of pages returned 708 * start: lowest page number 709 * end: highest page number +1 710 * (start = end = 0: no limitation) 711 * align: power-of-2 alignment constraint (align = 1: no alignment) 712 * boundary: power-of-2 boundary (boundary = 0: no boundary) 713 * maxseg: maximum number of segments to return 714 * flags: UVM_PLA_* flags 715 * result: returned pages storage (uses pageq) 716 */ 717 int 718 uvm_pmr_getpages(psize_t count, paddr_t start, paddr_t end, paddr_t align, 719 paddr_t boundary, int maxseg, int flags, struct pglist *result) 720 { 721 struct uvm_pmemrange *pmr; /* Iterate memory ranges. */ 722 struct vm_page *found, *f_next; /* Iterate chunks. */ 723 psize_t fcount; /* Current found pages. */ 724 int fnsegs; /* Current segment counter. */ 725 int try, start_try; 726 psize_t search[3]; 727 paddr_t fstart, fend; /* Pages to be taken from found. */ 728 int memtype; /* Requested memtype. */ 729 int memtype_init; /* Best memtype. */ 730 int desperate; /* True if allocation failed. */ 731 #ifdef DIAGNOSTIC 732 struct vm_page *diag_prev; /* Used during validation. */ 733 #endif /* DIAGNOSTIC */ 734 735 /* 736 * Validate arguments. 737 */ 738 KASSERT(count > 0); 739 KASSERT(start == 0 || end == 0 || start < end); 740 KASSERT(align >= 1); 741 KASSERT(powerof2(align)); 742 KASSERT(maxseg > 0); 743 KASSERT(boundary == 0 || powerof2(boundary)); 744 KASSERT(boundary == 0 || maxseg * boundary >= count); 745 KASSERT(TAILQ_EMPTY(result)); 746 747 /* 748 * TRYCONTIG is a noop if you only want a single segment. 749 * Remove it if that's the case: otherwise it'll deny the fast 750 * allocation. 751 */ 752 if (maxseg == 1 || count == 1) 753 flags &= ~UVM_PLA_TRYCONTIG; 754 755 /* 756 * Configure search. 757 * 758 * search[0] is one segment, only used in UVM_PLA_TRYCONTIG case. 759 * search[1] is multiple segments, chosen to fulfill the search in 760 * approximately even-sized segments. 761 * This is a good trade-off between slightly reduced allocation speed 762 * and less fragmentation. 763 * search[2] is the worst case, in which all segments are evaluated. 764 * This provides the least fragmentation, but makes the search 765 * possibly longer (although in the case it is selected, that no 766 * longer matters most). 767 * 768 * The exception is when maxseg == 1: since we can only fulfill that 769 * with one segment of size pages, only a single search type has to 770 * be attempted. 771 */ 772 if (maxseg == 1 || count == 1) { 773 start_try = 2; 774 search[2] = count; 775 } else if (maxseg >= count && (flags & UVM_PLA_TRYCONTIG) == 0) { 776 start_try = 2; 777 search[2] = 1; 778 } else { 779 start_try = 0; 780 search[0] = count; 781 search[1] = pow2divide(count, maxseg); 782 search[2] = 1; 783 if ((flags & UVM_PLA_TRYCONTIG) == 0) 784 start_try = 1; 785 if (search[1] >= search[0]) { 786 search[1] = search[0]; 787 start_try = 1; 788 } 789 if (search[2] >= search[start_try]) { 790 start_try = 2; 791 } 792 } 793 794 /* 795 * Memory type: if zeroed memory is requested, traverse the zero set. 796 * Otherwise, traverse the dirty set. 797 * 798 * The memtype iterator is reinitialized to memtype_init on entrance 799 * of a pmemrange. 800 */ 801 if (flags & UVM_PLA_ZERO) 802 memtype_init = UVM_PMR_MEMTYPE_ZERO; 803 else 804 memtype_init = UVM_PMR_MEMTYPE_DIRTY; 805 806 /* 807 * Initially, we're not desperate. 808 * 809 * Note that if we return from a sleep, we are still desperate. 810 * Chances are that memory pressure is still high, so resetting 811 * seems over-optimistic to me. 812 */ 813 desperate = 0; 814 815 uvm_lock_fpageq(); 816 817 retry: /* Return point after sleeping. */ 818 fcount = 0; 819 fnsegs = 0; 820 821 retry_desperate: 822 /* 823 * If we just want any page(s), go for the really fast option. 824 */ 825 if (count <= maxseg && align == 1 && boundary == 0 && 826 (flags & UVM_PLA_TRYCONTIG) == 0) { 827 fcount += uvm_pmr_get1page(count - fcount, memtype_init, 828 result, start, end); 829 830 /* 831 * If we found sufficient pages, go to the succes exit code. 832 * 833 * Otherwise, go immediately to fail, since we collected 834 * all we could anyway. 835 */ 836 if (fcount == count) 837 goto out; 838 else 839 goto fail; 840 } 841 842 /* 843 * The heart of the contig case. 844 * 845 * The code actually looks like this: 846 * 847 * foreach (struct pmemrange) { 848 * foreach (memtype) { 849 * foreach(try) { 850 * foreach (free range of memtype in pmemrange, 851 * starting at search[try]) { 852 * while (range has space left) 853 * take from range 854 * } 855 * } 856 * } 857 * 858 * if next pmemrange has higher usecount than current: 859 * enter desperate case (which will drain the pmemranges 860 * until empty prior to moving to the next one) 861 * } 862 * 863 * When desperate is activated, try always starts at the highest 864 * value. The memtype loop is using a goto ReScanMemtype. 865 * The try loop is using a goto ReScan. 866 * The 'range has space left' loop uses label DrainFound. 867 * 868 * Writing them all as loops would take up a lot of screen space in 869 * the form of indentation and some parts are easier to express 870 * using the labels. 871 */ 872 873 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { 874 /* Empty range. */ 875 if (pmr->nsegs == 0) 876 continue; 877 878 /* Outside requested range. */ 879 if (!PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)) 880 continue; 881 882 memtype = memtype_init; 883 884 rescan_memtype: /* Return point at memtype++. */ 885 try = start_try; 886 887 rescan: /* Return point at try++. */ 888 for (found = uvm_pmr_nfindsz(pmr, search[try], memtype); 889 found != NULL; 890 found = f_next) { 891 f_next = uvm_pmr_nextsz(pmr, found, memtype); 892 893 fstart = atop(VM_PAGE_TO_PHYS(found)); 894 if (start != 0) 895 fstart = MAX(start, fstart); 896 drain_found: 897 /* 898 * Throw away the first segment if fnsegs == maxseg 899 * 900 * Note that f_next is still valid after this call, 901 * since we only allocated from entries before f_next. 902 * We don't revisit the entries we already extracted 903 * from unless we entered the desperate case. 904 */ 905 if (fnsegs == maxseg) { 906 fnsegs--; 907 fcount -= 908 uvm_pmr_remove_1strange(result, boundary, 909 &found, desperate); 910 } 911 912 fstart = PMR_ALIGN(fstart, align); 913 fend = atop(VM_PAGE_TO_PHYS(found)) + found->fpgsz; 914 if (end != 0) 915 fend = MIN(end, fend); 916 if (boundary != 0) { 917 fend = 918 MIN(fend, PMR_ALIGN(fstart + 1, boundary)); 919 } 920 if (fstart >= fend) 921 continue; 922 if (fend - fstart > count - fcount) 923 fend = fstart + (count - fcount); 924 925 fcount += fend - fstart; 926 fnsegs++; 927 found = uvm_pmr_extract_range(pmr, found, 928 fstart, fend, result); 929 930 if (fcount == count) 931 goto out; 932 933 /* 934 * If there's still space left in found, try to 935 * fully drain it prior to continueing. 936 */ 937 if (found != NULL) { 938 fstart = fend; 939 goto drain_found; 940 } 941 } 942 943 /* Try a smaller search now. */ 944 if (++try < nitems(search)) 945 goto rescan; 946 947 /* 948 * Exhaust all memory types prior to going to the next memory 949 * segment. 950 * This means that zero-vs-dirty are eaten prior to moving 951 * to a pmemrange with a higher use-count. 952 * 953 * Code is basically a difficult way of writing: 954 * memtype = memtype_init; 955 * do { 956 * ...; 957 * memtype += 1; 958 * memtype %= MEMTYPE_MAX; 959 * } while (memtype != memtype_init); 960 */ 961 memtype += 1; 962 if (memtype == UVM_PMR_MEMTYPE_MAX) 963 memtype = 0; 964 if (memtype != memtype_init) 965 goto rescan_memtype; 966 967 /* 968 * If not desperate, enter desperate case prior to eating all 969 * the good stuff in the next range. 970 */ 971 if (!desperate && TAILQ_NEXT(pmr, pmr_use) != NULL && 972 TAILQ_NEXT(pmr, pmr_use)->use != pmr->use) 973 break; 974 } 975 976 /* 977 * Not enough memory of the requested type available. Fall back to 978 * less good memory that we'll clean up better later. 979 * 980 * This algorithm is not very smart though, it just starts scanning 981 * a different typed range, but the nicer ranges of the previous 982 * iteration may fall out. Hence there is a small chance of a false 983 * negative. 984 * 985 * When desparate: scan all sizes starting at the smallest 986 * (start_try = 1) and do not consider UVM_PLA_TRYCONTIG (which may 987 * allow us to hit the fast path now). 988 * 989 * Also, because we will revisit entries we scanned before, we need 990 * to reset the page queue, or we may end up releasing entries in 991 * such a way as to invalidate f_next. 992 */ 993 if (!desperate) { 994 desperate = 1; 995 start_try = nitems(search) - 1; 996 flags &= ~UVM_PLA_TRYCONTIG; 997 998 while (!TAILQ_EMPTY(result)) 999 uvm_pmr_remove_1strange(result, 0, NULL, 0); 1000 fnsegs = 0; 1001 fcount = 0; 1002 goto retry_desperate; 1003 } 1004 1005 fail: 1006 /* Allocation failed. */ 1007 /* XXX: claim from memory reserve here */ 1008 1009 while (!TAILQ_EMPTY(result)) 1010 uvm_pmr_remove_1strange(result, 0, NULL, 0); 1011 1012 if (flags & UVM_PLA_WAITOK) { 1013 if (uvm_wait_pla(ptoa(start), ptoa(end) - 1, ptoa(count), 1014 flags & UVM_PLA_FAILOK) == 0) 1015 goto retry; 1016 KASSERT(flags & UVM_PLA_FAILOK); 1017 } else 1018 wakeup(&uvm.pagedaemon); 1019 uvm_unlock_fpageq(); 1020 1021 return ENOMEM; 1022 1023 out: 1024 /* Allocation succesful. */ 1025 uvmexp.free -= fcount; 1026 1027 uvm_unlock_fpageq(); 1028 1029 /* Update statistics and zero pages if UVM_PLA_ZERO. */ 1030 #ifdef DIAGNOSTIC 1031 fnsegs = 0; 1032 fcount = 0; 1033 diag_prev = NULL; 1034 #endif /* DIAGNOSTIC */ 1035 TAILQ_FOREACH(found, result, pageq) { 1036 atomic_clearbits_int(&found->pg_flags, PG_PMAPMASK); 1037 1038 if (found->pg_flags & PG_ZERO) { 1039 uvmexp.zeropages--; 1040 } 1041 if (flags & UVM_PLA_ZERO) { 1042 if (found->pg_flags & PG_ZERO) 1043 uvmexp.pga_zerohit++; 1044 else { 1045 uvmexp.pga_zeromiss++; 1046 uvm_pagezero(found); 1047 } 1048 } 1049 atomic_clearbits_int(&found->pg_flags, PG_ZERO|PQ_FREE); 1050 1051 found->uobject = NULL; 1052 found->uanon = NULL; 1053 found->pg_version++; 1054 1055 /* 1056 * Validate that the page matches range criterium. 1057 */ 1058 KDASSERT(start == 0 || atop(VM_PAGE_TO_PHYS(found)) >= start); 1059 KDASSERT(end == 0 || atop(VM_PAGE_TO_PHYS(found)) < end); 1060 1061 #ifdef DIAGNOSTIC 1062 /* 1063 * Update fcount (# found pages) and 1064 * fnsegs (# found segments) counters. 1065 */ 1066 if (diag_prev == NULL || 1067 /* new segment if it contains a hole */ 1068 atop(VM_PAGE_TO_PHYS(diag_prev)) + 1 != 1069 atop(VM_PAGE_TO_PHYS(found)) || 1070 /* new segment if it crosses boundary */ 1071 (atop(VM_PAGE_TO_PHYS(diag_prev)) & ~(boundary - 1)) != 1072 (atop(VM_PAGE_TO_PHYS(found)) & ~(boundary - 1))) 1073 fnsegs++; 1074 fcount++; 1075 1076 diag_prev = found; 1077 #endif /* DIAGNOSTIC */ 1078 } 1079 1080 #ifdef DIAGNOSTIC 1081 /* 1082 * Panic on algorithm failure. 1083 */ 1084 if (fcount != count || fnsegs > maxseg) { 1085 panic("pmemrange allocation error: " 1086 "allocated %ld pages in %d segments, " 1087 "but request was %ld pages in %d segments", 1088 fcount, fnsegs, count, maxseg); 1089 } 1090 #endif /* DIAGNOSTIC */ 1091 1092 return 0; 1093 } 1094 1095 /* 1096 * Free a number of contig pages (invoked by uvm_page_init). 1097 */ 1098 void 1099 uvm_pmr_freepages(struct vm_page *pg, psize_t count) 1100 { 1101 struct uvm_pmemrange *pmr; 1102 psize_t i, pmr_count; 1103 struct vm_page *firstpg = pg; 1104 1105 for (i = 0; i < count; i++) { 1106 KASSERT(atop(VM_PAGE_TO_PHYS(&pg[i])) == 1107 atop(VM_PAGE_TO_PHYS(pg)) + i); 1108 1109 if (!((pg[i].pg_flags & PQ_FREE) == 0 && 1110 VALID_FLAGS(pg[i].pg_flags))) { 1111 printf("Flags: 0x%x, will panic now.\n", 1112 pg[i].pg_flags); 1113 } 1114 KASSERT((pg[i].pg_flags & PQ_FREE) == 0 && 1115 VALID_FLAGS(pg[i].pg_flags)); 1116 atomic_setbits_int(&pg[i].pg_flags, PQ_FREE); 1117 atomic_clearbits_int(&pg[i].pg_flags, PG_ZERO); 1118 } 1119 1120 uvm_lock_fpageq(); 1121 1122 for (i = count; i > 0; i -= pmr_count) { 1123 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))); 1124 KASSERT(pmr != NULL); 1125 1126 pmr_count = MIN(i, pmr->high - atop(VM_PAGE_TO_PHYS(pg))); 1127 pg->fpgsz = pmr_count; 1128 uvm_pmr_insert(pmr, pg, 0); 1129 1130 uvmexp.free += pmr_count; 1131 pg += pmr_count; 1132 } 1133 wakeup(&uvmexp.free); 1134 1135 uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count)); 1136 1137 uvm_unlock_fpageq(); 1138 } 1139 1140 /* 1141 * Free all pages in the queue. 1142 */ 1143 void 1144 uvm_pmr_freepageq(struct pglist *pgl) 1145 { 1146 struct vm_page *pg; 1147 paddr_t pstart; 1148 psize_t plen; 1149 1150 TAILQ_FOREACH(pg, pgl, pageq) { 1151 if (!((pg->pg_flags & PQ_FREE) == 0 && 1152 VALID_FLAGS(pg->pg_flags))) { 1153 printf("Flags: 0x%x, will panic now.\n", 1154 pg->pg_flags); 1155 } 1156 KASSERT((pg->pg_flags & PQ_FREE) == 0 && 1157 VALID_FLAGS(pg->pg_flags)); 1158 atomic_setbits_int(&pg->pg_flags, PQ_FREE); 1159 atomic_clearbits_int(&pg->pg_flags, PG_ZERO); 1160 } 1161 1162 uvm_lock_fpageq(); 1163 while (!TAILQ_EMPTY(pgl)) { 1164 pstart = VM_PAGE_TO_PHYS(TAILQ_FIRST(pgl)); 1165 plen = uvm_pmr_remove_1strange(pgl, 0, NULL, 0); 1166 uvmexp.free += plen; 1167 1168 uvm_wakeup_pla(pstart, ptoa(plen)); 1169 } 1170 wakeup(&uvmexp.free); 1171 uvm_unlock_fpageq(); 1172 1173 return; 1174 } 1175 1176 /* 1177 * Store a pmemrange in the list. 1178 * 1179 * The list is sorted by use. 1180 */ 1181 struct uvm_pmemrange * 1182 uvm_pmemrange_use_insert(struct uvm_pmemrange_use *useq, 1183 struct uvm_pmemrange *pmr) 1184 { 1185 struct uvm_pmemrange *iter; 1186 int cmp = 1; 1187 1188 TAILQ_FOREACH(iter, useq, pmr_use) { 1189 cmp = uvm_pmemrange_use_cmp(pmr, iter); 1190 if (cmp == 0) 1191 return iter; 1192 if (cmp == -1) 1193 break; 1194 } 1195 1196 if (iter == NULL) 1197 TAILQ_INSERT_TAIL(useq, pmr, pmr_use); 1198 else 1199 TAILQ_INSERT_BEFORE(iter, pmr, pmr_use); 1200 return NULL; 1201 } 1202 1203 #ifdef DEBUG 1204 /* 1205 * Validation of the whole pmemrange. 1206 * Called with fpageq locked. 1207 */ 1208 void 1209 uvm_pmr_assertvalid(struct uvm_pmemrange *pmr) 1210 { 1211 struct vm_page *prev, *next, *i, *xref; 1212 int lcv, mti; 1213 1214 /* Empty range */ 1215 if (pmr->nsegs == 0) 1216 return; 1217 1218 /* Validate address tree. */ 1219 RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) { 1220 /* Validate the range. */ 1221 KASSERT(i->fpgsz > 0); 1222 KASSERT(atop(VM_PAGE_TO_PHYS(i)) >= pmr->low); 1223 KASSERT(atop(VM_PAGE_TO_PHYS(i)) + i->fpgsz 1224 <= pmr->high); 1225 1226 /* Validate each page in this range. */ 1227 for (lcv = 0; lcv < i->fpgsz; lcv++) { 1228 /* 1229 * Only the first page has a size specification. 1230 * Rest is size 0. 1231 */ 1232 KASSERT(lcv == 0 || i[lcv].fpgsz == 0); 1233 /* 1234 * Flag check. 1235 */ 1236 KASSERT(VALID_FLAGS(i[lcv].pg_flags) && 1237 (i[lcv].pg_flags & PQ_FREE) == PQ_FREE); 1238 /* 1239 * Free pages are: 1240 * - not wired 1241 * - not loaned 1242 * - have no vm_anon 1243 * - have no uvm_object 1244 */ 1245 KASSERT(i[lcv].wire_count == 0); 1246 KASSERT(i[lcv].loan_count == 0); 1247 KASSERT(i[lcv].uanon == (void*)0xdeadbeef || 1248 i[lcv].uanon == NULL); 1249 KASSERT(i[lcv].uobject == (void*)0xdeadbeef || 1250 i[lcv].uobject == NULL); 1251 /* 1252 * Pages in a single range always have the same 1253 * memtype. 1254 */ 1255 KASSERT(uvm_pmr_pg_to_memtype(&i[0]) == 1256 uvm_pmr_pg_to_memtype(&i[lcv])); 1257 } 1258 1259 /* Check that it shouldn't be joined with its predecessor. */ 1260 prev = RB_PREV(uvm_pmr_addr, &pmr->addr, i); 1261 if (prev != NULL) { 1262 KASSERT(uvm_pmr_pg_to_memtype(i) != 1263 uvm_pmr_pg_to_memtype(prev) || 1264 atop(VM_PAGE_TO_PHYS(i)) > 1265 atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz || 1266 prev + prev->fpgsz != i); 1267 } 1268 1269 /* Assert i is in the size tree as well. */ 1270 if (i->fpgsz == 1) { 1271 TAILQ_FOREACH(xref, 1272 &pmr->single[uvm_pmr_pg_to_memtype(i)], pageq) { 1273 if (xref == i) 1274 break; 1275 } 1276 KASSERT(xref == i); 1277 } else { 1278 KASSERT(RB_FIND(uvm_pmr_size, 1279 &pmr->size[uvm_pmr_pg_to_memtype(i)], i + 1) == 1280 i + 1); 1281 } 1282 } 1283 1284 /* Validate size tree. */ 1285 for (mti = 0; mti < UVM_PMR_MEMTYPE_MAX; mti++) { 1286 for (i = uvm_pmr_nfindsz(pmr, 1, mti); i != NULL; i = next) { 1287 next = uvm_pmr_nextsz(pmr, i, mti); 1288 if (next != NULL) { 1289 KASSERT(i->fpgsz <= 1290 next->fpgsz); 1291 } 1292 1293 /* Assert i is in the addr tree as well. */ 1294 KASSERT(RB_FIND(uvm_pmr_addr, &pmr->addr, i) == i); 1295 1296 /* Assert i is of the correct memory type. */ 1297 KASSERT(uvm_pmr_pg_to_memtype(i) == mti); 1298 } 1299 } 1300 1301 /* Validate nsegs statistic. */ 1302 lcv = 0; 1303 RB_FOREACH(i, uvm_pmr_addr, &pmr->addr) 1304 lcv++; 1305 KASSERT(pmr->nsegs == lcv); 1306 } 1307 #endif /* DEBUG */ 1308 1309 /* 1310 * Split pmr at split point pageno. 1311 * Called with fpageq unlocked. 1312 * 1313 * Split is only applied if a pmemrange spans pageno. 1314 */ 1315 void 1316 uvm_pmr_split(paddr_t pageno) 1317 { 1318 struct uvm_pmemrange *pmr, *drain; 1319 struct vm_page *rebuild, *prev, *next; 1320 psize_t prev_sz; 1321 1322 uvm_lock_fpageq(); 1323 pmr = uvm_pmemrange_find(pageno); 1324 if (pmr == NULL || !(pmr->low < pageno)) { 1325 /* No split required. */ 1326 uvm_unlock_fpageq(); 1327 return; 1328 } 1329 1330 KASSERT(pmr->low < pageno); 1331 KASSERT(pmr->high > pageno); 1332 1333 /* 1334 * uvm_pmr_allocpmr() calls into malloc() which in turn calls into 1335 * uvm_kmemalloc which calls into pmemrange, making the locking 1336 * a bit hard, so we just race! 1337 */ 1338 uvm_unlock_fpageq(); 1339 drain = uvm_pmr_allocpmr(); 1340 uvm_lock_fpageq(); 1341 pmr = uvm_pmemrange_find(pageno); 1342 if (pmr == NULL || !(pmr->low < pageno)) { 1343 /* 1344 * We lost the race since someone else ran this or a related 1345 * function, however this should be triggered very rarely so 1346 * we just leak the pmr. 1347 */ 1348 printf("uvm_pmr_split: lost one pmr\n"); 1349 uvm_unlock_fpageq(); 1350 return; 1351 } 1352 1353 drain->low = pageno; 1354 drain->high = pmr->high; 1355 drain->use = pmr->use; 1356 1357 uvm_pmr_assertvalid(pmr); 1358 uvm_pmr_assertvalid(drain); 1359 KASSERT(drain->nsegs == 0); 1360 1361 RB_FOREACH(rebuild, uvm_pmr_addr, &pmr->addr) { 1362 if (atop(VM_PAGE_TO_PHYS(rebuild)) >= pageno) 1363 break; 1364 } 1365 if (rebuild == NULL) 1366 prev = RB_MAX(uvm_pmr_addr, &pmr->addr); 1367 else 1368 prev = RB_PREV(uvm_pmr_addr, &pmr->addr, rebuild); 1369 KASSERT(prev == NULL || atop(VM_PAGE_TO_PHYS(prev)) < pageno); 1370 1371 /* 1372 * Handle free chunk that spans the split point. 1373 */ 1374 if (prev != NULL && 1375 atop(VM_PAGE_TO_PHYS(prev)) + prev->fpgsz > pageno) { 1376 psize_t before, after; 1377 1378 KASSERT(atop(VM_PAGE_TO_PHYS(prev)) < pageno); 1379 1380 uvm_pmr_remove(pmr, prev); 1381 prev_sz = prev->fpgsz; 1382 before = pageno - atop(VM_PAGE_TO_PHYS(prev)); 1383 after = atop(VM_PAGE_TO_PHYS(prev)) + prev_sz - pageno; 1384 1385 KASSERT(before > 0); 1386 KASSERT(after > 0); 1387 1388 prev->fpgsz = before; 1389 uvm_pmr_insert(pmr, prev, 1); 1390 (prev + before)->fpgsz = after; 1391 uvm_pmr_insert(drain, prev + before, 1); 1392 } 1393 1394 /* Move free chunks that no longer fall in the range. */ 1395 for (; rebuild != NULL; rebuild = next) { 1396 next = RB_NEXT(uvm_pmr_addr, &pmr->addr, rebuild); 1397 1398 uvm_pmr_remove(pmr, rebuild); 1399 uvm_pmr_insert(drain, rebuild, 1); 1400 } 1401 1402 pmr->high = pageno; 1403 uvm_pmr_assertvalid(pmr); 1404 uvm_pmr_assertvalid(drain); 1405 1406 RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, drain); 1407 uvm_pmemrange_use_insert(&uvm.pmr_control.use, drain); 1408 uvm_unlock_fpageq(); 1409 } 1410 1411 /* 1412 * Increase the usage counter for the given range of memory. 1413 * 1414 * The more usage counters a given range of memory has, the more will be 1415 * attempted not to allocate from it. 1416 * 1417 * Addresses here are in paddr_t, not page-numbers. 1418 * The lowest and highest allowed address are specified. 1419 */ 1420 void 1421 uvm_pmr_use_inc(paddr_t low, paddr_t high) 1422 { 1423 struct uvm_pmemrange *pmr; 1424 paddr_t sz; 1425 1426 /* pmr uses page numbers, translate low and high. */ 1427 high++; 1428 high = atop(trunc_page(high)); 1429 low = atop(round_page(low)); 1430 uvm_pmr_split(low); 1431 uvm_pmr_split(high); 1432 1433 sz = 0; 1434 uvm_lock_fpageq(); 1435 /* Increase use count on segments in range. */ 1436 RB_FOREACH(pmr, uvm_pmemrange_addr, &uvm.pmr_control.addr) { 1437 if (PMR_IS_SUBRANGE_OF(pmr->low, pmr->high, low, high)) { 1438 TAILQ_REMOVE(&uvm.pmr_control.use, pmr, pmr_use); 1439 pmr->use++; 1440 sz += pmr->high - pmr->low; 1441 uvm_pmemrange_use_insert(&uvm.pmr_control.use, pmr); 1442 } 1443 uvm_pmr_assertvalid(pmr); 1444 } 1445 uvm_unlock_fpageq(); 1446 1447 KASSERT(sz >= high - low); 1448 } 1449 1450 /* 1451 * Allocate a pmemrange. 1452 * 1453 * If called from uvm_page_init, the uvm_pageboot_alloc is used. 1454 * If called after uvm_init, malloc is used. 1455 * (And if called in between, you're dead.) 1456 */ 1457 struct uvm_pmemrange * 1458 uvm_pmr_allocpmr(void) 1459 { 1460 struct uvm_pmemrange *nw; 1461 int i; 1462 1463 /* We're only ever hitting the !uvm.page_init_done case for now. */ 1464 if (!uvm.page_init_done) { 1465 nw = (struct uvm_pmemrange *) 1466 uvm_pageboot_alloc(sizeof(struct uvm_pmemrange)); 1467 } else { 1468 nw = malloc(sizeof(struct uvm_pmemrange), 1469 M_VMMAP, M_NOWAIT); 1470 } 1471 KASSERT(nw != NULL); 1472 bzero(nw, sizeof(struct uvm_pmemrange)); 1473 RB_INIT(&nw->addr); 1474 for (i = 0; i < UVM_PMR_MEMTYPE_MAX; i++) { 1475 RB_INIT(&nw->size[i]); 1476 TAILQ_INIT(&nw->single[i]); 1477 } 1478 return nw; 1479 } 1480 1481 /* 1482 * Initialization of pmr. 1483 * Called by uvm_page_init. 1484 * 1485 * Sets up pmemranges. 1486 */ 1487 void 1488 uvm_pmr_init(void) 1489 { 1490 struct uvm_pmemrange *new_pmr; 1491 int i; 1492 1493 TAILQ_INIT(&uvm.pmr_control.use); 1494 RB_INIT(&uvm.pmr_control.addr); 1495 TAILQ_INIT(&uvm.pmr_control.allocs); 1496 1497 /* By default, one range for the entire address space. */ 1498 new_pmr = uvm_pmr_allocpmr(); 1499 new_pmr->low = 0; 1500 new_pmr->high = atop((paddr_t)-1) + 1; 1501 1502 RB_INSERT(uvm_pmemrange_addr, &uvm.pmr_control.addr, new_pmr); 1503 uvm_pmemrange_use_insert(&uvm.pmr_control.use, new_pmr); 1504 1505 for (i = 0; uvm_md_constraints[i] != NULL; i++) { 1506 uvm_pmr_use_inc(uvm_md_constraints[i]->ucr_low, 1507 uvm_md_constraints[i]->ucr_high); 1508 } 1509 } 1510 1511 /* 1512 * Find the pmemrange that contains the given page number. 1513 * 1514 * (Manually traverses the binary tree, because that is cheaper on stack 1515 * usage.) 1516 */ 1517 struct uvm_pmemrange * 1518 uvm_pmemrange_find(paddr_t pageno) 1519 { 1520 struct uvm_pmemrange *pmr; 1521 1522 pmr = RB_ROOT(&uvm.pmr_control.addr); 1523 while (pmr != NULL) { 1524 if (pmr->low > pageno) 1525 pmr = RB_LEFT(pmr, pmr_addr); 1526 else if (pmr->high <= pageno) 1527 pmr = RB_RIGHT(pmr, pmr_addr); 1528 else 1529 break; 1530 } 1531 1532 return pmr; 1533 } 1534 1535 #if defined(DDB) || defined(DEBUG) 1536 /* 1537 * Return true if the given page is in any of the free lists. 1538 * Used by uvm_page_printit. 1539 * This function is safe, even if the page is not on the freeq. 1540 * Note: does not apply locking, only called from ddb. 1541 */ 1542 int 1543 uvm_pmr_isfree(struct vm_page *pg) 1544 { 1545 struct vm_page *r; 1546 struct uvm_pmemrange *pmr; 1547 1548 pmr = uvm_pmemrange_find(atop(VM_PAGE_TO_PHYS(pg))); 1549 if (pmr == NULL) 1550 return 0; 1551 r = RB_NFIND(uvm_pmr_addr, &pmr->addr, pg); 1552 if (r == NULL) 1553 r = RB_MAX(uvm_pmr_addr, &pmr->addr); 1554 else 1555 r = RB_PREV(uvm_pmr_addr, &pmr->addr, r); 1556 if (r == NULL) 1557 return 0; /* Empty tree. */ 1558 1559 KDASSERT(atop(VM_PAGE_TO_PHYS(r)) <= atop(VM_PAGE_TO_PHYS(pg))); 1560 return atop(VM_PAGE_TO_PHYS(r)) + r->fpgsz > 1561 atop(VM_PAGE_TO_PHYS(pg)); 1562 } 1563 #endif /* DEBUG */ 1564 1565 /* 1566 * Given a root of a tree, find a range which intersects start, end and 1567 * is of the same memtype. 1568 * 1569 * Page must be in the address tree. 1570 */ 1571 struct vm_page* 1572 uvm_pmr_rootupdate(struct uvm_pmemrange *pmr, struct vm_page *init_root, 1573 paddr_t start, paddr_t end, int memtype) 1574 { 1575 int direction; 1576 struct vm_page *root; 1577 struct vm_page *high, *high_next; 1578 struct vm_page *low, *low_next; 1579 1580 KDASSERT(pmr != NULL && init_root != NULL); 1581 root = init_root; 1582 1583 /* Which direction to use for searching. */ 1584 if (start != 0 && atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz <= start) 1585 direction = 1; 1586 else if (end != 0 && atop(VM_PAGE_TO_PHYS(root)) >= end) 1587 direction = -1; 1588 else /* nothing to do */ 1589 return root; 1590 1591 /* First, update root to fall within the chosen range. */ 1592 while (root && !PMR_INTERSECTS_WITH( 1593 atop(VM_PAGE_TO_PHYS(root)), 1594 atop(VM_PAGE_TO_PHYS(root)) + root->fpgsz, 1595 start, end)) { 1596 if (direction == 1) 1597 root = RB_RIGHT(root, objt); 1598 else 1599 root = RB_LEFT(root, objt); 1600 } 1601 if (root == NULL || uvm_pmr_pg_to_memtype(root) == memtype) 1602 return root; 1603 1604 /* 1605 * Root is valid, but of the wrong memtype. 1606 * 1607 * Try to find a range that has the given memtype in the subtree 1608 * (memtype mismatches are costly, either because the conversion 1609 * is expensive, or a later allocation will need to do the opposite 1610 * conversion, which will be expensive). 1611 * 1612 * 1613 * First, simply increase address until we hit something we can use. 1614 * Cache the upper page, so we can page-walk later. 1615 */ 1616 high = root; 1617 high_next = RB_RIGHT(high, objt); 1618 while (high_next != NULL && PMR_INTERSECTS_WITH( 1619 atop(VM_PAGE_TO_PHYS(high_next)), 1620 atop(VM_PAGE_TO_PHYS(high_next)) + high_next->fpgsz, 1621 start, end)) { 1622 high = high_next; 1623 if (uvm_pmr_pg_to_memtype(high) == memtype) 1624 return high; 1625 high_next = RB_RIGHT(high, objt); 1626 } 1627 1628 /* 1629 * Second, decrease the address until we hit something we can use. 1630 * Cache the lower page, so we can page-walk later. 1631 */ 1632 low = root; 1633 low_next = RB_LEFT(low, objt); 1634 while (low_next != NULL && PMR_INTERSECTS_WITH( 1635 atop(VM_PAGE_TO_PHYS(low_next)), 1636 atop(VM_PAGE_TO_PHYS(low_next)) + low_next->fpgsz, 1637 start, end)) { 1638 low = low_next; 1639 if (uvm_pmr_pg_to_memtype(low) == memtype) 1640 return low; 1641 low_next = RB_LEFT(low, objt); 1642 } 1643 1644 if (low == high) 1645 return NULL; 1646 1647 /* No hits. Walk the address tree until we find something usable. */ 1648 for (low = RB_NEXT(uvm_pmr_addr, &pmr->addr, low); 1649 low != high; 1650 low = RB_NEXT(uvm_pmr_addr, &pmr->addr, low)) { 1651 KDASSERT(PMR_IS_SUBRANGE_OF(atop(VM_PAGE_TO_PHYS(low)), 1652 atop(VM_PAGE_TO_PHYS(low)) + low->fpgsz, 1653 start, end)); 1654 if (uvm_pmr_pg_to_memtype(low) == memtype) 1655 return low; 1656 } 1657 1658 /* Nothing found. */ 1659 return NULL; 1660 } 1661 1662 /* 1663 * Allocate any page, the fastest way. Page number constraints only. 1664 */ 1665 int 1666 uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result, 1667 paddr_t start, paddr_t end) 1668 { 1669 struct uvm_pmemrange *pmr; 1670 struct vm_page *found, *splitpg; 1671 psize_t fcount; 1672 int memtype; 1673 1674 fcount = 0; 1675 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { 1676 /* We're done. */ 1677 if (fcount == count) 1678 break; 1679 1680 /* Outside requested range. */ 1681 if (!(start == 0 && end == 0) && 1682 !PMR_INTERSECTS_WITH(pmr->low, pmr->high, start, end)) 1683 continue; 1684 1685 /* Range is empty. */ 1686 if (pmr->nsegs == 0) 1687 continue; 1688 1689 /* Loop over all memtypes, starting at memtype_init. */ 1690 memtype = memtype_init; 1691 while (fcount != count) { 1692 found = TAILQ_FIRST(&pmr->single[memtype]); 1693 /* 1694 * If found is outside the range, walk the list 1695 * until we find something that intersects with 1696 * boundaries. 1697 */ 1698 while (found && !PMR_INTERSECTS_WITH( 1699 atop(VM_PAGE_TO_PHYS(found)), 1700 atop(VM_PAGE_TO_PHYS(found)) + 1, 1701 start, end)) 1702 found = TAILQ_NEXT(found, pageq); 1703 1704 if (found == NULL) { 1705 found = RB_ROOT(&pmr->size[memtype]); 1706 /* Size tree gives pg[1] instead of pg[0] */ 1707 if (found != NULL) { 1708 found--; 1709 1710 found = uvm_pmr_rootupdate(pmr, found, 1711 start, end, memtype); 1712 } 1713 } 1714 if (found != NULL) { 1715 uvm_pmr_assertvalid(pmr); 1716 uvm_pmr_remove_size(pmr, found); 1717 1718 /* 1719 * If the page intersects the end, then it'll 1720 * need splitting. 1721 * 1722 * Note that we don't need to split if the page 1723 * intersects start: the drain function will 1724 * simply stop on hitting start. 1725 */ 1726 if (end != 0 && atop(VM_PAGE_TO_PHYS(found)) + 1727 found->fpgsz > end) { 1728 psize_t splitsz = 1729 atop(VM_PAGE_TO_PHYS(found)) + 1730 found->fpgsz - end; 1731 1732 uvm_pmr_remove_addr(pmr, found); 1733 uvm_pmr_assertvalid(pmr); 1734 found->fpgsz -= splitsz; 1735 splitpg = found + found->fpgsz; 1736 splitpg->fpgsz = splitsz; 1737 uvm_pmr_insert(pmr, splitpg, 1); 1738 1739 /* 1740 * At this point, splitpg and found 1741 * actually should be joined. 1742 * But we explicitly disable that, 1743 * because we will start subtracting 1744 * from found. 1745 */ 1746 KASSERT(start == 0 || 1747 atop(VM_PAGE_TO_PHYS(found)) + 1748 found->fpgsz > start); 1749 uvm_pmr_insert_addr(pmr, found, 1); 1750 } 1751 1752 /* 1753 * Fetch pages from the end. 1754 * If the range is larger than the requested 1755 * number of pages, this saves us an addr-tree 1756 * update. 1757 * 1758 * Since we take from the end and insert at 1759 * the head, any ranges keep preserved. 1760 */ 1761 while (found->fpgsz > 0 && fcount < count && 1762 (start == 0 || 1763 atop(VM_PAGE_TO_PHYS(found)) + 1764 found->fpgsz > start)) { 1765 found->fpgsz--; 1766 fcount++; 1767 TAILQ_INSERT_HEAD(result, 1768 &found[found->fpgsz], pageq); 1769 } 1770 if (found->fpgsz > 0) { 1771 uvm_pmr_insert_size(pmr, found); 1772 KDASSERT(fcount == count); 1773 uvm_pmr_assertvalid(pmr); 1774 return fcount; 1775 } 1776 1777 /* 1778 * Delayed addr-tree removal. 1779 */ 1780 uvm_pmr_remove_addr(pmr, found); 1781 uvm_pmr_assertvalid(pmr); 1782 } else { 1783 /* 1784 * Skip to the next memtype. 1785 */ 1786 memtype += 1; 1787 if (memtype == UVM_PMR_MEMTYPE_MAX) 1788 memtype = 0; 1789 if (memtype == memtype_init) 1790 break; 1791 } 1792 } 1793 } 1794 1795 /* 1796 * Search finished. 1797 * 1798 * Ran out of ranges before enough pages were gathered, or we hit the 1799 * case where found->fpgsz == count - fcount, in which case the 1800 * above exit condition didn't trigger. 1801 * 1802 * On failure, caller will free the pages. 1803 */ 1804 return fcount; 1805 } 1806 1807 #ifdef DDB 1808 /* 1809 * Print information about pmemrange. 1810 * Does not do locking (so either call it from DDB or acquire fpageq lock 1811 * before invoking. 1812 */ 1813 void 1814 uvm_pmr_print(void) 1815 { 1816 struct uvm_pmemrange *pmr; 1817 struct vm_page *pg; 1818 psize_t size[UVM_PMR_MEMTYPE_MAX]; 1819 psize_t free; 1820 int useq_len; 1821 int mt; 1822 1823 printf("Ranges, use queue:\n"); 1824 useq_len = 0; 1825 TAILQ_FOREACH(pmr, &uvm.pmr_control.use, pmr_use) { 1826 useq_len++; 1827 free = 0; 1828 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) { 1829 pg = RB_MAX(uvm_pmr_size, &pmr->size[mt]); 1830 if (pg != NULL) 1831 pg--; 1832 else 1833 pg = TAILQ_FIRST(&pmr->single[mt]); 1834 size[mt] = (pg == NULL ? 0 : pg->fpgsz); 1835 1836 RB_FOREACH(pg, uvm_pmr_addr, &pmr->addr) 1837 free += pg->fpgsz; 1838 } 1839 1840 printf("* [0x%lx-0x%lx] use=%d nsegs=%ld", 1841 (unsigned long)pmr->low, (unsigned long)pmr->high, 1842 pmr->use, (unsigned long)pmr->nsegs); 1843 for (mt = 0; mt < UVM_PMR_MEMTYPE_MAX; mt++) { 1844 printf(" maxsegsz[%d]=0x%lx", mt, 1845 (unsigned long)size[mt]); 1846 } 1847 printf(" free=0x%lx\n", (unsigned long)free); 1848 } 1849 printf("#ranges = %d\n", useq_len); 1850 } 1851 #endif 1852 1853 /* 1854 * uvm_wait_pla: wait (sleep) for the page daemon to free some pages 1855 * in a specific physmem area. 1856 * 1857 * Returns ENOMEM if the pagedaemon failed to free any pages. 1858 * If not failok, failure will lead to panic. 1859 * 1860 * Must be called with fpageq locked. 1861 */ 1862 int 1863 uvm_wait_pla(paddr_t low, paddr_t high, paddr_t size, int failok) 1864 { 1865 struct uvm_pmalloc pma; 1866 const char *wmsg = "pmrwait"; 1867 1868 if (curproc == uvm.pagedaemon_proc) { 1869 /* 1870 * This is not that uncommon when the pagedaemon is trying 1871 * to flush out a large mmapped file. VOP_WRITE will circle 1872 * back through the buffer cache and try to get more memory. 1873 * The pagedaemon starts by calling bufbackoff, but we can 1874 * easily use up that reserve in a single scan iteration. 1875 */ 1876 uvm_unlock_fpageq(); 1877 if (bufbackoff(NULL, atop(size)) == 0) { 1878 uvm_lock_fpageq(); 1879 return 0; 1880 } 1881 uvm_lock_fpageq(); 1882 1883 /* 1884 * XXX detect pagedaemon deadlock - see comment in 1885 * uvm_wait(), as this is exactly the same issue. 1886 */ 1887 printf("pagedaemon: wait_pla deadlock detected!\n"); 1888 msleep(&uvmexp.free, &uvm.fpageqlock, PVM, wmsg, hz >> 3); 1889 #if defined(DEBUG) 1890 /* DEBUG: panic so we can debug it */ 1891 panic("wait_pla pagedaemon deadlock"); 1892 #endif 1893 return 0; 1894 } 1895 1896 for (;;) { 1897 pma.pm_constraint.ucr_low = low; 1898 pma.pm_constraint.ucr_high = high; 1899 pma.pm_size = size; 1900 pma.pm_flags = UVM_PMA_LINKED; 1901 TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, &pma, pmq); 1902 1903 wakeup(&uvm.pagedaemon); /* wake the daemon! */ 1904 while (pma.pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY)) 1905 msleep(&pma, &uvm.fpageqlock, PVM, wmsg, 0); 1906 1907 if (!(pma.pm_flags & UVM_PMA_FREED) && 1908 pma.pm_flags & UVM_PMA_FAIL) { 1909 if (failok) 1910 return ENOMEM; 1911 printf("uvm_wait: failed to free %ld pages between " 1912 "0x%lx-0x%lx\n", atop(size), low, high); 1913 } else 1914 return 0; 1915 } 1916 /* UNREACHABLE */ 1917 } 1918 1919 /* 1920 * Wake up uvm_pmalloc sleepers. 1921 */ 1922 void 1923 uvm_wakeup_pla(paddr_t low, psize_t len) 1924 { 1925 struct uvm_pmalloc *pma, *pma_next; 1926 paddr_t high; 1927 1928 high = low + len; 1929 1930 /* Wake specific allocations waiting for this memory. */ 1931 for (pma = TAILQ_FIRST(&uvm.pmr_control.allocs); pma != NULL; 1932 pma = pma_next) { 1933 pma_next = TAILQ_NEXT(pma, pmq); 1934 1935 if (low < pma->pm_constraint.ucr_high && 1936 high > pma->pm_constraint.ucr_low) { 1937 pma->pm_flags |= UVM_PMA_FREED; 1938 if (!(pma->pm_flags & UVM_PMA_BUSY)) { 1939 pma->pm_flags &= ~UVM_PMA_LINKED; 1940 TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, 1941 pmq); 1942 wakeup(pma); 1943 } 1944 } 1945 } 1946 } 1947