1 /* $NetBSD: uvm_pglist.c,v 1.81 2020/03/01 21:43:56 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * uvm_pglist.c: pglist functions 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.81 2020/03/01 21:43:56 ad Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/cpu.h> 43 44 #include <uvm/uvm.h> 45 #include <uvm/uvm_pdpolicy.h> 46 #include <uvm/uvm_pgflcache.h> 47 48 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 49 #define STAT_INCR(v) (v)++ 50 #define STAT_DECR(v) do { \ 51 if ((v) == 0) \ 52 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 53 else \ 54 (v)--; \ 55 } while (/*CONSTCOND*/ 0) 56 u_long uvm_pglistalloc_npages; 57 #else 58 #define STAT_INCR(v) 59 #define STAT_DECR(v) 60 #endif 61 62 /* 63 * uvm_pglistalloc: allocate a list of pages 64 * 65 * => allocated pages are placed onto an rlist. rlist is 66 * initialized by uvm_pglistalloc. 67 * => returns 0 on success or errno on failure 68 * => implementation allocates a single segment if any constraints are 69 * imposed by call arguments. 70 * => doesn't take into account clean non-busy pages on inactive list 71 * that could be used(?) 72 * => params: 73 * size the size of the allocation, rounded to page size. 74 * low the low address of the allowed allocation range. 75 * high the high address of the allowed allocation range. 76 * alignment memory must be aligned to this power-of-two boundary. 77 * boundary no segment in the allocation may cross this 78 * power-of-two boundary (relative to zero). 79 */ 80 81 static void 82 uvm_pglist_add(struct vm_page *pg, struct pglist *rlist) 83 { 84 struct pgfreelist *pgfl; 85 struct pgflbucket *pgb; 86 87 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 88 pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)]; 89 90 #ifdef UVMDEBUG 91 struct vm_page *tp; 92 LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) { 93 if (tp == pg) 94 break; 95 } 96 if (tp == NULL) 97 panic("uvm_pglistalloc: page not on freelist"); 98 #endif 99 LIST_REMOVE(pg, pageq.list); 100 pgb->pgb_nfree--; 101 if (pg->flags & PG_ZERO) 102 CPU_COUNT(CPU_COUNT_ZEROPAGES, -1); 103 pg->flags = PG_CLEAN; 104 pg->uobject = NULL; 105 pg->uanon = NULL; 106 TAILQ_INSERT_TAIL(rlist, pg, pageq.queue); 107 STAT_INCR(uvm_pglistalloc_npages); 108 } 109 110 static int 111 uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high, 112 paddr_t alignment, paddr_t boundary, struct pglist *rlist) 113 { 114 signed int candidate, limit, candidateidx, end, idx, skip; 115 int pagemask; 116 bool second_pass; 117 #ifdef DEBUG 118 paddr_t idxpa, lastidxpa; 119 paddr_t cidx = 0; /* XXX: GCC */ 120 #endif 121 #ifdef PGALLOC_VERBOSE 122 printf("pgalloc: contig %d pgs from psi %d\n", num, psi); 123 #endif 124 125 low = atop(low); 126 high = atop(high); 127 alignment = atop(alignment); 128 129 /* 130 * Make sure that physseg falls within with range to be allocated from. 131 */ 132 if (high <= uvm_physseg_get_avail_start(psi) || low >= uvm_physseg_get_avail_end(psi)) 133 return 0; 134 135 /* 136 * We start our search at the just after where the last allocation 137 * succeeded. 138 */ 139 candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) + 140 uvm_physseg_get_start_hint(psi)), alignment); 141 limit = uimin(high, uvm_physseg_get_avail_end(psi)); 142 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 143 skip = 0; 144 second_pass = false; 145 146 for (;;) { 147 bool ok = true; 148 signed int cnt; 149 150 if (candidate + num > limit) { 151 if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) { 152 /* 153 * We've run past the allowable range. 154 */ 155 return 0; /* FAIL = 0 pages*/ 156 } 157 /* 158 * We've wrapped around the end of this segment 159 * so restart at the beginning but now our limit 160 * is were we started. 161 */ 162 second_pass = true; 163 candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi)), alignment); 164 limit = uimin(limit, uvm_physseg_get_avail_start(psi) + 165 uvm_physseg_get_start_hint(psi)); 166 skip = 0; 167 continue; 168 } 169 if (boundary != 0 && 170 ((candidate ^ (candidate + num - 1)) & pagemask) != 0) { 171 /* 172 * Region crosses boundary. Jump to the boundary 173 * just crossed and ensure alignment. 174 */ 175 candidate = (candidate + num - 1) & pagemask; 176 candidate = roundup2(candidate, alignment); 177 skip = 0; 178 continue; 179 } 180 #ifdef DEBUG 181 /* 182 * Make sure this is a managed physical page. 183 */ 184 185 if (uvm_physseg_find(candidate, &cidx) != psi) 186 panic("pgalloc contig: botch1"); 187 if (cidx != candidate - uvm_physseg_get_start(psi)) 188 panic("pgalloc contig: botch2"); 189 if (uvm_physseg_find(candidate + num - 1, &cidx) != psi) 190 panic("pgalloc contig: botch3"); 191 if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1) 192 panic("pgalloc contig: botch4"); 193 #endif 194 candidateidx = candidate - uvm_physseg_get_start(psi); 195 end = candidateidx + num; 196 197 /* 198 * Found a suitable starting page. See if the range is free. 199 */ 200 #ifdef PGALLOC_VERBOSE 201 printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR, 202 __func__, psi, candidateidx, end, skip, alignment); 203 #endif 204 /* 205 * We start at the end and work backwards since if we find a 206 * non-free page, it makes no sense to continue. 207 * 208 * But on the plus size we have "vetted" some number of free 209 * pages. If this iteration fails, we may be able to skip 210 * testing most of those pages again in the next pass. 211 */ 212 for (idx = end - 1; idx >= candidateidx + skip; idx--) { 213 if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) { 214 ok = false; 215 break; 216 } 217 218 #ifdef DEBUG 219 if (idx > candidateidx) { 220 idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx)); 221 lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1)); 222 if ((lastidxpa + PAGE_SIZE) != idxpa) { 223 /* 224 * Region not contiguous. 225 */ 226 panic("pgalloc contig: botch5"); 227 } 228 if (boundary != 0 && 229 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 230 != 0) { 231 /* 232 * Region crosses boundary. 233 */ 234 panic("pgalloc contig: botch6"); 235 } 236 } 237 #endif 238 } 239 240 if (ok) { 241 while (skip-- > 0) { 242 KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip))); 243 } 244 #ifdef PGALLOC_VERBOSE 245 printf(": ok\n"); 246 #endif 247 break; 248 } 249 250 #ifdef PGALLOC_VERBOSE 251 printf(": non-free at %#x\n", idx - candidateidx); 252 #endif 253 /* 254 * count the number of pages we can advance 255 * since we know they aren't all free. 256 */ 257 cnt = idx + 1 - candidateidx; 258 /* 259 * now round up that to the needed alignment. 260 */ 261 cnt = roundup2(cnt, alignment); 262 /* 263 * The number of pages we can skip checking 264 * (might be 0 if cnt > num). 265 */ 266 skip = uimax(num - cnt, 0); 267 candidate += cnt; 268 } 269 270 /* 271 * we have a chunk of memory that conforms to the requested constraints. 272 */ 273 for (idx = candidateidx; idx < end; idx++) 274 uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist); 275 276 /* 277 * the next time we need to search this segment, start after this 278 * chunk of pages we just allocated. 279 */ 280 uvm_physseg_set_start_hint(psi, candidate + num - 281 uvm_physseg_get_avail_start(psi)); 282 KASSERTMSG(uvm_physseg_get_start_hint(psi) <= 283 uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi), 284 "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", 285 candidate + num, 286 uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi), 287 uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi), 288 uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi)); 289 290 #ifdef PGALLOC_VERBOSE 291 printf("got %d pgs\n", num); 292 #endif 293 return num; /* number of pages allocated */ 294 } 295 296 static int 297 uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment, 298 paddr_t boundary, struct pglist *rlist) 299 { 300 int fl; 301 int error; 302 303 uvm_physseg_t psi; 304 /* Default to "lose". */ 305 error = ENOMEM; 306 307 /* 308 * Block all memory allocation and lock the free list. 309 */ 310 uvm_pgfl_lock(); 311 312 /* Are there even any free pages? */ 313 if (uvm_availmem() <= 314 (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 315 goto out; 316 317 for (fl = 0; fl < VM_NFREELIST; fl++) { 318 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 319 for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi)) 320 #else 321 for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi)) 322 #endif 323 { 324 if (uvm_physseg_get_free_list(psi) != fl) 325 continue; 326 327 num -= uvm_pglistalloc_c_ps(psi, num, low, high, 328 alignment, boundary, rlist); 329 if (num == 0) { 330 #ifdef PGALLOC_VERBOSE 331 printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n", 332 (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 333 (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 334 #endif 335 error = 0; 336 goto out; 337 } 338 } 339 } 340 341 out: 342 /* 343 * check to see if we need to generate some free pages waking 344 * the pagedaemon. 345 */ 346 347 uvm_pgfl_unlock(); 348 uvm_kick_pdaemon(); 349 return (error); 350 } 351 352 static int 353 uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high, 354 struct pglist *rlist) 355 { 356 int todo, limit, candidate; 357 struct vm_page *pg; 358 bool second_pass; 359 #ifdef PGALLOC_VERBOSE 360 printf("pgalloc: simple %d pgs from psi %zd\n", num, psi); 361 #endif 362 363 KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi)); 364 KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi)); 365 KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi)); 366 KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi)); 367 368 low = atop(low); 369 high = atop(high); 370 todo = num; 371 candidate = uimax(low, uvm_physseg_get_avail_start(psi) + 372 uvm_physseg_get_start_hint(psi)); 373 limit = uimin(high, uvm_physseg_get_avail_end(psi)); 374 pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi)); 375 second_pass = false; 376 377 /* 378 * Make sure that physseg falls within with range to be allocated from. 379 */ 380 if (high <= uvm_physseg_get_avail_start(psi) || 381 low >= uvm_physseg_get_avail_end(psi)) 382 return 0; 383 384 again: 385 for (;; candidate++, pg++) { 386 if (candidate >= limit) { 387 if (uvm_physseg_get_start_hint(psi) == 0 || second_pass) { 388 candidate = limit - 1; 389 break; 390 } 391 second_pass = true; 392 candidate = uimax(low, uvm_physseg_get_avail_start(psi)); 393 limit = uimin(limit, uvm_physseg_get_avail_start(psi) + 394 uvm_physseg_get_start_hint(psi)); 395 pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi)); 396 goto again; 397 } 398 #if defined(DEBUG) 399 { 400 paddr_t cidx = 0; 401 const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx); 402 KDASSERTMSG(bank == psi, 403 "uvm_physseg_find(%#x) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG, 404 candidate, bank, psi); 405 KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi), 406 "uvm_physseg_find(%#x): %#"PRIxPADDR" != off %"PRIxPADDR, 407 candidate, cidx, candidate - uvm_physseg_get_start(psi)); 408 } 409 #endif 410 if (VM_PAGE_IS_FREE(pg) == 0) 411 continue; 412 413 uvm_pglist_add(pg, rlist); 414 if (--todo == 0) { 415 break; 416 } 417 } 418 419 /* 420 * The next time we need to search this segment, 421 * start just after the pages we just allocated. 422 */ 423 uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi)); 424 KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) - 425 uvm_physseg_get_avail_start(psi), 426 "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", 427 candidate + 1, 428 uvm_physseg_get_start_hint(psi), 429 uvm_physseg_get_start_hint(psi), 430 uvm_physseg_get_avail_end(psi), 431 uvm_physseg_get_avail_start(psi), 432 uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi)); 433 434 #ifdef PGALLOC_VERBOSE 435 printf("got %d pgs\n", num - todo); 436 #endif 437 return (num - todo); /* number of pages allocated */ 438 } 439 440 static int 441 uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high, 442 struct pglist *rlist, int waitok) 443 { 444 int fl, error; 445 uvm_physseg_t psi; 446 int count = 0; 447 448 /* Default to "lose". */ 449 error = ENOMEM; 450 451 again: 452 /* 453 * Block all memory allocation and lock the free list. 454 */ 455 uvm_pgfl_lock(); 456 count++; 457 458 /* Are there even any free pages? */ 459 if (uvm_availmem() <= 460 (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 461 goto out; 462 463 for (fl = 0; fl < VM_NFREELIST; fl++) { 464 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 465 for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_prev(psi)) 466 #else 467 for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi); psi = uvm_physseg_get_next(psi)) 468 #endif 469 { 470 if (uvm_physseg_get_free_list(psi) != fl) 471 continue; 472 473 num -= uvm_pglistalloc_s_ps(psi, num, low, high, rlist); 474 if (num == 0) { 475 error = 0; 476 goto out; 477 } 478 } 479 480 } 481 482 out: 483 /* 484 * check to see if we need to generate some free pages waking 485 * the pagedaemon. 486 */ 487 488 uvm_pgfl_unlock(); 489 uvm_kick_pdaemon(); 490 491 if (error) { 492 if (waitok) { 493 /* XXX perhaps some time limitation? */ 494 #ifdef DEBUG 495 if (count == 1) 496 printf("pglistalloc waiting\n"); 497 #endif 498 uvm_wait("pglalloc"); 499 goto again; 500 } else 501 uvm_pglistfree(rlist); 502 } 503 #ifdef PGALLOC_VERBOSE 504 if (!error) 505 printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n", 506 (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)), 507 (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist))); 508 #endif 509 return (error); 510 } 511 512 int 513 uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, 514 paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) 515 { 516 int num, res; 517 518 KASSERT(!cpu_intr_p()); 519 KASSERT(!cpu_softintr_p()); 520 KASSERT((alignment & (alignment - 1)) == 0); 521 KASSERT((boundary & (boundary - 1)) == 0); 522 523 /* 524 * Our allocations are always page granularity, so our alignment 525 * must be, too. 526 */ 527 if (alignment < PAGE_SIZE) 528 alignment = PAGE_SIZE; 529 if (boundary != 0 && boundary < size) 530 return (EINVAL); 531 num = atop(round_page(size)); 532 low = roundup2(low, alignment); 533 534 TAILQ_INIT(rlist); 535 536 /* 537 * Turn off the caching of free pages - we need everything to be on 538 * the global freelists. 539 */ 540 uvm_pgflcache_pause(); 541 542 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 543 (boundary != 0)) 544 res = uvm_pglistalloc_contig(num, low, high, alignment, 545 boundary, rlist); 546 else 547 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 548 549 uvm_pgflcache_resume(); 550 551 return (res); 552 } 553 554 /* 555 * uvm_pglistfree: free a list of pages 556 * 557 * => pages should already be unmapped 558 */ 559 560 void 561 uvm_pglistfree(struct pglist *list) 562 { 563 struct pgfreelist *pgfl; 564 struct pgflbucket *pgb; 565 struct vm_page *pg; 566 int c, b; 567 568 KASSERT(!cpu_intr_p()); 569 KASSERT(!cpu_softintr_p()); 570 571 /* 572 * Lock the free list and free each page. 573 */ 574 575 uvm_pgfl_lock(); 576 while ((pg = TAILQ_FIRST(list)) != NULL) { 577 TAILQ_REMOVE(list, pg, pageq.queue); 578 pg->flags = (pg->flags & PG_ZERO) | PG_FREE; 579 #ifdef DEBUG 580 pg->uobject = (void *)0xdeadbeef; 581 pg->uanon = (void *)0xdeadbeef; 582 if (pg->flags & PG_ZERO) 583 uvm_pagezerocheck(pg); 584 #endif /* DEBUG */ 585 c = VM_PGCOLOR(pg); 586 b = uvm_page_get_bucket(pg); 587 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 588 pgb = pgfl->pgfl_buckets[b]; 589 if (pg->flags & PG_ZERO) 590 CPU_COUNT(CPU_COUNT_ZEROPAGES, 1); 591 pgb->pgb_nfree++; 592 LIST_INSERT_HEAD(&pgb->pgb_colors[c], pg, pageq.list); 593 STAT_DECR(uvm_pglistalloc_npages); 594 } 595 uvm_pgfl_unlock(); 596 } 597