/*	$NetBSD: uvm_pglist.c,v 1.85 2020/06/14 21:41:42 ad Exp $	*/

/*-
 * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.85 2020/06/14 21:41:42 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */
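
/*
 * Example (illustrative only; "mlist", "pg", and the chosen sizes and
 * address range are hypothetical): a caller wanting a physically
 * contiguous, 64KB-aligned 64KB allocation below 16MB might do:
 *
 *	struct pglist mlist;
 *	struct vm_page *pg;
 *	int error;
 *
 *	error = uvm_pglistalloc(64 * 1024, 0, 0x1000000, 64 * 1024, 0,
 *	    &mlist, 1, 1);
 *	if (error)
 *		return error;
 *	TAILQ_FOREACH(pg, &mlist, pageq.queue)
 *		printf("%#" PRIxPADDR "\n", VM_PAGE_TO_PHYS(pg));
 *	uvm_pglistfree(&mlist);
 */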

static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	struct pgfreelist *pgfl;
	struct pgflbucket *pgb;

	pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
	pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];

#ifdef UVMDEBUG
	struct vm_page *tp;
	LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);
	pgb->pgb_nfree--;
	CPU_COUNT(CPU_COUNT_FREEPAGES, -1);
	pg->flags = PG_CLEAN;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	signed int candidate, limit, candidateidx, end, idx, skip;
	int pagemask;
	bool second_pass;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	paddr_t cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %d\n", num, psi);
#endif

	low = atop(low);
	high = atop(high);
	alignment = atop(alignment);

	/*
	 * Make sure that the physseg falls within the range to be
	 * allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return 0;

	/*
	 * We start our search just after where the last allocation
	 * succeeded.
	 */
	candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) +
	    uvm_physseg_get_start_hint(psi)), alignment);
	limit = uimin(high, uvm_physseg_get_avail_end(psi));
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);
	skip = 0;
	second_pass = false;

	for (;;) {
		bool ok = true;
		signed int cnt;

		if (candidate + num > limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 ||
			    second_pass) {
				/*
				 * We've run past the allowable range.
				 */
				return 0; /* FAIL = 0 pages */
			}
			/*
			 * We've wrapped around the end of this segment
			 * so restart at the beginning, but now our limit
			 * is where we started.
			 */
			second_pass = true;
			candidate = roundup2(uimax(low,
			    uvm_physseg_get_avail_start(psi)), alignment);
			limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			skip = 0;
			continue;
		}
		if (boundary != 0 &&
		    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary.  Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			candidate = (candidate + num - 1) & pagemask;
			candidate = roundup2(candidate, alignment);
			skip = 0;
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (uvm_physseg_find(candidate, &cidx) != psi)
			panic("pgalloc contig: botch1");
		if (cidx != candidate - uvm_physseg_get_start(psi))
			panic("pgalloc contig: botch2");
		if (uvm_physseg_find(candidate + num - 1, &cidx) != psi)
			panic("pgalloc contig: botch3");
		if (cidx != candidate - uvm_physseg_get_start(psi) + num - 1)
			panic("pgalloc contig: botch4");
#endif
		candidateidx = candidate - uvm_physseg_get_start(psi);
		end = candidateidx + num;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
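		/*
		 * Example with illustrative numbers: if num = 16 and
		 * alignment = 1, and the scan below finds a non-free page
		 * at idx - candidateidx == 3, then cnt = 4 and skip = 12;
		 * the window advances by 4 pages, and its first 12 pages
		 * are exactly the 12 pages already verified free on this
		 * pass, so the next scan can stop at candidateidx + skip.
		 */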
#ifdef PGALLOC_VERBOSE
		printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR,
		    __func__, psi, candidateidx, end, skip, alignment);
#endif
		/*
		 * We start at the end and work backwards since if we find a
		 * non-free page, it makes no sense to continue.
		 *
		 * But on the plus side we have "vetted" some number of free
		 * pages.  If this iteration fails, we may be able to skip
		 * testing most of those pages again in the next pass.
		 */
		for (idx = end - 1; idx >= candidateidx + skip; idx--) {
			if (VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, idx)) == 0) {
				ok = false;
				break;
			}

#ifdef DEBUG
			if (idx > candidateidx) {
				idxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx));
				lastidxpa = VM_PAGE_TO_PHYS(uvm_physseg_get_pg(psi, idx - 1));
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (ok) {
			while (skip-- > 0) {
				KDASSERT(VM_PAGE_IS_FREE(uvm_physseg_get_pg(psi, candidateidx + skip)));
			}
#ifdef PGALLOC_VERBOSE
			printf(": ok\n");
#endif
			break;
		}

#ifdef PGALLOC_VERBOSE
		printf(": non-free at %#x\n", idx - candidateidx);
#endif
		/*
		 * count the number of pages we can advance
		 * since we know they aren't all free.
		 */
		cnt = idx + 1 - candidateidx;
		/*
		 * now round that up to the needed alignment.
		 */
		cnt = roundup2(cnt, alignment);
		/*
		 * The number of pages we can skip checking
		 * (might be 0 if cnt > num).
		 */
		skip = uimax(num - cnt, 0);
		candidate += cnt;
	}

	/*
	 * we have a chunk of memory that conforms to the requested constraints.
	 */
	for (idx = candidateidx; idx < end; idx++)
		uvm_pglist_add(uvm_physseg_get_pg(psi, idx), rlist);

	/*
	 * the next time we need to search this segment, start after this
	 * chunk of pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + num -
	    uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
	    "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + num,
	    uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return num; /* number of pages allocated */
}

static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
	int fl;
	int error;

	uvm_physseg_t psi;
	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi);
		    psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi);
		    psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			num -= uvm_pglistalloc_c_ps(psi, num, low, high,
			    alignment, boundary, rlist);
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_pgfl_unlock();
	uvm_kick_pdaemon();
	return (error);
}

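/*
 * uvm_pglistalloc_s_ps: allocate up to num free pages within [low, high)
 * from the single physseg psi, with no contiguity, alignment, or boundary
 * constraints.  Returns the number of pages allocated.
 */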
static int
uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo, limit, candidate;
	struct vm_page *pg;
	bool second_pass;
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %zd\n", num, psi);
#endif

	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
	KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
	KASSERT(uvm_physseg_get_avail_end(psi) <= uvm_physseg_get_end(psi));

	low = atop(low);
	high = atop(high);
	todo = num;
	candidate = uimax(low, uvm_physseg_get_avail_start(psi) +
	    uvm_physseg_get_start_hint(psi));
	limit = uimin(high, uvm_physseg_get_avail_end(psi));
	pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
	second_pass = false;

	/*
	 * Make sure that the physseg falls within the range to be
	 * allocated from.
	 */
	if (high <= uvm_physseg_get_avail_start(psi) ||
	    low >= uvm_physseg_get_avail_end(psi))
		return 0;

again:
	for (;; candidate++, pg++) {
		if (candidate >= limit) {
			if (uvm_physseg_get_start_hint(psi) == 0 ||
			    second_pass) {
				candidate = limit - 1;
				break;
			}
			second_pass = true;
			candidate = uimax(low, uvm_physseg_get_avail_start(psi));
			limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
			    uvm_physseg_get_start_hint(psi));
			pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
			goto again;
		}
#if defined(DEBUG)
		{
			paddr_t cidx = 0;
			const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx);
			KDASSERTMSG(bank == psi,
			    "uvm_physseg_find(%#x) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG,
			    candidate, bank, psi);
			KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi),
			    "uvm_physseg_find(%#x): %#"PRIxPADDR" != off %"PRIxPADDR,
			    candidate, cidx, candidate - uvm_physseg_get_start(psi));
		}
#endif
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0) {
			break;
		}
	}

	/*
	 * The next time we need to search this segment,
	 * start just after the pages we just allocated.
	 */
	uvm_physseg_set_start_hint(psi, candidate + 1 -
	    uvm_physseg_get_avail_start(psi));
	KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
	    "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + 1,
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_start_hint(psi),
	    uvm_physseg_get_avail_end(psi),
	    uvm_physseg_get_avail_start(psi),
	    uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi));

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}

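/*
 * uvm_pglistalloc_simple: allocate num pages with no contiguity, alignment,
 * or boundary constraints, walking all freelists and physsegs and, if
 * waitok, sleeping and retrying until enough pages are free.  On failure
 * without waitok, any pages already gathered on rlist are freed again.
 */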
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, error;
	uvm_physseg_t psi;
	int count = 0;

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	uvm_pgfl_lock();
	count++;

	/* Are there even any free pages? */
	if (uvm_availmem(false) <=
	    (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = uvm_physseg_get_last(); uvm_physseg_valid_p(psi);
		    psi = uvm_physseg_get_prev(psi))
#else
		for (psi = uvm_physseg_get_first(); uvm_physseg_valid_p(psi);
		    psi = uvm_physseg_get_next(psi))
#endif
		{
			if (uvm_physseg_get_free_list(psi) != fl)
				continue;

			num -= uvm_pglistalloc_s_ps(psi, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_pgfl_unlock();
	uvm_kick_pdaemon();

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			if (count == 1)
				printf("pglistalloc waiting\n");
#endif
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());
	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup2(low, alignment);

	TAILQ_INIT(rlist);

	/*
	 * Turn off the caching of free pages - we need everything to be on
	 * the global freelists.
	 */
	uvm_pgflcache_pause();
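
	/*
	 * If the caller will accept fewer segments than there are pages,
	 * or imposes an alignment or boundary constraint, the allocation
	 * must be physically contiguous; otherwise any free pages in the
	 * range will do.
	 */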
	if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
	    (boundary != 0))
		res = uvm_pglistalloc_contig(num, low, high, alignment,
		    boundary, rlist);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	uvm_pgflcache_resume();

	return (res);
}

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct vm_page *pg;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	while ((pg = TAILQ_FIRST(list)) != NULL) {
		TAILQ_REMOVE(list, pg, pageq.queue);
		uvm_pagefree(pg);
		STAT_DECR(uvm_pglistalloc_npages);
	}
}