/*	$NetBSD: uvm_pglist.c,v 1.67 2014/10/26 01:42:07 christos Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.67 2014/10/26 01:42:07 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif

/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */
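
/*
 * Usage sketch (hypothetical caller; values illustrative only): a driver
 * needing four physically contiguous pages below 16MB might do:
 *
 *	struct pglist mlist;
 *	struct vm_page *pg;
 *	int error;
 *
 *	error = uvm_pglistalloc(4 * PAGE_SIZE, 0, 0xffffff, 0, 0,
 *	    &mlist, 1, 1);
 *	if (error == 0) {
 *		TAILQ_FOREACH(pg, &mlist, pageq.queue)
 *			... map VM_PAGE_TO_PHYS(pg) ...
 *		uvm_pglistfree(&mlist);
 *	}
 *
 * nsegs == 1 forces the contiguous allocator below; callers that can
 * accept scattered pages pass a larger nsegs and take the simple path.
 */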

static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	int free_list __unused, color __unused, pgflidx;

	KASSERT(mutex_owned(&uvm_fpageqlock));

#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif

	free_list = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef UVMDEBUG
	struct vm_page *tp;
	LIST_FOREACH(tp,
	    &uvm.page_free[free_list].pgfl_buckets[color].pgfl_queues[pgflidx],
	    pageq.list) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);	/* global */
	LIST_REMOVE(pg, listq.list);	/* cpu */
	uvmexp.free--;
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;
	VM_FREE_PAGE_TO_CPU(pg)->pages[pgflidx]--;
	pg->flags = PG_CLEAN;
	pg->pqflags = 0;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

static int
uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	signed int candidate, limit, candidateidx, end, idx, skip;
	struct vm_page *pgs;
	int pagemask;
	bool second_pass;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %zd\n", num, ps - vm_physmem);
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	low = atop(low);
	high = atop(high);
	alignment = atop(alignment);

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= ps->avail_start || low >= ps->avail_end)
		return 0;

	/*
	 * We start our search just after where the last allocation
	 * succeeded.
	 */
	candidate = roundup2(max(low, ps->avail_start + ps->start_hint), alignment);
	limit = min(high, ps->avail_end);
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);
	skip = 0;
	second_pass = false;
	pgs = ps->pgs;

	for (;;) {
		bool ok = true;
		signed int cnt;

		if (candidate + num > limit) {
			if (ps->start_hint == 0 || second_pass) {
				/*
				 * We've run past the allowable range.
				 */
				return 0; /* FAIL = 0 pages */
			}
			/*
			 * We've wrapped around the end of this segment
			 * so restart at the beginning, but now our limit
			 * is where we started.
			 */
			second_pass = true;
			candidate = roundup2(max(low, ps->avail_start), alignment);
			limit = min(limit, ps->avail_start + ps->start_hint);
			skip = 0;
			continue;
		}
		if (boundary != 0 &&
		    ((candidate ^ (candidate + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			candidate = (candidate + num - 1) & pagemask;
			candidate = roundup2(candidate, alignment);
			skip = 0;
			continue;
		}
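		/*
		 * An illustrative trace of the check above (numbers are
		 * hypothetical): with a 16-page boundary, pagemask == ~15.
		 * For num == 4 and candidate page 14, the last page is 17;
		 * 14 ^ 17 has bits set above bit 3, so the run straddles
		 * the boundary at page 16 and candidate is advanced to
		 * (14 + 4 - 1) & ~15 == 16 before realignment.
		 */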
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (vm_physseg_find(candidate, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch1");
		if (cidx != candidate - ps->start)
			panic("pgalloc contig: botch2");
		if (vm_physseg_find(candidate + num - 1, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch3");
		if (cidx != candidate - ps->start + num - 1)
			panic("pgalloc contig: botch4");
#endif
		candidateidx = candidate - ps->start;
		end = candidateidx + num;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
#ifdef PGALLOC_VERBOSE
		printf("%s: ps=%p candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR,
		    __func__, ps, candidateidx, end, skip, alignment);
#endif
		/*
		 * We start at the end and work backwards since if we find a
		 * non-free page, it makes no sense to continue.
		 *
		 * But on the plus side we have "vetted" some number of free
		 * pages.  If this iteration fails, we may be able to skip
		 * testing most of those pages again in the next pass.
		 */
		for (idx = end - 1; idx >= candidateidx + skip; idx--) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
				ok = false;
				break;
			}

#ifdef DEBUG
			if (idx > candidateidx) {
				idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (ok) {
			while (skip-- > 0) {
				KDASSERT(VM_PAGE_IS_FREE(&pgs[candidateidx + skip]));
			}
#ifdef PGALLOC_VERBOSE
			printf(": ok\n");
#endif
			break;
		}

#ifdef PGALLOC_VERBOSE
		printf(": non-free at %#x\n", idx - candidateidx);
#endif
		/*
		 * Count the number of pages we can advance
		 * since we know they aren't all free.
		 */
		cnt = idx + 1 - candidateidx;
		/*
		 * Now round that up to the needed alignment.
		 */
		cnt = roundup2(cnt, alignment);
		/*
		 * The number of pages we can skip checking
		 * (might be 0 if cnt > num).
		 */
		skip = max(num - cnt, 0);
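		/*
		 * Illustration (hypothetical numbers): with num == 8 and
		 * page-sized alignment, a non-free page found 2 pages into
		 * the window gives cnt == 3 and skip == 5.  The five tail
		 * pages already proven free become the head of the next
		 * window, so the next backwards scan only has to test the
		 * three pages newly added at the tail.
		 */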
		candidate += cnt;
	}

	/*
	 * We have a chunk of memory that conforms to the requested
	 * constraints.
	 */
	for (idx = candidateidx, pgs += idx; idx < end; idx++, pgs++)
		uvm_pglist_add(pgs, rlist);

	/*
	 * The next time we need to search this segment, start after this
	 * chunk of pages we just allocated.
	 */
	ps->start_hint = candidate + num - ps->avail_start;
	KASSERTMSG(ps->start_hint <= ps->avail_end - ps->avail_start,
	    "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + num,
	    ps->start_hint, ps->start_hint, ps->avail_end, ps->avail_start,
	    ps->avail_end - ps->avail_start);

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return num; /* number of pages allocated */
}

static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
	int fl, psi;
	struct vm_physseg *ps;
	int error;

	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_c_ps(ps, num, low, high,
			    alignment, boundary, rlist);
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
				    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);
	return (error);
}

static int
uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo, limit, candidate;
	struct vm_page *pg;
	bool second_pass;
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %zd\n", num, ps - vm_physmem);
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));
	KASSERT(ps->start <= ps->avail_start);
	KASSERT(ps->start <= ps->avail_end);
	KASSERT(ps->avail_start <= ps->end);
	KASSERT(ps->avail_end <= ps->end);

	low = atop(low);
	high = atop(high);
	todo = num;
	candidate = max(low, ps->avail_start + ps->start_hint);
	limit = min(high, ps->avail_end);
	pg = &ps->pgs[candidate - ps->start];
	second_pass = false;

	/*
	 * Make sure that physseg falls within the range to be allocated from.
	 */
	if (high <= ps->avail_start || low >= ps->avail_end)
		return 0;

again:
	for (;; candidate++, pg++) {
		if (candidate >= limit) {
			if (ps->start_hint == 0 || second_pass) {
				candidate = limit - 1;
				break;
			}
			second_pass = true;
			candidate = max(low, ps->avail_start);
			limit = min(limit, ps->avail_start + ps->start_hint);
			pg = &ps->pgs[candidate - ps->start];
			goto again;
		}
#if defined(DEBUG)
		{
			int cidx = 0;
			const int bank = vm_physseg_find(candidate, &cidx);
			KDASSERTMSG(bank == ps - vm_physmem,
			    "vm_physseg_find(%#x) (%d) != ps %zd",
			    candidate, bank, ps - vm_physmem);
			KDASSERTMSG(cidx == candidate - ps->start,
			    "vm_physseg_find(%#x): %#x != off %"PRIxPADDR,
			    candidate, cidx, candidate - ps->start);
		}
#endif
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0) {
			break;
		}
	}

	/*
	 * The next time we need to search this segment,
	 * start just after the pages we just allocated.
	 */
	ps->start_hint = candidate + 1 - ps->avail_start;
	KASSERTMSG(ps->start_hint <= ps->avail_end - ps->avail_start,
	    "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
	    candidate + 1,
	    ps->start_hint, ps->start_hint, ps->avail_end, ps->avail_start,
	    ps->avail_end - ps->avail_start);

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}
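
/*
 * uvm_pglistalloc_simple: allocate "num" pages with no contiguity
 * requirement.  This is the path uvm_pglistalloc() below takes when the
 * caller supplies enough segments for fully scattered pages and imposes
 * no alignment or boundary constraint.
 */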
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, psi, error;
	struct vm_physseg *ps;

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}

	}

out:
	/*
	 * Check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			printf("pglistalloc waiting\n");
#endif
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup2(low, alignment);

	TAILQ_INIT(rlist);

	if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
	    (boundary != 0))
		res = uvm_pglistalloc_contig(num, low, high, alignment,
		    boundary, rlist);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	return (res);
}
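
/*
 * For example (hypothetical requests): a 4-page request with nsegs == 4,
 * page-sized alignment, and no boundary takes the simple path above,
 * since each page may land anywhere; the same request with nsegs == 1
 * must be physically contiguous and therefore takes the contig path.
 */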

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */

void
uvm_pglistfree(struct pglist *list)
{
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	int index, color, queue;
	bool iszero;

	/*
	 * Lock the free list and free each page.
	 */

	mutex_spin_enter(&uvm_fpageqlock);
	ucpu = curcpu()->ci_data.cpu_uvm;
	while ((pg = TAILQ_FIRST(list)) != NULL) {
		KASSERT(!uvmpdpol_pageisqueued_p(pg));
		TAILQ_REMOVE(list, pg, pageq.queue);
		iszero = (pg->flags & PG_ZERO);
		pg->pqflags = PQ_FREE;
#ifdef DEBUG
		pg->uobject = (void *)0xdeadbeef;
		pg->uanon = (void *)0xdeadbeef;
#endif /* DEBUG */
#ifdef DEBUG
		if (iszero)
			uvm_pagezerocheck(pg);
#endif /* DEBUG */
		index = uvm_page_lookup_freelist(pg);
		color = VM_PGCOLOR_BUCKET(pg);
		queue = iszero ? PGFL_ZEROS : PGFL_UNKNOWN;
		pg->offset = (uintptr_t)ucpu;
		LIST_INSERT_HEAD(&uvm.page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, pageq.list);
		LIST_INSERT_HEAD(&ucpu->page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, listq.list);
		uvmexp.free++;
		if (iszero)
			uvmexp.zeropages++;
		ucpu->pages[queue]++;
		STAT_DECR(uvm_pglistalloc_npages);
	}
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN])
		ucpu->page_idle_zero = vm_page_zero_enable;
	mutex_spin_exit(&uvm_fpageqlock);
}