/* $NetBSD: uvm_page.c,v 1.46 2000/12/01 09:54:42 chs Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>

#define UVM_PAGE                /* pull in uvm_page.h functions */
#include <uvm/uvm.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];   /* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;                            /* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 */
/*
 * XXX disabled until we can find a way to do this without causing
 * problems for either cpu caches or DMA latency.
 */
boolean_t vm_page_zero_enable = FALSE;

extern struct uvm_pagerops uvm_vnodeops;

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;

/*
 * we use a hash table with only one bucket during bootup.  we will
 * later rehash (resize) the hash table once the allocator is ready.
 * we statically allocate the one bootstrap bucket below...
 */

static struct pglist uvm_bootbucket;

/*
 * local prototypes
 */

static void uvm_pageinsert __P((struct vm_page *));
static void uvm_pageremove __P((struct vm_page *));

/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object and the hash table
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

__inline static void
uvm_pageinsert(pg)
        struct vm_page *pg;
{
        struct pglist *buck;
        int s;

#ifdef DIAGNOSTIC
        if (pg->flags & PG_TABLED)
                panic("uvm_pageinsert: already inserted");
#endif

        buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
        s = splimp();
        simple_lock(&uvm.hashlock);
        TAILQ_INSERT_TAIL(buck, pg, hashq);     /* put in hash */
        simple_unlock(&uvm.hashlock);
        splx(s);

        TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
        pg->flags |= PG_TABLED;
        pg->uobject->uo_npages++;
}

/*
 * uvm_pageremove: remove page from object and hash
 *
 * => caller must lock object
 * => caller must lock page queues
 */

static __inline void
uvm_pageremove(pg)
        struct vm_page *pg;
{
        struct pglist *buck;
        int s;

        KASSERT(pg->flags & PG_TABLED);
        buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
        s = splimp();
        simple_lock(&uvm.hashlock);
        TAILQ_REMOVE(buck, pg, hashq);
        simple_unlock(&uvm.hashlock);
        splx(s);

        if (pg->uobject->pgops == &uvm_vnodeops) {
                uvmexp.vnodepages--;
        }

        /* object should be locked */
        TAILQ_REMOVE(&pg->uobject->memq, pg, listq);

        pg->flags &= ~PG_TABLED;
        pg->uobject->uo_npages--;
        pg->uobject = NULL;
        pg->version++;
}
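
/*
 * Illustrative sketch (not part of the original file): the two helpers
 * above are the only writers of the <obj,offset> hash table.  A reader
 * that wants to find a page follows the same pattern -- hash to a bucket,
 * block interrupts with splimp(), take uvm.hashlock, and walk the bucket's
 * hashq list.  The hypothetical example_pagelookup() below only sketches
 * that pattern; the real lookup helper provided elsewhere in UVM may
 * differ in name and details.
 */
#if 0
static struct vm_page *
example_pagelookup(obj, off)
        struct uvm_object *obj;
        voff_t off;
{
        struct vm_page *pg;
        struct pglist *buck;
        int s;

        buck = &uvm.page_hash[uvm_pagehash(obj, off)];
        s = splimp();
        simple_lock(&uvm.hashlock);
        for (pg = buck->tqh_first; pg != NULL; pg = pg->hashq.tqe_next) {
                if (pg->uobject == obj && pg->offset == off)
                        break;          /* found it */
        }
        simple_unlock(&uvm.hashlock);
        splx(s);
        return (pg);
}
#endif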

/*
 * uvm_page_init: init the page system.  called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(kvm_startp, kvm_endp)
        vaddr_t *kvm_startp, *kvm_endp;
{
        vsize_t freepages, pagecount, n;
        vm_page_t pagearray;
        int lcv, i;
        paddr_t paddr;

        /*
         * step 1: init the page queues and page queue locks
         */

        for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
                for (i = 0; i < PGFL_NQUEUES; i++)
                        TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]);
        }
        TAILQ_INIT(&uvm.page_active);
        TAILQ_INIT(&uvm.page_inactive_swp);
        TAILQ_INIT(&uvm.page_inactive_obj);
        simple_lock_init(&uvm.pageqlock);
        simple_lock_init(&uvm.fpageqlock);

        /*
         * step 2: init the <obj,offset> => <page> hash table.  for now
         * we just have one bucket (the bootstrap bucket).  later on we
         * will allocate new buckets as we dynamically resize the hash table.
         */

        uvm.page_nhash = 1;                     /* 1 bucket */
        uvm.page_hashmask = 0;                  /* mask for hash function */
        uvm.page_hash = &uvm_bootbucket;        /* install bootstrap bucket */
        TAILQ_INIT(uvm.page_hash);              /* init hash table */
        simple_lock_init(&uvm.hashlock);        /* init hash table lock */

        /*
         * step 3: allocate vm_page structures.
         */

        /*
         * sanity check:
         * before calling this function the MD code is expected to register
         * some free RAM with the uvm_page_physload() function.  our job
         * now is to allocate vm_page structures for this memory.
         */

        if (vm_nphysseg == 0)
                panic("uvm_page_bootstrap: no memory pre-allocated");

        /*
         * first calculate the number of free pages...
         *
         * note that we use start/end rather than avail_start/avail_end.
         * this allows us to allocate extra vm_page structures in case we
         * want to return some memory to the pool after booting.
         */

        freepages = 0;
        for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
                freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

        /*
         * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
         * use.  for each page of memory we use we need a vm_page structure.
         * thus, the total number of pages we can use is the total size of
         * the memory divided by the PAGE_SIZE plus the size of the vm_page
         * structure.  we add one to freepages as a fudge factor to avoid
         * truncation errors (since we can only allocate in terms of whole
         * pages).
         */

        pagecount = ((freepages + 1) << PAGE_SHIFT) /
            (PAGE_SIZE + sizeof(struct vm_page));
        pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
            sizeof(struct vm_page));
        memset(pagearray, 0, pagecount * sizeof(struct vm_page));
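
        /*
         * Worked example of the calculation above (illustrative numbers,
         * not from the original code): with 4 KB pages and, say, a 64-byte
         * struct vm_page, 32768 pages of registered RAM give
         *
         *      pagecount = ((32768 + 1) * 4096) / (4096 + 64) = 32264
         *
         * i.e. roughly 504 of the 32768 pages are consumed by the vm_page
         * array itself (32264 * 64 bytes ~= 504 pages) and the remaining
         * pages stay usable as page frames.
         */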

        /*
         * step 4: init the vm_page structures and put them in the correct
         * place...
         */

        for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
                n = vm_physmem[lcv].end - vm_physmem[lcv].start;
                if (n > pagecount) {
                        printf("uvm_page_init: lost %ld page(s) in init\n",
                            (long)(n - pagecount));
                        panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
                        /* n = pagecount; */
                }
                /* set up page array pointers */
                vm_physmem[lcv].pgs = pagearray;
                pagearray += n;
                pagecount -= n;
                vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

                /* init and free vm_pages (we've already zeroed them) */
                paddr = ptoa(vm_physmem[lcv].start);
                for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
                        vm_physmem[lcv].pgs[i].phys_addr = paddr;
                        if (atop(paddr) >= vm_physmem[lcv].avail_start &&
                            atop(paddr) <= vm_physmem[lcv].avail_end) {
                                uvmexp.npages++;
                                /* add page to free pool */
                                uvm_pagefree(&vm_physmem[lcv].pgs[i]);
                        }
                }
        }

        /*
         * step 5: pass up the values of virtual_space_start and
         * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
         * layers of the VM.
         */

        *kvm_startp = round_page(virtual_space_start);
        *kvm_endp = trunc_page(virtual_space_end);

        /*
         * step 6: init locks for kernel threads
         */

        simple_lock_init(&uvm.pagedaemon_lock);
        simple_lock_init(&uvm.aiodoned_lock);

        /*
         * step 7: init reserve thresholds
         * XXXCDC - values may need adjusting
         */
        uvmexp.reserve_pagedaemon = 1;
        uvmexp.reserve_kernel = 5;

        /*
         * step 8: determine if we should zero pages in the idle
         * loop.
         */

        uvm.page_idle_zero = vm_page_zero_enable;

        /*
         * done!
         */

        uvm.page_init_done = TRUE;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 */

void
uvm_setpagesize()
{
        if (uvmexp.pagesize == 0)
                uvmexp.pagesize = DEFAULT_PAGE_SIZE;
        uvmexp.pagemask = uvmexp.pagesize - 1;
        if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
                panic("uvm_setpagesize: page size not a power of two");
        for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
                if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
                        break;
}

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

vaddr_t
uvm_pageboot_alloc(size)
        vsize_t size;
{
#if defined(PMAP_STEAL_MEMORY)
        vaddr_t addr;

        /*
         * defer bootstrap allocation to MD code (it may want to allocate
         * from a direct-mapped segment).  pmap_steal_memory should round
         * off virtual_space_start/virtual_space_end.
         */

        addr = pmap_steal_memory(size, &virtual_space_start,
            &virtual_space_end);

        return(addr);

#else /* !PMAP_STEAL_MEMORY */

        static boolean_t initialized = FALSE;
        vaddr_t addr, vaddr;
        paddr_t paddr;

        /* round to page size */
        size = round_page(size);

        /*
         * on first call to this function, initialize ourselves.
         */
        if (initialized == FALSE) {
                pmap_virtual_space(&virtual_space_start, &virtual_space_end);

                /* round it the way we like it */
                virtual_space_start = round_page(virtual_space_start);
                virtual_space_end = trunc_page(virtual_space_end);

                initialized = TRUE;
        }

        /*
         * allocate virtual memory for this request
         */
        if (virtual_space_start == virtual_space_end ||
            (virtual_space_end - virtual_space_start) < size)
                panic("uvm_pageboot_alloc: out of virtual space");

        addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
        /*
         * If the kernel pmap can't map the requested space,
         * then allocate more resources for it.
         */
        if (uvm_maxkaddr < (addr + size)) {
                uvm_maxkaddr = pmap_growkernel(addr + size);
                if (uvm_maxkaddr < (addr + size))
                        panic("uvm_pageboot_alloc: pmap_growkernel() failed");
        }
#endif

        virtual_space_start += size;

        /*
         * allocate and mapin physical pages to back new virtual pages
         */

        for (vaddr = round_page(addr) ; vaddr < addr + size ;
            vaddr += PAGE_SIZE) {

                if (!uvm_page_physget(&paddr))
                        panic("uvm_pageboot_alloc: out of memory");

                /*
                 * Note this memory is no longer managed, so using
                 * pmap_kenter is safe.
                 */
                pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
        }
        return(addr);
#endif /* PMAP_STEAL_MEMORY */
}
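
/*
 * Illustrative sketch (not part of the original file): uvm_pageboot_alloc()
 * is meant to be called before uvm_page_init() finishes, typically from MD
 * bootstrap code that needs wired memory before the normal allocators exist.
 * The fragment below is hypothetical (the variable name and size are made
 * up); it only shows the calling convention: the caller gets back
 * page-aligned, already-mapped KVA that is never freed, and must zero it
 * itself if zeroed memory is required.
 */
#if 0
        /* e.g. somewhere in MD pmap_bootstrap(): */
        char *early_buf;

        early_buf = (char *)uvm_pageboot_alloc(16 * PAGE_SIZE);
        memset(early_buf, 0, 16 * PAGE_SIZE);   /* zero it if needed */
#endif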
434 */ 435 if (uvm_maxkaddr < (addr + size)) { 436 uvm_maxkaddr = pmap_growkernel(addr + size); 437 if (uvm_maxkaddr < (addr + size)) 438 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 439 } 440 #endif 441 442 virtual_space_start += size; 443 444 /* 445 * allocate and mapin physical pages to back new virtual pages 446 */ 447 448 for (vaddr = round_page(addr) ; vaddr < addr + size ; 449 vaddr += PAGE_SIZE) { 450 451 if (!uvm_page_physget(&paddr)) 452 panic("uvm_pageboot_alloc: out of memory"); 453 454 /* 455 * Note this memory is no longer managed, so using 456 * pmap_kenter is safe. 457 */ 458 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE); 459 } 460 return(addr); 461 #endif /* PMAP_STEAL_MEMORY */ 462 } 463 464 #if !defined(PMAP_STEAL_MEMORY) 465 /* 466 * uvm_page_physget: "steal" one page from the vm_physmem structure. 467 * 468 * => attempt to allocate it off the end of a segment in which the "avail" 469 * values match the start/end values. if we can't do that, then we 470 * will advance both values (making them equal, and removing some 471 * vm_page structures from the non-avail area). 472 * => return false if out of memory. 473 */ 474 475 /* subroutine: try to allocate from memory chunks on the specified freelist */ 476 static boolean_t uvm_page_physget_freelist __P((paddr_t *, int)); 477 478 static boolean_t 479 uvm_page_physget_freelist(paddrp, freelist) 480 paddr_t *paddrp; 481 int freelist; 482 { 483 int lcv, x; 484 485 /* pass 1: try allocating from a matching end */ 486 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 487 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) 488 #else 489 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 490 #endif 491 { 492 493 if (uvm.page_init_done == TRUE) 494 panic("uvm_page_physget: called _after_ bootstrap"); 495 496 if (vm_physmem[lcv].free_list != freelist) 497 continue; 498 499 /* try from front */ 500 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start && 501 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { 502 *paddrp = ptoa(vm_physmem[lcv].avail_start); 503 vm_physmem[lcv].avail_start++; 504 vm_physmem[lcv].start++; 505 /* nothing left? nuke it */ 506 if (vm_physmem[lcv].avail_start == 507 vm_physmem[lcv].end) { 508 if (vm_nphysseg == 1) 509 panic("vum_page_physget: out of memory!"); 510 vm_nphysseg--; 511 for (x = lcv ; x < vm_nphysseg ; x++) 512 /* structure copy */ 513 vm_physmem[x] = vm_physmem[x+1]; 514 } 515 return (TRUE); 516 } 517 518 /* try from rear */ 519 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end && 520 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { 521 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1); 522 vm_physmem[lcv].avail_end--; 523 vm_physmem[lcv].end--; 524 /* nothing left? nuke it */ 525 if (vm_physmem[lcv].avail_end == 526 vm_physmem[lcv].start) { 527 if (vm_nphysseg == 1) 528 panic("uvm_page_physget: out of memory!"); 529 vm_nphysseg--; 530 for (x = lcv ; x < vm_nphysseg ; x++) 531 /* structure copy */ 532 vm_physmem[x] = vm_physmem[x+1]; 533 } 534 return (TRUE); 535 } 536 } 537 538 /* pass2: forget about matching ends, just allocate something */ 539 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 540 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) 541 #else 542 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 543 #endif 544 { 545 546 /* any room in this bank? */ 547 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end) 548 continue; /* nope */ 549 550 *paddrp = ptoa(vm_physmem[lcv].avail_start); 551 vm_physmem[lcv].avail_start++; 552 /* truncate! 
                vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

                /* nothing left?   nuke it */
                if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
                        if (vm_nphysseg == 1)
                                panic("uvm_page_physget: out of memory!");
                        vm_nphysseg--;
                        for (x = lcv ; x < vm_nphysseg ; x++)
                                /* structure copy */
                                vm_physmem[x] = vm_physmem[x+1];
                }
                return (TRUE);
        }

        return (FALSE);        /* whoops! */
}

boolean_t
uvm_page_physget(paddrp)
        paddr_t *paddrp;
{
        int i;

        /* try in the order of freelist preference */
        for (i = 0; i < VM_NFREELIST; i++)
                if (uvm_page_physget_freelist(paddrp, i) == TRUE)
                        return (TRUE);
        return (FALSE);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */

void
uvm_page_physload(start, end, avail_start, avail_end, free_list)
        paddr_t start, end, avail_start, avail_end;
        int free_list;
{
        int preload, lcv;
        psize_t npages;
        struct vm_page *pgs;
        struct vm_physseg *ps;

        if (uvmexp.pagesize == 0)
                panic("uvm_page_physload: page size not set!");

        if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
                panic("uvm_page_physload: bad free list %d\n", free_list);

        if (start >= end)
                panic("uvm_page_physload: start >= end");

        /*
         * do we have room?
         */
        if (vm_nphysseg == VM_PHYSSEG_MAX) {
                printf("uvm_page_physload: unable to load physical memory "
                    "segment\n");
                printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
                    VM_PHYSSEG_MAX, (long long)start, (long long)end);
                printf("\tincrease VM_PHYSSEG_MAX\n");
                return;
        }

        /*
         * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
         * called yet, so malloc is not available).
         */
        for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
                if (vm_physmem[lcv].pgs)
                        break;
        }
        preload = (lcv == vm_nphysseg);

        /*
         * if VM is already running, attempt to malloc() vm_page structures
         */
        if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
                panic("uvm_page_physload: tried to add RAM after vm_mem_init");
#else
                /* XXXCDC: need some sort of lockout for this case */
                paddr_t paddr;
                npages = end - start;  /* # of pages */
                pgs = malloc(sizeof(struct vm_page) * npages,
                    M_VMPAGE, M_NOWAIT);
                if (pgs == NULL) {
                        printf("uvm_page_physload: can not malloc vm_page "
                            "structs for segment\n");
                        printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
                        return;
                }
                /* zero data, init phys_addr and free_list, and free pages */
                memset(pgs, 0, sizeof(struct vm_page) * npages);
                for (lcv = 0, paddr = ptoa(start) ;
                    lcv < npages ; lcv++, paddr += PAGE_SIZE) {
                        pgs[lcv].phys_addr = paddr;
                        pgs[lcv].free_list = free_list;
                        if (atop(paddr) >= avail_start &&
                            atop(paddr) <= avail_end)
                                uvm_pagefree(&pgs[lcv]);
                }
                /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
                /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
        } else {

                /* gcc complains if these don't get init'd */
                pgs = NULL;
                npages = 0;

        }

        /*
         * now insert us in the proper place in vm_physmem[]
         */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)

        /* random: put it at the end (easy!) */
        ps = &vm_physmem[vm_nphysseg];

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

        {
                int x;
                /* sort by address for binary search */
                for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
                        if (start < vm_physmem[lcv].start)
                                break;
                ps = &vm_physmem[lcv];
                /* move back other entries, if necessary ... */
                for (x = vm_nphysseg ; x > lcv ; x--)
                        /* structure copy */
                        vm_physmem[x] = vm_physmem[x - 1];
        }

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)

        {
                int x;
                /* sort by largest segment first */
                for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
                        if ((end - start) >
                            (vm_physmem[lcv].end - vm_physmem[lcv].start))
                                break;
                ps = &vm_physmem[lcv];
                /* move back other entries, if necessary ... */
                for (x = vm_nphysseg ; x > lcv ; x--)
                        /* structure copy */
                        vm_physmem[x] = vm_physmem[x - 1];
        }

#else

        panic("uvm_page_physload: unknown physseg strategy selected!");

#endif

        ps->start = start;
        ps->end = end;
        ps->avail_start = avail_start;
        ps->avail_end = avail_end;
        if (preload) {
                ps->pgs = NULL;
        } else {
                ps->pgs = pgs;
                ps->lastpg = pgs + npages - 1;
        }
        ps->free_list = free_list;
        vm_nphysseg++;

        /*
         * done!
         */

        if (!preload)
                uvm_page_rehash();

        return;
}
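
/*
 * Illustrative sketch (not part of the original file): uvm_page_physload()
 * above is normally called from MD startup code, once per contiguous chunk
 * of RAM, before uvm_page_init() runs.  All arguments are page frame
 * numbers.  The fragment below is hypothetical (the symbol names are made
 * up); it only shows a typical call registering one segment whose low pages
 * are already occupied by the kernel.
 */
#if 0
        /* e.g. in MD pmap_bootstrap(), for a RAM segment [seg_start, seg_end): */
        uvm_page_physload(atop(seg_start), atop(seg_end),
            atop(kernel_end), atop(seg_end), VM_FREELIST_DEFAULT);
#endif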

/*
 * uvm_page_rehash: reallocate hash table based on number of free pages.
 */

void
uvm_page_rehash()
{
        int freepages, lcv, bucketcount, s, oldcount;
        struct pglist *newbuckets, *oldbuckets;
        struct vm_page *pg;
        size_t newsize, oldsize;

        /*
         * compute number of pages that can go in the free pool
         */

        freepages = 0;
        for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
                freepages +=
                    (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);

        /*
         * compute number of buckets needed for this number of pages
         */

        bucketcount = 1;
        while (bucketcount < freepages)
                bucketcount = bucketcount * 2;

        /*
         * compute the size of the current table and new table.
         */

        oldbuckets = uvm.page_hash;
        oldcount = uvm.page_nhash;
        oldsize = round_page(sizeof(struct pglist) * oldcount);
        newsize = round_page(sizeof(struct pglist) * bucketcount);

        /*
         * allocate the new buckets
         */

        newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize);
        if (newbuckets == NULL) {
                printf("uvm_page_rehash: WARNING: could not grow page "
                    "hash table\n");
                return;
        }
        for (lcv = 0 ; lcv < bucketcount ; lcv++)
                TAILQ_INIT(&newbuckets[lcv]);

        /*
         * now replace the old buckets with the new ones and rehash everything
         */

        s = splimp();
        simple_lock(&uvm.hashlock);
        uvm.page_hash = newbuckets;
        uvm.page_nhash = bucketcount;
        uvm.page_hashmask = bucketcount - 1;  /* power of 2 */

        /* ... and rehash */
        for (lcv = 0 ; lcv < oldcount ; lcv++) {
                while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
                        TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
                        TAILQ_INSERT_TAIL(
                          &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
                          pg, hashq);
                }
        }
        simple_unlock(&uvm.hashlock);
        splx(s);

        /*
         * free old bucket array if it is not the boot-time table
         */

        if (oldbuckets != &uvm_bootbucket)
                uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize);

        /*
         * done
         */
        return;
}


#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */

void uvm_page_physdump __P((void)); /* SHUT UP GCC */

/* call from DDB */
void
uvm_page_physdump()
{
        int lcv;

        printf("rehash: physical memory config [segs=%d of %d]:\n",
            vm_nphysseg, VM_PHYSSEG_MAX);
        for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
                printf("0x%llx->0x%llx [0x%llx->0x%llx]\n",
                    (long long)vm_physmem[lcv].start,
                    (long long)vm_physmem[lcv].end,
                    (long long)vm_physmem[lcv].avail_start,
                    (long long)vm_physmem[lcv].avail_end);
        printf("STRATEGY = ");
        switch (VM_PHYSSEG_STRAT) {
        case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
        case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
        case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
        default: printf("<<UNKNOWN>>!!!!\n");
        }
        printf("number of buckets = %d\n", uvm.page_nhash);
}
#endif
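
/*
 * Illustrative sketch (not part of the original file): a typical consumer
 * of the allocator defined below.  Callers usually go through a
 * uvm_pagealloc() convenience interface (which selects the normal freelist
 * strategy) rather than calling uvm_pagealloc_strat() directly; that
 * wrapper is assumed here.  "uobj" and "off" are hypothetical -- the point
 * is only the lock/retry protocol: the object is locked, a failed
 * allocation is retried after waiting for the pagedaemon, and the returned
 * page comes back BUSY and must eventually be unbusied.
 */
#if 0
        struct vm_page *pg;

        simple_lock(&uobj->vmobjlock);
        while ((pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO)) == NULL) {
                simple_unlock(&uobj->vmobjlock);
                uvm_wait("examplepg");          /* let the pagedaemon work */
                simple_lock(&uobj->vmobjlock);
        }
        /* ... use the page ... */
        pg->flags &= ~PG_BUSY;          /* a real caller also handles PG_WANTED */
        UVM_PAGE_OWN(pg, NULL);
        simple_unlock(&uobj->vmobjlock);
#endif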

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in hash)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *    appropriate priority free list than it is to get a zero'd or
 *    unknown contents page.  This is because we live with the
 *    consequences of a bad free list decision for the entire
 *    lifetime of the page, e.g. if the page comes from memory that
 *    is slower to access.
 */

struct vm_page *
uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
        struct uvm_object *obj;
        voff_t off;
        int flags;
        struct vm_anon *anon;
        int strat, free_list;
{
        int lcv, try1, try2, s, zeroit = 0;
        struct vm_page *pg;
        struct pglist *freeq;
        struct pgfreelist *pgfl;
        boolean_t use_reserve;

        KASSERT(obj == NULL || anon == NULL);
        KASSERT(off == trunc_page(off));
        s = uvm_lock_fpageq();

        /*
         * check to see if we need to generate some free pages by waking
         * the pagedaemon.
         */

        if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
            (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
            uvmexp.inactive < uvmexp.inactarg)) {
                wakeup(&uvm.pagedaemon);
        }

        /*
         * fail if any of these conditions is true:
         * [1]  there really are no free pages, or
         * [2]  only kernel "reserved" pages remain and
         *      the page isn't being allocated to a kernel object.
         * [3]  only pagedaemon "reserved" pages remain and
         *      the requestor isn't the pagedaemon.
         */

        use_reserve = (flags & UVM_PGA_USERESERVE) ||
            (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
        if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
            (uvmexp.free <= uvmexp.reserve_pagedaemon &&
            !(use_reserve && curproc == uvm.pagedaemon_proc)))
                goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

        /*
         * If we want a zero'd page, try the ZEROS queue first, otherwise
         * we try the UNKNOWN queue first.
         */
        if (flags & UVM_PGA_ZERO) {
                try1 = PGFL_ZEROS;
                try2 = PGFL_UNKNOWN;
        } else {
                try1 = PGFL_UNKNOWN;
                try2 = PGFL_ZEROS;
        }

 again:
        switch (strat) {
        case UVM_PGA_STRAT_NORMAL:
                /* Check all freelists in descending priority order. */
                for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
                        pgfl = &uvm.page_free[lcv];
                        if ((pg = TAILQ_FIRST((freeq =
                              &pgfl->pgfl_queues[try1]))) != NULL ||
                            (pg = TAILQ_FIRST((freeq =
                              &pgfl->pgfl_queues[try2]))) != NULL)
                                goto gotit;
                }

                /* No pages free! */
                goto fail;

        case UVM_PGA_STRAT_ONLY:
        case UVM_PGA_STRAT_FALLBACK:
                /* Attempt to allocate from the specified free list. */
                KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
                pgfl = &uvm.page_free[free_list];
                if ((pg = TAILQ_FIRST((freeq =
                      &pgfl->pgfl_queues[try1]))) != NULL ||
                    (pg = TAILQ_FIRST((freeq =
                      &pgfl->pgfl_queues[try2]))) != NULL)
                        goto gotit;

                /* Fall back, if possible. */
                if (strat == UVM_PGA_STRAT_FALLBACK) {
                        strat = UVM_PGA_STRAT_NORMAL;
                        goto again;
                }

                /* No pages free! */
                goto fail;

        default:
                panic("uvm_pagealloc_strat: bad strat %d", strat);
                /* NOTREACHED */
        }

 gotit:
        TAILQ_REMOVE(freeq, pg, pageq);
        uvmexp.free--;

        /* update zero'd page count */
        if (pg->flags & PG_ZERO)
                uvmexp.zeropages--;

        /*
         * update allocation statistics and remember if we have to
         * zero the page
         */
        if (flags & UVM_PGA_ZERO) {
                if (pg->flags & PG_ZERO) {
                        uvmexp.pga_zerohit++;
                        zeroit = 0;
                } else {
                        uvmexp.pga_zeromiss++;
                        zeroit = 1;
                }
        }

        uvm_unlock_fpageq(s);           /* unlock free page queue */

        pg->offset = off;
        pg->uobject = obj;
        pg->uanon = anon;
        pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
        pg->version++;
        if (anon) {
                anon->u.an_page = pg;
                pg->pqflags = PQ_ANON;
                uvmexp.anonpages++;
        } else {
                if (obj)
                        uvm_pageinsert(pg);
                pg->pqflags = 0;
        }
#if defined(UVM_PAGE_TRKOWN)
        pg->owner_tag = NULL;
#endif
        UVM_PAGE_OWN(pg, "new alloc");

        if (flags & UVM_PGA_ZERO) {
                /*
                 * A zero'd page is not clean.  If we got a page not already
                 * zero'd, then we have to zero it ourselves.
                 */
                pg->flags &= ~PG_CLEAN;
                if (zeroit)
                        pmap_zero_page(VM_PAGE_TO_PHYS(pg));
        }

        return(pg);

 fail:
        uvm_unlock_fpageq(s);
        return (NULL);
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(pg, newobj, newoff)
        struct vm_page *pg;
        struct uvm_object *newobj;
        voff_t newoff;
{
        /*
         * remove it from the old object
         */

        if (pg->uobject) {
                uvm_pageremove(pg);
        }

        /*
         * put it in the new object
         */

        if (newobj) {
                pg->uobject = newobj;
                pg->offset = newoff;
                pg->version++;
                uvm_pageinsert(pg);
        }
}


/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from hash/object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

void
uvm_pagefree(pg)
        struct vm_page *pg;
{
        int s;
        int saved_loan_count = pg->loan_count;

#ifdef DEBUG
        if (pg->uobject == (void *)0xdeadbeef &&
            pg->uanon == (void *)0xdeadbeef) {
                panic("uvm_pagefree: freeing free page %p\n", pg);
        }
#endif

        /*
         * if the page was an object page (and thus "TABLED"), remove it
         * from the object.
         */

        if (pg->flags & PG_TABLED) {

                /*
                 * if the object page is on loan we are going to drop ownership.
                 * it is possible that an anon will take over as owner for this
                 * page later on.  the anon will want a !PG_CLEAN page so that
                 * it knows it needs to allocate swap if it wants to page the
                 * page out.
                 */

                if (saved_loan_count)
                        pg->flags &= ~PG_CLEAN; /* in case an anon takes over */
                uvm_pageremove(pg);

                /*
                 * if our page was on loan, then we just lost control over it
                 * (in fact, if it was loaned to an anon, the anon may have
                 * already taken over ownership of the page by now and thus
                 * changed the loan_count [e.g. in uvmfault_anonget()]), so we
                 * just return (when the last loan is dropped, then the page
                 * can be freed by whatever was holding the last loan).
                 */

                if (saved_loan_count)
                        return;
        } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {

                /*
                 * if our page is owned by an anon and is loaned out to the
                 * kernel then we just want to drop ownership and return.
                 * the kernel must free the page when all its loans clear ...
                 * note that the kernel can't change the loan status of our
                 * page as long as we are holding PQ lock.
                 */

                pg->pqflags &= ~PQ_ANON;
                pg->uanon = NULL;
                return;
        }
        KASSERT(saved_loan_count == 0);

        /*
         * now remove the page from the queues
         */

        if (pg->pqflags & PQ_ACTIVE) {
                TAILQ_REMOVE(&uvm.page_active, pg, pageq);
                pg->pqflags &= ~PQ_ACTIVE;
                uvmexp.active--;
        }
        if (pg->pqflags & PQ_INACTIVE) {
                if (pg->pqflags & PQ_SWAPBACKED)
                        TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
                else
                        TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
                pg->pqflags &= ~PQ_INACTIVE;
                uvmexp.inactive--;
        }

        /*
         * if the page was wired, unwire it now.
         */

        if (pg->wire_count) {
                pg->wire_count = 0;
                uvmexp.wired--;
        }
        if (pg->uanon) {
                uvmexp.anonpages--;
        }

        /*
         * and put on free queue
         */

        pg->flags &= ~PG_ZERO;

        s = uvm_lock_fpageq();
        TAILQ_INSERT_TAIL(&uvm.page_free[
            uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq);
        pg->pqflags = PQ_FREE;
#ifdef DEBUG
        pg->uobject = (void *)0xdeadbeef;
        pg->offset = 0xdeadbeef;
        pg->uanon = (void *)0xdeadbeef;
#endif
        uvmexp.free++;

        if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
                uvm.page_idle_zero = vm_page_zero_enable;

        uvm_unlock_fpageq(s);
}

/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be unlocked and have 0 refcount.
 */

void
uvm_page_unbusy(pgs, npgs)
        struct vm_page **pgs;
        int npgs;
{
        struct vm_page *pg;
        struct uvm_object *uobj;
        int i;
        UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

        for (i = 0; i < npgs; i++) {
                pg = pgs[i];

                if (pg == NULL) {
                        continue;
                }
                if (pg->flags & PG_WANTED) {
                        wakeup(pg);
                }
                if (pg->flags & PG_RELEASED) {
                        UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
                        uobj = pg->uobject;
                        if (uobj != NULL) {
                                uobj->pgops->pgo_releasepg(pg, NULL);
                        } else {
                                pg->flags &= ~(PG_BUSY);
                                UVM_PAGE_OWN(pg, NULL);
                                uvm_anfree(pg->uanon);
                        }
                } else {
                        UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
                        KASSERT(pg->wire_count ||
                            (pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)));
                        pg->flags &= ~(PG_WANTED|PG_BUSY);
                        UVM_PAGE_OWN(pg, NULL);
                }
        }
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *    and where they do it.  it can be used to track down problems
 *    such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(pg, tag)
        struct vm_page *pg;
        char *tag;
{
        /* gain ownership? */
        if (tag) {
                if (pg->owner_tag) {
                        printf("uvm_page_own: page %p already owned "
                            "by proc %d [%s]\n", pg,
                            pg->owner, pg->owner_tag);
                        panic("uvm_page_own");
                }
                pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
                pg->owner_tag = tag;
                return;
        }

        /* drop ownership */
        if (pg->owner_tag == NULL) {
                printf("uvm_page_own: dropping ownership of a non-owned "
                    "page (%p)\n", pg);
                panic("uvm_page_own");
        }
        pg->owner_tag = NULL;
        return;
}
#endif
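
/*
 * Illustrative sketch (not part of the original file): uvm_pageidlezero()
 * below is intended to be called from the MD idle loop when there is
 * nothing else to do.  The fragment is hypothetical -- idle loops differ
 * per port -- but shows the intended hook: uvm.page_idle_zero is the cheap
 * "is there any point?" check maintained by this file.
 */
#if 0
        /* e.g. in the MD cpu idle loop: */
        while (sched_whichqs == 0) {
                if (uvm.page_idle_zero)
                        uvm_pageidlezero();
        }
#endif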

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => we do at least one iteration per call, if we are below the target.
 * => we loop until we either reach the target or whichqs indicates that
 *    there is a process ready to run.
 */
void
uvm_pageidlezero()
{
        struct vm_page *pg;
        struct pgfreelist *pgfl;
        int free_list, s;

        do {
                s = uvm_lock_fpageq();

                if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
                        uvm.page_idle_zero = FALSE;
                        uvm_unlock_fpageq(s);
                        return;
                }

                for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
                        pgfl = &uvm.page_free[free_list];
                        if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[
                            PGFL_UNKNOWN])) != NULL)
                                break;
                }

                if (pg == NULL) {
                        /*
                         * No non-zero'd pages; don't bother trying again
                         * until we know we have non-zero'd pages free.
                         */
                        uvm.page_idle_zero = FALSE;
                        uvm_unlock_fpageq(s);
                        return;
                }

                TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq);
                uvmexp.free--;
                uvm_unlock_fpageq(s);

#ifdef PMAP_PAGEIDLEZERO
                if (PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg)) == FALSE) {
                        /*
                         * The machine-dependent code detected some
                         * reason for us to abort zeroing pages,
                         * probably because there is a process now
                         * ready to run.
                         */
                        s = uvm_lock_fpageq();
                        TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN],
                            pg, pageq);
                        uvmexp.free++;
                        uvmexp.zeroaborts++;
                        uvm_unlock_fpageq(s);
                        return;
                }
#else
                /*
                 * XXX This will toast the cache unless the pmap_zero_page()
                 * XXX implementation does uncached access.
                 */
                pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif
                pg->flags |= PG_ZERO;

                s = uvm_lock_fpageq();
                TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq);
                uvmexp.free++;
                uvmexp.zeropages++;
                uvm_unlock_fpageq(s);
        } while (sched_whichqs == 0);
}