/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/tslog.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");
_Static_assert(sizeof(long long) >= sizeof(vm_paddr_t),
    "vm_paddr_t too big for ffsll, flsll.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;

static int numa_disabled;
static SYSCTL_NODE(_vm, OID_AUTO, numa, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NUMA options");
SYSCTL_INT(_vm_numa, OID_AUTO, disabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &numa_disabled, 0, "NUMA-awareness in the allocators is disabled");
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;
static struct vm_phys_seg vm_phys_early_segs[8];
static int vm_phys_early_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(&vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * These "avail lists" are globals used to communicate boot-time physical
 * memory layout to other parts of the kernel.  Each physically contiguous
 * region of memory is defined by a start address at an even index and an
 * end address at the following odd index.  Each list is terminated by a
 * pair of zero entries.
 *
 * dump_avail tells the dump code what regions to include in a crash dump, and
 * phys_avail is all of the remaining physical memory that is available for
 * the vm system.
 *
 * Initially dump_avail and phys_avail are identical.  Boot time memory
 * allocations remove extents from phys_avail that may still be included
 * in dumps.
 */
vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];

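/*
 * Illustrative sketch (hypothetical addresses, not part of the original
 * file): a machine with two usable regions might be described as
 *
 *	phys_avail[0] = 0x0000001000;	phys_avail[1] = 0x00a0000000;
 *	phys_avail[2] = 0x0100000000;	phys_avail[3] = 0x0200000000;
 *	phys_avail[4] = 0;		phys_avail[5] = 0;
 *
 * and the list can be walked two entries at a time until the zero pair:
 *
 *	for (int i = 0; phys_avail[i + 1] != 0; i += 2)
 *		printf("[%#jx, %#jx)\n", (uintmax_t)phys_avail[i],
 *		    (uintmax_t)phys_avail[i + 1]);
 */
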
/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
static int __read_mostly vm_default_freepool;

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define	VM_DMA32_BOUNDARY	((vm_paddr_t)1 << 32)
#endif

/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_free, "A",
    "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_segs, "A",
    "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_locality, "A",
    "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int pool, int tail);

static bool __diagused
vm_phys_pool_valid(int pool)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (pool == VM_FREEPOOL_LAZYINIT)
		return (false);
#endif
	return (pool >= 0 && pool < VM_NFREEPOOL);
}

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

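/*
 * Sketch of the lookup idiom implied by the comparison function above: a
 * point query is encoded as a degenerate range whose end is zero, which
 * vm_phys_fictitious_cmp() hands off to the containment test.  For example:
 *
 *	struct vm_phys_fictitious_seg tmp, *seg;
 *
 *	tmp.start = pa;
 *	tmp.end = 0;
 *	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
 *
 * vm_phys_fictitious_to_vm_page() below performs exactly this search.
 */
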
int
vm_phys_domain_match(int prefer __numa_used, vm_paddr_t low __numa_used,
    vm_paddr_t high __numa_used)
{
#ifdef NUMA
	domainset_t mask;
	int i;

	if (vm_ndomains == 1 || mem_affinity == NULL)
		return (0);

	DOMAINSET_ZERO(&mask);
	/*
	 * Check for any memory that overlaps low, high.
	 */
	for (i = 0; mem_affinity[i].end != 0; i++)
		if (mem_affinity[i].start <= high &&
		    mem_affinity[i].end >= low)
			DOMAINSET_SET(mem_affinity[i].domain, &mask);
	if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
		return (prefer);
	if (DOMAINSET_EMPTY(&mask))
		panic("vm_phys_domain_match: Impossible constraint");
	return (DOMAINSET_FFS(&mask) - 1);
#else
	return (0);
#endif
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f __numa_used, int t __numa_used)
{

#ifdef NUMA
	if (mem_locality == NULL)
		return (-1);
	if (f >= vm_ndomains || t >= vm_ndomains)
		return (-1);
	return (mem_locality[f * vm_ndomains + t]);
#else
	return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int error, i, j;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

	sbuf_printf(&sbuf, "\n");

	for (i = 0; i < vm_ndomains; i++) {
		sbuf_printf(&sbuf, "%d: ", i);
		for (j = 0; j < vm_ndomains; j++) {
			sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
		}
		sbuf_printf(&sbuf, "\n");
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{

	m->order = order;
	m->pool = pool;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, listq);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
#else
	_vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t paddr;

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_define_seg: end is not page aligned"));

	/*
	 * Split the physical memory segment if it spans two or more free
	 * list boundaries.
	 */
	paddr = start;
#ifdef VM_FREELIST_LOWMEM
	if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
		paddr = VM_LOWMEM_BOUNDARY;
	}
#endif
#ifdef VM_FREELIST_DMA32
	if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
		vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
		paddr = VM_DMA32_BOUNDARY;
	}
#endif
	vm_phys_create_seg(paddr, end);
}

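/*
 * For example, on a platform that defines both VM_FREELIST_LOWMEM and
 * VM_FREELIST_DMA32, a single segment starting below VM_LOWMEM_BOUNDARY and
 * ending above VM_DMA32_BOUNDARY is recorded as three segments (illustrative
 * call with hypothetical addresses):
 *
 *	vm_phys_add_seg(0x1000, (vm_paddr_t)8 << 30);
 *
 * yields [0x1000, VM_LOWMEM_BOUNDARY), [VM_LOWMEM_BOUNDARY,
 * VM_DMA32_BOUNDARY) and [VM_DMA32_BOUNDARY, 8G), so that no segment spans a
 * free list boundary.
 */
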
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
#if defined(VM_DMA32_NPAGES_THRESHOLD) || defined(VM_PHYSSEG_SPARSE)
	u_long npages;
#endif
	int dom, flind, freelist, oind, pind, segind;

	/*
	 * Compute the number of free lists, and generate the mapping from the
	 * manifest constants VM_FREELIST_* to the free list indices.
	 *
	 * Initially, the entries of vm_freelist_to_flind[] are set to either
	 * 0 or 1 to indicate which free lists should be created.
	 */
#ifdef VM_DMA32_NPAGES_THRESHOLD
	npages = 0;
#endif
	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
		seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
		else
#endif
#ifdef VM_FREELIST_DMA32
		if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
		    /*
		     * Create the DMA32 free list only if the amount of
		     * physical memory above physical address 4G exceeds the
		     * given threshold.
		     */
		    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
		    seg->end <= VM_DMA32_BOUNDARY)
			vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
		else
#endif
		{
#ifdef VM_DMA32_NPAGES_THRESHOLD
			npages += atop(seg->end - seg->start);
#endif
			vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
		}
	}
	/* Change each entry into a running total of the free lists. */
	for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
		vm_freelist_to_flind[freelist] +=
		    vm_freelist_to_flind[freelist - 1];
	}
	vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
	KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
	/* Change each entry into a free list index. */
	for (freelist = 0; freelist < VM_NFREELIST; freelist++)
		vm_freelist_to_flind[freelist]--;

	/*
	 * Initialize the first_page and free_queues fields of each physical
	 * memory segment.
	 */
#ifdef VM_PHYSSEG_SPARSE
	npages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[npages];
		npages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
		if (seg->end <= VM_LOWMEM_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
			KASSERT(flind >= 0,
			    ("vm_phys_init: LOWMEM flind < 0"));
		} else
#endif
#ifdef VM_FREELIST_DMA32
		if (seg->end <= VM_DMA32_BOUNDARY) {
			flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DMA32 flind < 0"));
		} else
#endif
		{
			flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
			KASSERT(flind >= 0,
			    ("vm_phys_init: DEFAULT flind < 0"));
		}
		seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
	}

	/*
	 * Coalesce physical memory segments that are contiguous and share the
	 * same per-domain free queues.
	 */
	prev_seg = vm_phys_segs;
	seg = &vm_phys_segs[1];
	end_seg = &vm_phys_segs[vm_phys_nsegs];
	while (seg < end_seg) {
		if (prev_seg->end == seg->start &&
		    prev_seg->free_queues == seg->free_queues) {
			prev_seg->end = seg->end;
			KASSERT(prev_seg->domain == seg->domain,
			    ("vm_phys_init: free queues cannot span domains"));
			vm_phys_nsegs--;
			end_seg--;
			for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
				*tmp_seg = *(tmp_seg + 1);
		} else {
			prev_seg = seg;
			seg++;
		}
	}

	/*
	 * Initialize the free queues.
	 */
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}

#ifdef VM_FREEPOOL_LAZYINIT
	vm_default_freepool = VM_FREEPOOL_LAZYINIT;
#else
	vm_default_freepool = VM_FREEPOOL_DEFAULT;
#endif

	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains __numa_used,
    struct mem_affinity *affinity __numa_used, int *locality __numa_used)
{
#ifdef NUMA
	int i;

	/*
	 * For now the only override value that we support is 1, which
	 * effectively disables NUMA-awareness in the allocators.
	 */
	TUNABLE_INT_FETCH("vm.numa.disabled", &numa_disabled);
	if (numa_disabled)
		ndomains = 1;

	if (ndomains > 1) {
		vm_ndomains = ndomains;
		mem_affinity = affinity;
		mem_locality = locality;
	}

	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &all_domains);
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective being to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int pool, int tail)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, pool, tail);
	}
}

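/*
 * For example (hypothetical request): splitting an order-4 chunk (16 pages)
 * down to a requested order of 1,
 *
 *	vm_phys_split_pages(m, 4, fl, 1, pool, 1);
 *
 * returns m[8] to the free lists at order 3, m[4] at order 2 and m[2] at
 * order 1, leaving the order-1 block m[0..1] with the caller.
 */
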
static void
vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int pool,
    int tail)
{
	KASSERT(order >= 0 && order < VM_NFREEORDER,
	    ("%s: invalid order %d", __func__, order));

	vm_freelist_add(fl, m, order, pool, tail);
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(pool == VM_FREEPOOL_LAZYINIT)) {
		vm_page_t m_next;
		vm_paddr_t pa;
		int npages;

		npages = 1 << order;
		m_next = m + npages;
		pa = m->phys_addr + ptoa(npages);
		if (pa < vm_phys_segs[m->segind].end) {
			vm_page_init_page(m_next, pa, m->segind,
			    VM_FREEPOOL_LAZYINIT);
		}
	}
#endif
}

/*
 * Add the physical pages [m, m + npages) at the beginning of a power-of-two
 * aligned and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective being to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    (VM_PAGE_TO_PHYS(m) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("%s: page %p and npages %u are misaligned",
	    __func__, m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("%s: page %p has unexpected order %d",
		    __func__, m, m->order));
		order = ilog2(npages);
		KASSERT(order < VM_NFREEORDER,
		    ("%s: order %d is out of range", __func__, order));
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective being to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * If npages is zero, this function does nothing and ignores the physical page
 * parameter m.  Otherwise, the physical page m's buddy must not be free.
 */
static vm_page_t
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
    int tail)
{
	int order;

	KASSERT(npages == 0 ||
	    ((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
	    ((PAGE_SIZE << ilog2(npages)) - 1)) == 0,
	    ("vm_phys_enq_range: page %p and npages %u are misaligned",
	    m, npages));
	while (npages > 0) {
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_enq_range: page %p has unexpected order %d",
		    m, m->order));
		order = ffs(npages) - 1;
		vm_phys_enq_chunk(fl, m, order, pool, tail);
		m += 1 << order;
		npages -= 1 << order;
	}
	return (m);
}

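/*
 * For example (hypothetical count), enqueueing 13 pages differs between the
 * two helpers above.  vm_phys_enq_beg() peels off the largest power-of-two
 * chunks first, while vm_phys_enq_range() peels off chunks in increasing
 * size, following the low-order bits of npages:
 *
 *	vm_phys_enq_beg(m, 13, fl, pool, 1):
 *		m[0..7] at order 3, m[8..11] at order 2, m[12] at order 0.
 *	vm_phys_enq_range(m, 13, fl, pool, 1):
 *		m[0] at order 0, m[1..4] at order 2, m[5..12] at order 3.
 */
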
/*
 * Complete the initialization of a contiguous, power of two-sized set of
 * physical pages.
 *
 * If the pages currently belong to the lazy init pool, then the corresponding
 * page structures must be initialized.  In this case it is assumed that the
 * first page in the run has already been initialized.
 */
static void
vm_phys_finish_init(vm_page_t m, int order)
{
#ifdef VM_FREEPOOL_LAZYINIT
	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
		vm_paddr_t pa;
		int segind;

		TSENTER();
		pa = m->phys_addr + PAGE_SIZE;
		segind = m->segind;
		for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
		    m_tmp++, pa += PAGE_SIZE)
			vm_page_init_page(m_tmp, pa, segind, VM_NFREEPOOL);
		TSEXIT();
	}
#endif
}

/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().  The allocated pages have no
 * valid pool field set.
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(vm_phys_pool_valid(pool),
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				avail = i + (1 << oind);
				end = imin(npages, avail);
				while (i < end)
					ma[i++] = m++;
				if (i == npages) {
					/*
					 * Return excess pages to fl.  Its order
					 * [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - i, fl,
					    pool, 1);
					return (npages);
				}
			}
		}
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = vm_default_freepool; pind < VM_NFREEPOOL;
			    pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_finish_init(m, oind);
					avail = i + (1 << oind);
					end = imin(npages, avail);
					while (i < end)
						ma[i++] = m++;
					if (i == npages) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail - i,
						    fl, pool, 1);
						return (npages);
					}
				}
			}
		}
	}
	return (i);
}

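/*
 * Usage sketch (hypothetical caller, not part of the original file): with
 * the domain's free queue lock held, a caller requests a batch of pages and
 * must be prepared for a short count when the free lists are depleted:
 *
 *	vm_page_t ma[32];
 *	int got;
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	got = vm_phys_alloc_npages(domain, VM_FREEPOOL_DEFAULT, 32, ma);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 *
 * "got" may be anything from 0 to 32, and the pages in ma[] need not be
 * physically contiguous.
 */
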
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
static vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int oind, pind, flind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
	    domain));
	KASSERT(freelist < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
	    freelist));
	KASSERT(vm_phys_pool_valid(pool),
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	flind = vm_freelist_to_flind[freelist];
	/* Check if freelist is present */
	if (flind < 0)
		return (NULL);

	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			/* The order [order, oind) queues are empty. */
			vm_phys_split_pages(m, oind, fl, order, pool, 1);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_finish_init(m, oind);
				/* The order [order, oind) queues are empty. */
				vm_phys_split_pages(m, oind, fl, order, pool, 1);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
	vm_page_t m;
	int freelist;

	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		m = vm_phys_alloc_freelist_pages(domain, freelist, pool,
		    order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

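/*
 * Usage sketch (hypothetical caller): a contiguous order-2 allocation and its
 * eventual release, both performed under the domain's free queue lock:
 *
 *	vm_page_t m;
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 2);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 *	...
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	if (m != NULL)
 *		vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT, 2);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 */
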
/*
 * Find the vm_page corresponding to the given physical address, which must lie
 * within the given physical memory segment.
 */
vm_page_t
vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg *seg, vm_paddr_t pa)
{
	KASSERT(pa >= seg->start && pa < seg->end,
	    ("%s: pa %#jx is out of range", __func__, (uintmax_t)pa));

	return (&seg->first_page[atop(pa - seg->start)]);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;

	if ((seg = vm_phys_paddr_to_seg(pa)) != NULL)
		return (vm_phys_seg_paddr_to_vm_page(seg, pa));
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	bzero(range, page_count * sizeof(*range));
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that extends before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

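/*
 * Usage sketch (hypothetical device driver; mmio_base, mmio_len and the
 * memory attribute are placeholders and machine-dependent): a range of
 * device memory can be given fictitious page structures and later torn down:
 *
 *	if (vm_phys_fictitious_reg_range(mmio_base, mmio_base + mmio_len,
 *	    VM_MEMATTR_UNCACHEABLE) != 0)
 *		return (ENOMEM);
 *	...
 *	vm_phys_fictitious_unreg_range(mmio_base, mmio_base + mmio_len);
 *
 * The same bounds must be passed to the unregister call; unregistering an
 * unknown range panics (see below).
 */
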
*/ 11815ebe728dSRoger Pau Monné panic( 11825ebe728dSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 11835ebe728dSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 11845ebe728dSRoger Pau Monné } 1185b6de32bdSKonstantin Belousov #endif 118638d6b2dcSRoger Pau Monné tmp.start = start; 118738d6b2dcSRoger Pau Monné tmp.end = 0; 1188b6de32bdSKonstantin Belousov 118938d6b2dcSRoger Pau Monné rw_wlock(&vm_phys_fictitious_reg_lock); 119038d6b2dcSRoger Pau Monné seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); 119138d6b2dcSRoger Pau Monné if (seg->start != start || seg->end != end) { 119238d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 119338d6b2dcSRoger Pau Monné panic( 119438d6b2dcSRoger Pau Monné "Unregistering not registered fictitious range [%#jx:%#jx]", 119538d6b2dcSRoger Pau Monné (uintmax_t)start, (uintmax_t)end); 119638d6b2dcSRoger Pau Monné } 119738d6b2dcSRoger Pau Monné RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); 119838d6b2dcSRoger Pau Monné rw_wunlock(&vm_phys_fictitious_reg_lock); 119938d6b2dcSRoger Pau Monné free(seg->first_page, M_FICT_PAGES); 120038d6b2dcSRoger Pau Monné free(seg, M_FICT_PAGES); 1201b6de32bdSKonstantin Belousov } 1202b6de32bdSKonstantin Belousov 120311752d88SAlan Cox /* 1204e3537f92SDoug Moore * Free a contiguous, power of two-sized set of physical pages. 1205*0078df5fSDoug Moore * The pool field in the first page determines the destination pool. 12068941dc44SAlan Cox * 12078941dc44SAlan Cox * The free page queues must be locked. 120811752d88SAlan Cox */ 120911752d88SAlan Cox void 1210*0078df5fSDoug Moore vm_phys_free_pages(vm_page_t m, int pool, int order) 121111752d88SAlan Cox { 121211752d88SAlan Cox struct vm_freelist *fl; 121311752d88SAlan Cox struct vm_phys_seg *seg; 12145c1f2cc4SAlan Cox vm_paddr_t pa; 121511752d88SAlan Cox vm_page_t m_buddy; 121611752d88SAlan Cox 121711752d88SAlan Cox KASSERT(m->order == VM_NFREEORDER, 1218*0078df5fSDoug Moore ("%s: page %p has unexpected order %d", 1219*0078df5fSDoug Moore __func__, m, m->order)); 1220*0078df5fSDoug Moore KASSERT(vm_phys_pool_valid(pool), 1221*0078df5fSDoug Moore ("%s: unexpected pool param %d", __func__, pool)); 122211752d88SAlan Cox KASSERT(order < VM_NFREEORDER, 1223*0078df5fSDoug Moore ("%s: order %d is out of range", __func__, order)); 122411752d88SAlan Cox seg = &vm_phys_segs[m->segind]; 1225e2068d0bSJeff Roberson vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 12265c1f2cc4SAlan Cox if (order < VM_NFREEORDER - 1) { 12275c1f2cc4SAlan Cox pa = VM_PAGE_TO_PHYS(m); 12285c1f2cc4SAlan Cox do { 12295c1f2cc4SAlan Cox pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); 12305c1f2cc4SAlan Cox if (pa < seg->start || pa >= seg->end) 123111752d88SAlan Cox break; 123269cbb187SMark Johnston m_buddy = vm_phys_seg_paddr_to_vm_page(seg, pa); 123311752d88SAlan Cox if (m_buddy->order != order) 123411752d88SAlan Cox break; 123511752d88SAlan Cox fl = (*seg->free_queues)[m_buddy->pool]; 12367e226537SAttilio Rao vm_freelist_rem(fl, m_buddy, order); 1237*0078df5fSDoug Moore vm_phys_finish_init(m_buddy, order); 123811752d88SAlan Cox order++; 12395c1f2cc4SAlan Cox pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); 124069cbb187SMark Johnston m = vm_phys_seg_paddr_to_vm_page(seg, pa); 12415c1f2cc4SAlan Cox } while (order < VM_NFREEORDER - 1); 124211752d88SAlan Cox } 1243*0078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 1244*0078df5fSDoug Moore vm_freelist_add(fl, m, order, pool, 1); 124511752d88SAlan Cox } 124611752d88SAlan Cox 1247b16b4c22SMark Johnston #ifdef 
VM_FREEPOOL_LAZYINIT 1248b16b4c22SMark Johnston /* 1249b16b4c22SMark Johnston * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving 1250b16b4c22SMark Johnston * them to the default pool. This is a prerequisite for some rare operations 1251b16b4c22SMark Johnston * which need to scan the page array and thus depend on all pages being 1252b16b4c22SMark Johnston * initialized. 1253b16b4c22SMark Johnston */ 1254b16b4c22SMark Johnston static void 1255b16b4c22SMark Johnston vm_phys_lazy_init_domain(int domain, bool locked) 1256b16b4c22SMark Johnston { 1257b16b4c22SMark Johnston static bool initdone[MAXMEMDOM]; 1258b16b4c22SMark Johnston struct vm_domain *vmd; 1259b16b4c22SMark Johnston struct vm_freelist *fl; 1260b16b4c22SMark Johnston vm_page_t m; 1261b16b4c22SMark Johnston int pind; 1262b16b4c22SMark Johnston bool unlocked; 1263b16b4c22SMark Johnston 1264b16b4c22SMark Johnston if (__predict_true(atomic_load_bool(&initdone[domain]))) 1265b16b4c22SMark Johnston return; 1266b16b4c22SMark Johnston 1267b16b4c22SMark Johnston vmd = VM_DOMAIN(domain); 1268b16b4c22SMark Johnston if (locked) 1269b16b4c22SMark Johnston vm_domain_free_assert_locked(vmd); 1270b16b4c22SMark Johnston else 1271b16b4c22SMark Johnston vm_domain_free_lock(vmd); 1272b16b4c22SMark Johnston if (atomic_load_bool(&initdone[domain])) 1273b16b4c22SMark Johnston goto out; 1274b16b4c22SMark Johnston pind = VM_FREEPOOL_LAZYINIT; 1275b16b4c22SMark Johnston for (int freelist = 0; freelist < VM_NFREELIST; freelist++) { 1276b16b4c22SMark Johnston int flind; 1277b16b4c22SMark Johnston 1278b16b4c22SMark Johnston flind = vm_freelist_to_flind[freelist]; 1279b16b4c22SMark Johnston if (flind < 0) 1280b16b4c22SMark Johnston continue; 1281b16b4c22SMark Johnston fl = vm_phys_free_queues[domain][flind][pind]; 1282b16b4c22SMark Johnston for (int oind = 0; oind < VM_NFREEORDER; oind++) { 1283b16b4c22SMark Johnston if (atomic_load_int(&fl[oind].lcnt) == 0) 1284b16b4c22SMark Johnston continue; 1285b16b4c22SMark Johnston while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) { 1286b16b4c22SMark Johnston /* 1287b16b4c22SMark Johnston * Avoid holding the lock across the 1288b16b4c22SMark Johnston * initialization unless there's a free page 1289b16b4c22SMark Johnston * shortage. 
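 * If vm_domain_allocate() succeeds, the pages are charged
 * against the domain's free count, so the free lock can be
 * dropped while vm_phys_finish_init() runs; the count is
 * returned with vm_domain_freecnt_inc() before the lock is
 * retaken and the pages are freed to the default pool.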
1290b16b4c22SMark Johnston */ 1291b16b4c22SMark Johnston vm_freelist_rem(fl, m, oind); 1292b16b4c22SMark Johnston unlocked = vm_domain_allocate(vmd, 1293b16b4c22SMark Johnston VM_ALLOC_NORMAL, 1 << oind); 1294b16b4c22SMark Johnston if (unlocked) 1295b16b4c22SMark Johnston vm_domain_free_unlock(vmd); 1296*0078df5fSDoug Moore vm_phys_finish_init(m, oind); 1297b16b4c22SMark Johnston if (unlocked) { 1298b16b4c22SMark Johnston vm_domain_freecnt_inc(vmd, 1 << oind); 1299b16b4c22SMark Johnston vm_domain_free_lock(vmd); 1300b16b4c22SMark Johnston } 1301*0078df5fSDoug Moore vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT, 1302*0078df5fSDoug Moore oind); 1303b16b4c22SMark Johnston } 1304b16b4c22SMark Johnston } 1305b16b4c22SMark Johnston } 1306b16b4c22SMark Johnston atomic_store_bool(&initdone[domain], true); 1307b16b4c22SMark Johnston out: 1308b16b4c22SMark Johnston if (!locked) 1309b16b4c22SMark Johnston vm_domain_free_unlock(vmd); 1310b16b4c22SMark Johnston } 1311b16b4c22SMark Johnston 1312b16b4c22SMark Johnston static void 1313b16b4c22SMark Johnston vm_phys_lazy_init(void) 1314b16b4c22SMark Johnston { 1315b16b4c22SMark Johnston for (int domain = 0; domain < vm_ndomains; domain++) 1316b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false); 1317b16b4c22SMark Johnston atomic_store_int(&vm_default_freepool, VM_FREEPOOL_DEFAULT); 1318b16b4c22SMark Johnston } 1319b16b4c22SMark Johnston 1320b16b4c22SMark Johnston static void 1321b16b4c22SMark Johnston vm_phys_lazy_init_kthr(void *arg __unused) 1322b16b4c22SMark Johnston { 1323b16b4c22SMark Johnston vm_phys_lazy_init(); 1324b16b4c22SMark Johnston kthread_exit(); 1325b16b4c22SMark Johnston } 1326b16b4c22SMark Johnston 1327b16b4c22SMark Johnston static void 1328b16b4c22SMark Johnston vm_phys_lazy_sysinit(void *arg __unused) 1329b16b4c22SMark Johnston { 1330b16b4c22SMark Johnston struct thread *td; 1331b16b4c22SMark Johnston int error; 1332b16b4c22SMark Johnston 1333b16b4c22SMark Johnston error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td, 1334b16b4c22SMark Johnston RFSTOPPED, 0, "vmlazyinit"); 1335b16b4c22SMark Johnston if (error == 0) { 1336b16b4c22SMark Johnston thread_lock(td); 1337b16b4c22SMark Johnston sched_prio(td, PRI_MIN_IDLE); 1338b16b4c22SMark Johnston sched_add(td, SRQ_BORING); 1339b16b4c22SMark Johnston } else { 1340b16b4c22SMark Johnston printf("%s: could not create lazy init thread: %d\n", 1341b16b4c22SMark Johnston __func__, error); 1342b16b4c22SMark Johnston vm_phys_lazy_init(); 1343b16b4c22SMark Johnston } 1344b16b4c22SMark Johnston } 1345b16b4c22SMark Johnston SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit, 1346b16b4c22SMark Johnston NULL); 1347b16b4c22SMark Johnston #endif /* VM_FREEPOOL_LAZYINIT */ 1348b16b4c22SMark Johnston 134911752d88SAlan Cox /* 1350e3537f92SDoug Moore * Free a contiguous, arbitrarily sized set of physical pages, without 1351*0078df5fSDoug Moore * merging across set boundaries. Assumes no pages have a valid pool field. 1352b8590daeSDoug Moore * 1353b8590daeSDoug Moore * The free page queues must be locked. 
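 *
 * A worked example of the decomposition performed below, with
 * page frame numbers chosen purely for illustration (and assuming
 * the maximum block order is at least 3): enqueueing 13 pages
 * starting at pfn 3 produces free blocks of 1 page at pfn 3,
 * 4 pages at pfn 4, and 8 pages at pfn 8, each block being
 * naturally aligned and of the largest possible power-of-two size.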
1354b8590daeSDoug Moore */ 1355b8590daeSDoug Moore void 1356*0078df5fSDoug Moore vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages) 1357b8590daeSDoug Moore { 1358b8590daeSDoug Moore struct vm_freelist *fl; 1359b8590daeSDoug Moore struct vm_phys_seg *seg; 1360b8590daeSDoug Moore vm_page_t m_end; 1361c9b06fa5SDoug Moore vm_paddr_t diff, lo; 1362b8590daeSDoug Moore int order; 1363b8590daeSDoug Moore 1364b8590daeSDoug Moore /* 1365b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages in the largest 1366b8590daeSDoug Moore * possible power-of-two-sized subsets. 1367b8590daeSDoug Moore */ 1368b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1369b8590daeSDoug Moore seg = &vm_phys_segs[m->segind]; 1370*0078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 1371b8590daeSDoug Moore m_end = m + npages; 1372b8590daeSDoug Moore /* Free blocks of increasing size. */ 13736dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m)); 1374c9b06fa5SDoug Moore if (m < m_end && 1375c9b06fa5SDoug Moore (diff = lo ^ (lo + npages - 1)) != 0) { 1376543d55d7SDoug Moore order = min(ilog2(diff), VM_NFREEORDER - 1); 1377*0078df5fSDoug Moore m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, 1378*0078df5fSDoug Moore pool, 1); 13795c1f2cc4SAlan Cox } 1380c9b06fa5SDoug Moore 1381b8590daeSDoug Moore /* Free blocks of maximum size. */ 1382c9b06fa5SDoug Moore order = VM_NFREEORDER - 1; 1383b8590daeSDoug Moore while (m + (1 << order) <= m_end) { 1384b8590daeSDoug Moore KASSERT(seg == &vm_phys_segs[m->segind], 1385b8590daeSDoug Moore ("%s: page range [%p,%p) spans multiple segments", 1386b8590daeSDoug Moore __func__, m_end - npages, m)); 1387*0078df5fSDoug Moore vm_phys_enq_chunk(fl, m, order, pool, 1); 1388b8590daeSDoug Moore m += 1 << order; 1389b8590daeSDoug Moore } 1390b8590daeSDoug Moore /* Free blocks of diminishing size. */ 1391*0078df5fSDoug Moore vm_phys_enq_beg(m, m_end - m, fl, pool, 1); 1392b8590daeSDoug Moore } 1393b8590daeSDoug Moore 1394b8590daeSDoug Moore /* 1395b8590daeSDoug Moore * Free a contiguous, arbitrarily sized set of physical pages. 1396*0078df5fSDoug Moore * Assumes that every page but the first has no valid pool field. 1397*0078df5fSDoug Moore * Uses the pool value in the first page if valid, otherwise default. 1398b8590daeSDoug Moore * 1399b8590daeSDoug Moore * The free page queues must be locked. 
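 *
 * A minimal calling sketch, for illustration only (real callers
 * may already hold the appropriate domain free lock):
 *
 *	vm_domain_free_lock(vm_pagequeue_domain(m));
 *	vm_phys_free_contig(m, VM_FREEPOOL_DEFAULT, npages);
 *	vm_domain_free_unlock(vm_pagequeue_domain(m));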
1400b8590daeSDoug Moore */ 1401b8590daeSDoug Moore void 1402*0078df5fSDoug Moore vm_phys_free_contig(vm_page_t m, int pool, u_long npages) 1403b8590daeSDoug Moore { 14046dd15b7aSDoug Moore vm_paddr_t lo; 1405b8590daeSDoug Moore vm_page_t m_start, m_end; 14066dd15b7aSDoug Moore unsigned max_order, order_start, order_end; 1407b8590daeSDoug Moore 1408b8590daeSDoug Moore vm_domain_free_assert_locked(vm_pagequeue_domain(m)); 1409b8590daeSDoug Moore 14106dd15b7aSDoug Moore lo = atop(VM_PAGE_TO_PHYS(m)); 1411543d55d7SDoug Moore max_order = min(ilog2(lo ^ (lo + npages)), VM_NFREEORDER - 1); 1412e3537f92SDoug Moore 1413e3537f92SDoug Moore m_start = m; 14146dd15b7aSDoug Moore order_start = ffsll(lo) - 1; 14156dd15b7aSDoug Moore if (order_start < max_order) 1416b8590daeSDoug Moore m_start += 1 << order_start; 1417e3537f92SDoug Moore m_end = m + npages; 14186dd15b7aSDoug Moore order_end = ffsll(lo + npages) - 1; 14196dd15b7aSDoug Moore if (order_end < max_order) 1420b8590daeSDoug Moore m_end -= 1 << order_end; 1421b8590daeSDoug Moore /* 1422b8590daeSDoug Moore * Avoid unnecessary coalescing by freeing the pages at the start and 1423b8590daeSDoug Moore * end of the range last. 1424b8590daeSDoug Moore */ 1425b8590daeSDoug Moore if (m_start < m_end) 1426*0078df5fSDoug Moore vm_phys_enqueue_contig(m_start, pool, m_end - m_start); 1427e3537f92SDoug Moore if (order_start < max_order) 1428*0078df5fSDoug Moore vm_phys_free_pages(m, pool, order_start); 1429e3537f92SDoug Moore if (order_end < max_order) 1430*0078df5fSDoug Moore vm_phys_free_pages(m_end, pool, order_end); 14315c1f2cc4SAlan Cox } 14325c1f2cc4SAlan Cox 14335c1f2cc4SAlan Cox /* 14349e817428SDoug Moore * Identify the first address range within segment segind or greater 14359e817428SDoug Moore * that matches the domain, lies within the low/high range, and has 14369e817428SDoug Moore * enough pages. Return -1 if there is none. 1437c869e672SAlan Cox */ 14389e817428SDoug Moore int 14399e817428SDoug Moore vm_phys_find_range(vm_page_t bounds[], int segind, int domain, 14409e817428SDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high) 1441c869e672SAlan Cox { 14429e817428SDoug Moore vm_paddr_t pa_end, pa_start; 14439e817428SDoug Moore struct vm_phys_seg *end_seg, *seg; 1444c869e672SAlan Cox 14459e817428SDoug Moore KASSERT(npages > 0, ("npages is zero")); 144658d42717SAlan Cox KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range")); 14479e817428SDoug Moore end_seg = &vm_phys_segs[vm_phys_nsegs]; 14489e817428SDoug Moore for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) { 14493f289c3fSJeff Roberson if (seg->domain != domain) 14503f289c3fSJeff Roberson continue; 1451c869e672SAlan Cox if (seg->start >= high) 14529e817428SDoug Moore return (-1); 14539e817428SDoug Moore pa_start = MAX(low, seg->start); 14549e817428SDoug Moore pa_end = MIN(high, seg->end); 14559e817428SDoug Moore if (pa_end - pa_start < ptoa(npages)) 1456c869e672SAlan Cox continue; 1457b16b4c22SMark Johnston #ifdef VM_FREEPOOL_LAZYINIT 1458b16b4c22SMark Johnston /* 1459b16b4c22SMark Johnston * The pages on the free lists must be initialized. 
1460b16b4c22SMark Johnston */ 1461b16b4c22SMark Johnston vm_phys_lazy_init_domain(domain, false); 1462b16b4c22SMark Johnston #endif 146369cbb187SMark Johnston bounds[0] = vm_phys_seg_paddr_to_vm_page(seg, pa_start); 1464fbff6d54SMark Johnston bounds[1] = &seg->first_page[atop(pa_end - seg->start)]; 14659e817428SDoug Moore return (seg - vm_phys_segs); 1466c869e672SAlan Cox } 14679e817428SDoug Moore return (-1); 1468c869e672SAlan Cox } 1469c869e672SAlan Cox 1470c869e672SAlan Cox /* 14719742373aSAlan Cox * Search for the given physical page "m" in the free lists. If the search 14726062d9faSMark Johnston * succeeds, remove "m" from the free lists and return true. Otherwise, return 14736062d9faSMark Johnston * false, indicating that "m" is not in the free lists. 14747bfda801SAlan Cox * 14757bfda801SAlan Cox * The free page queues must be locked. 14767bfda801SAlan Cox */ 14776062d9faSMark Johnston bool 1478b16b4c22SMark Johnston vm_phys_unfree_page(vm_paddr_t pa) 14797bfda801SAlan Cox { 14807bfda801SAlan Cox struct vm_freelist *fl; 14817bfda801SAlan Cox struct vm_phys_seg *seg; 1482b16b4c22SMark Johnston vm_paddr_t pa_half; 1483b16b4c22SMark Johnston vm_page_t m, m_set, m_tmp; 1484*0078df5fSDoug Moore int order, pool; 14857bfda801SAlan Cox 1486b16b4c22SMark Johnston seg = vm_phys_paddr_to_seg(pa); 1487b16b4c22SMark Johnston vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); 1488b16b4c22SMark Johnston 1489*0078df5fSDoug Moore #ifdef VM_FREEPOOL_LAZYINIT 1490b16b4c22SMark Johnston /* 1491b16b4c22SMark Johnston * The pages on the free lists must be initialized. 1492b16b4c22SMark Johnston */ 1493b16b4c22SMark Johnston vm_phys_lazy_init_domain(seg->domain, true); 1494b16b4c22SMark Johnston #endif 1495b16b4c22SMark Johnston 14967bfda801SAlan Cox /* 14977bfda801SAlan Cox * First, find the contiguous, power of two-sized set of free 14987bfda801SAlan Cox * physical pages containing the given physical page "m" and 14997bfda801SAlan Cox * assign it to "m_set". 15007bfda801SAlan Cox */ 1501b16b4c22SMark Johnston m = vm_phys_paddr_to_vm_page(pa); 15027bfda801SAlan Cox for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 1503bc8794a1SAlan Cox order < VM_NFREEORDER - 1; ) { 15047bfda801SAlan Cox order++; 15057bfda801SAlan Cox pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 15062fbced65SAlan Cox if (pa >= seg->start) 150769cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa); 1508e35395ceSAlan Cox else 15096062d9faSMark Johnston return (false); 15107bfda801SAlan Cox } 1511e35395ceSAlan Cox if (m_set->order < order) 15126062d9faSMark Johnston return (false); 1513e35395ceSAlan Cox if (m_set->order == VM_NFREEORDER) 15146062d9faSMark Johnston return (false); 15157bfda801SAlan Cox KASSERT(m_set->order < VM_NFREEORDER, 15167bfda801SAlan Cox ("vm_phys_unfree_page: page %p has unexpected order %d", 15177bfda801SAlan Cox m_set, m_set->order)); 15187bfda801SAlan Cox 15197bfda801SAlan Cox /* 15207bfda801SAlan Cox * Next, remove "m_set" from the free lists. Finally, extract 15217bfda801SAlan Cox * "m" from "m_set" using an iterative algorithm: While "m_set" 15227bfda801SAlan Cox * is larger than a page, shrink "m_set" by returning the half 15237bfda801SAlan Cox * of "m_set" that does not contain "m" to the free lists. 
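 *
 * For illustration: if "m" is the page at pfn 10 and "m_set" is a
 * free order-2 block at pfn 8 (pages 8-11), then the loop below
 * returns pages 8-9 to the free lists as an order-1 block and
 * page 11 as an order-0 block, leaving only page 10 removed.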
15247bfda801SAlan Cox */ 1525*0078df5fSDoug Moore pool = m_set->pool; 1526*0078df5fSDoug Moore fl = (*seg->free_queues)[pool]; 15277bfda801SAlan Cox order = m_set->order; 15287e226537SAttilio Rao vm_freelist_rem(fl, m_set, order); 15297bfda801SAlan Cox while (order > 0) { 15307bfda801SAlan Cox order--; 15317bfda801SAlan Cox pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 15327bfda801SAlan Cox if (m->phys_addr < pa_half) 153369cbb187SMark Johnston m_tmp = vm_phys_seg_paddr_to_vm_page(seg, pa_half); 15347bfda801SAlan Cox else { 15357bfda801SAlan Cox m_tmp = m_set; 153669cbb187SMark Johnston m_set = vm_phys_seg_paddr_to_vm_page(seg, pa_half); 15377bfda801SAlan Cox } 1538*0078df5fSDoug Moore vm_freelist_add(fl, m_tmp, order, pool, 0); 15397bfda801SAlan Cox } 15407bfda801SAlan Cox KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 15416062d9faSMark Johnston return (true); 15427bfda801SAlan Cox } 15437bfda801SAlan Cox 15447bfda801SAlan Cox /* 15452a4897bdSDoug Moore * Find a run of contiguous physical pages, meeting alignment requirements, from 15462a4897bdSDoug Moore * a list of max-sized page blocks, where we need at least two consecutive 15472a4897bdSDoug Moore * blocks to satisfy the (large) page request. 1548fa8a6585SDoug Moore */ 1549fa8a6585SDoug Moore static vm_page_t 15502a4897bdSDoug Moore vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages, 1551fa8a6585SDoug Moore vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 1552fa8a6585SDoug Moore { 1553fa8a6585SDoug Moore struct vm_phys_seg *seg; 15542a4897bdSDoug Moore vm_page_t m, m_iter, m_ret; 15552a4897bdSDoug Moore vm_paddr_t max_size, size; 15562a4897bdSDoug Moore int max_order; 1557fa8a6585SDoug Moore 15582a4897bdSDoug Moore max_order = VM_NFREEORDER - 1; 1559fa8a6585SDoug Moore size = npages << PAGE_SHIFT; 15602a4897bdSDoug Moore max_size = (vm_paddr_t)1 << (PAGE_SHIFT + max_order); 15612a4897bdSDoug Moore KASSERT(size > max_size, ("size is too small")); 15622a4897bdSDoug Moore 1563fa8a6585SDoug Moore /* 15642a4897bdSDoug Moore * In order to avoid examining any free max-sized page block more than 15652a4897bdSDoug Moore * twice, identify the ones that are first in a physically-contiguous 15662a4897bdSDoug Moore * sequence of such blocks, and only for those walk the sequence to 15672a4897bdSDoug Moore * check if there are enough free blocks starting at a properly aligned 15682a4897bdSDoug Moore * block. Thus, no block is checked for free-ness more than twice. 1569fa8a6585SDoug Moore */ 15702a4897bdSDoug Moore TAILQ_FOREACH(m, &fl[max_order].pl, listq) { 15712a4897bdSDoug Moore /* 15722a4897bdSDoug Moore * Skip m unless it is first in a sequence of free max page 15732a4897bdSDoug Moore * blocks >= low in its segment. 15742a4897bdSDoug Moore */ 15752a4897bdSDoug Moore seg = &vm_phys_segs[m->segind]; 15762a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) < MAX(low, seg->start)) 15772a4897bdSDoug Moore continue; 15782a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m) >= max_size && 15792a4897bdSDoug Moore VM_PAGE_TO_PHYS(m) - max_size >= MAX(low, seg->start) && 15802a4897bdSDoug Moore max_order == m[-1 << max_order].order) 1581fa8a6585SDoug Moore continue; 1582fa8a6585SDoug Moore 1583fa8a6585SDoug Moore /* 15842a4897bdSDoug Moore * Advance m_ret from m to the first of the sequence, if any, 15852a4897bdSDoug Moore * that satisfies alignment conditions and might leave enough 15862a4897bdSDoug Moore * space. 
1587fa8a6585SDoug Moore */ 15882a4897bdSDoug Moore m_ret = m; 15892a4897bdSDoug Moore while (!vm_addr_ok(VM_PAGE_TO_PHYS(m_ret), 15902a4897bdSDoug Moore size, alignment, boundary) && 15912a4897bdSDoug Moore VM_PAGE_TO_PHYS(m_ret) + size <= MIN(high, seg->end) && 15922a4897bdSDoug Moore max_order == m_ret[1 << max_order].order) 15932a4897bdSDoug Moore m_ret += 1 << max_order; 15942a4897bdSDoug Moore 15952a4897bdSDoug Moore /* 15962a4897bdSDoug Moore * Skip m unless some block m_ret in the sequence is properly 15972a4897bdSDoug Moore * aligned, and begins a sequence of enough pages less than 15982a4897bdSDoug Moore * high, and in the same segment. 15992a4897bdSDoug Moore */ 16002a4897bdSDoug Moore if (VM_PAGE_TO_PHYS(m_ret) + size > MIN(high, seg->end)) 1601fa8a6585SDoug Moore continue; 1602fa8a6585SDoug Moore 1603fa8a6585SDoug Moore /* 16042a4897bdSDoug Moore * Skip m unless the blocks to allocate starting at m_ret are 16052a4897bdSDoug Moore * all free. 1606fa8a6585SDoug Moore */ 16072a4897bdSDoug Moore for (m_iter = m_ret; 16082a4897bdSDoug Moore m_iter < m_ret + npages && max_order == m_iter->order; 16092a4897bdSDoug Moore m_iter += 1 << max_order) { 1610fa8a6585SDoug Moore } 16112a4897bdSDoug Moore if (m_iter < m_ret + npages) 1612fa8a6585SDoug Moore continue; 1613fa8a6585SDoug Moore return (m_ret); 1614fa8a6585SDoug Moore } 1615fa8a6585SDoug Moore return (NULL); 1616fa8a6585SDoug Moore } 1617fa8a6585SDoug Moore 1618fa8a6585SDoug Moore /* 1619fa8a6585SDoug Moore * Find a run of contiguous physical pages from the specified free list 1620342056faSDoug Moore * table. 1621c869e672SAlan Cox */ 1622c869e672SAlan Cox static vm_page_t 1623fa8a6585SDoug Moore vm_phys_find_queues_contig( 1624342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX], 1625342056faSDoug Moore u_long npages, vm_paddr_t low, vm_paddr_t high, 1626342056faSDoug Moore u_long alignment, vm_paddr_t boundary) 1627c869e672SAlan Cox { 1628c869e672SAlan Cox struct vm_freelist *fl; 1629fa8a6585SDoug Moore vm_page_t m_ret; 1630c869e672SAlan Cox vm_paddr_t pa, pa_end, size; 1631c869e672SAlan Cox int oind, order, pind; 1632c869e672SAlan Cox 1633c869e672SAlan Cox KASSERT(npages > 0, ("npages is 0")); 1634c869e672SAlan Cox KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1635c869e672SAlan Cox KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1636c869e672SAlan Cox /* Compute the queue that is the best fit for npages. */ 16379161b4deSAlan Cox order = flsl(npages - 1); 1638fa8a6585SDoug Moore /* Search for a large enough free block. */ 1639c869e672SAlan Cox size = npages << PAGE_SHIFT; 1640fa8a6585SDoug Moore for (oind = order; oind < VM_NFREEORDER; oind++) { 1641b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) { 1642342056faSDoug Moore fl = (*queues)[pind]; 16435cd29d0fSMark Johnston TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { 1644c869e672SAlan Cox /* 1645da92ecbcSDoug Moore * Determine if the address range starting at pa 1646da92ecbcSDoug Moore * is within the given range, satisfies the 1647da92ecbcSDoug Moore * given alignment, and does not cross the given 1648da92ecbcSDoug Moore * boundary. 
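 * The range check is made explicitly below; vm_addr_ok() is
 * assumed here to verify the remaining two conditions, i.e.
 * that (pa & (alignment - 1)) == 0 and that, for a nonzero
 * boundary, [pa, pa + size) does not span a multiple of
 * boundary.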
164911752d88SAlan Cox */ 1650da92ecbcSDoug Moore pa = VM_PAGE_TO_PHYS(m_ret); 1651da92ecbcSDoug Moore pa_end = pa + size; 1652fa8a6585SDoug Moore if (low <= pa && pa_end <= high && 1653fa8a6585SDoug Moore vm_addr_ok(pa, size, alignment, boundary)) 1654fa8a6585SDoug Moore return (m_ret); 1655fa8a6585SDoug Moore } 1656fa8a6585SDoug Moore } 1657fa8a6585SDoug Moore } 1658da92ecbcSDoug Moore if (order < VM_NFREEORDER) 1659fa8a6585SDoug Moore return (NULL); 16602a4897bdSDoug Moore /* Search for a long-enough sequence of max-order blocks. */ 1661b16b4c22SMark Johnston for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) { 1662fa8a6585SDoug Moore fl = (*queues)[pind]; 16632a4897bdSDoug Moore m_ret = vm_phys_find_freelist_contig(fl, npages, 1664fa8a6585SDoug Moore low, high, alignment, boundary); 1665fa8a6585SDoug Moore if (m_ret != NULL) 1666fa8a6585SDoug Moore return (m_ret); 166711752d88SAlan Cox } 166811752d88SAlan Cox return (NULL); 166911752d88SAlan Cox } 167011752d88SAlan Cox 1671b7565d44SJeff Roberson /* 1672342056faSDoug Moore * Allocate a contiguous set of physical pages of the given size 1673342056faSDoug Moore * "npages" from the free lists. All of the physical pages must be at 1674342056faSDoug Moore * or above the given physical address "low" and below the given 1675342056faSDoug Moore * physical address "high". The given value "alignment" determines the 1676342056faSDoug Moore * alignment of the first physical page in the set. If the given value 1677342056faSDoug Moore * "boundary" is non-zero, then the set of physical pages cannot cross 1678342056faSDoug Moore * any physical address boundary that is a multiple of that value. Both 1679*0078df5fSDoug Moore * "alignment" and "boundary" must be a power of two. Sets the pool 1680*0078df5fSDoug Moore * field to DEFAULT in the first allocated page. 
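 *
 * A minimal calling sketch, for illustration only (real callers
 * typically also charge the allocation against the domain's free
 * page count before calling):
 *
 *	vm_domain_free_lock(VM_DOMAIN(domain));
 *	m = vm_phys_alloc_contig(domain, npages, low, high,
 *	    alignment, boundary);
 *	vm_domain_free_unlock(VM_DOMAIN(domain));
 *	if (m == NULL)
 *		(retry in another domain, or reclaim and retry)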
1681342056faSDoug Moore */ 1682342056faSDoug Moore vm_page_t 1683342056faSDoug Moore vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, 1684342056faSDoug Moore u_long alignment, vm_paddr_t boundary) 1685342056faSDoug Moore { 1686342056faSDoug Moore vm_paddr_t pa_end, pa_start; 1687fa8a6585SDoug Moore struct vm_freelist *fl; 1688fa8a6585SDoug Moore vm_page_t m, m_run; 1689342056faSDoug Moore struct vm_phys_seg *seg; 1690342056faSDoug Moore struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX]; 1691fa8a6585SDoug Moore int oind, segind; 1692342056faSDoug Moore 1693342056faSDoug Moore KASSERT(npages > 0, ("npages is 0")); 1694342056faSDoug Moore KASSERT(powerof2(alignment), ("alignment is not a power of 2")); 1695342056faSDoug Moore KASSERT(powerof2(boundary), ("boundary is not a power of 2")); 1696342056faSDoug Moore vm_domain_free_assert_locked(VM_DOMAIN(domain)); 1697342056faSDoug Moore if (low >= high) 1698342056faSDoug Moore return (NULL); 1699342056faSDoug Moore queues = NULL; 1700342056faSDoug Moore m_run = NULL; 1701342056faSDoug Moore for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { 1702342056faSDoug Moore seg = &vm_phys_segs[segind]; 1703342056faSDoug Moore if (seg->start >= high || seg->domain != domain) 1704342056faSDoug Moore continue; 1705342056faSDoug Moore if (low >= seg->end) 1706342056faSDoug Moore break; 1707342056faSDoug Moore if (low <= seg->start) 1708342056faSDoug Moore pa_start = seg->start; 1709342056faSDoug Moore else 1710342056faSDoug Moore pa_start = low; 1711342056faSDoug Moore if (high < seg->end) 1712342056faSDoug Moore pa_end = high; 1713342056faSDoug Moore else 1714342056faSDoug Moore pa_end = seg->end; 1715342056faSDoug Moore if (pa_end - pa_start < ptoa(npages)) 1716342056faSDoug Moore continue; 1717342056faSDoug Moore /* 1718342056faSDoug Moore * If a previous segment led to a search using 1719342056faSDoug Moore * the same free lists as would this segment, then 1720342056faSDoug Moore * we've actually already searched within this 1721342056faSDoug Moore * too. So skip it. 1722342056faSDoug Moore */ 1723342056faSDoug Moore if (seg->free_queues == queues) 1724342056faSDoug Moore continue; 1725342056faSDoug Moore queues = seg->free_queues; 1726fa8a6585SDoug Moore m_run = vm_phys_find_queues_contig(queues, npages, 1727342056faSDoug Moore low, high, alignment, boundary); 1728342056faSDoug Moore if (m_run != NULL) 1729342056faSDoug Moore break; 1730342056faSDoug Moore } 1731fa8a6585SDoug Moore if (m_run == NULL) 1732fa8a6585SDoug Moore return (NULL); 1733fa8a6585SDoug Moore 1734fa8a6585SDoug Moore /* Allocate pages from the page-range found. */ 1735fa8a6585SDoug Moore for (m = m_run; m < &m_run[npages]; m = &m[1 << oind]) { 1736fa8a6585SDoug Moore fl = (*queues)[m->pool]; 1737fa8a6585SDoug Moore oind = m->order; 1738fa8a6585SDoug Moore vm_freelist_rem(fl, m, oind); 1739*0078df5fSDoug Moore vm_phys_finish_init(m, oind); 1740fa8a6585SDoug Moore } 1741fa8a6585SDoug Moore /* Return excess pages to the free lists. */ 1742fa8a6585SDoug Moore fl = (*queues)[VM_FREEPOOL_DEFAULT]; 1743*0078df5fSDoug Moore vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 1744*0078df5fSDoug Moore VM_FREEPOOL_DEFAULT, 0); 17452a4897bdSDoug Moore 17462a4897bdSDoug Moore /* Return page verified to satisfy conditions of request. 
*/ 17472a4897bdSDoug Moore pa_start = VM_PAGE_TO_PHYS(m_run); 17482a4897bdSDoug Moore KASSERT(low <= pa_start, 17492a4897bdSDoug Moore ("memory allocated below minimum requested range")); 17502a4897bdSDoug Moore KASSERT(pa_start + ptoa(npages) <= high, 17512a4897bdSDoug Moore ("memory allocated above maximum requested range")); 17522a4897bdSDoug Moore seg = &vm_phys_segs[m_run->segind]; 17532a4897bdSDoug Moore KASSERT(seg->domain == domain, 17542a4897bdSDoug Moore ("memory not allocated from specified domain")); 17552a4897bdSDoug Moore KASSERT(vm_addr_ok(pa_start, ptoa(npages), alignment, boundary), 17562a4897bdSDoug Moore ("memory alignment/boundary constraints not satisfied")); 1757342056faSDoug Moore return (m_run); 1758342056faSDoug Moore } 1759342056faSDoug Moore 1760342056faSDoug Moore /* 1761b7565d44SJeff Roberson * Return the index of the first unused slot which may be the terminating 1762b7565d44SJeff Roberson * entry. 1763b7565d44SJeff Roberson */ 1764b7565d44SJeff Roberson static int 1765b7565d44SJeff Roberson vm_phys_avail_count(void) 1766b7565d44SJeff Roberson { 1767b7565d44SJeff Roberson int i; 1768b7565d44SJeff Roberson 1769b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) 1770b7565d44SJeff Roberson continue; 1771b7565d44SJeff Roberson if (i > PHYS_AVAIL_ENTRIES) 1772b7565d44SJeff Roberson panic("Improperly terminated phys_avail %d entries", i); 1773b7565d44SJeff Roberson 1774b7565d44SJeff Roberson return (i); 1775b7565d44SJeff Roberson } 1776b7565d44SJeff Roberson 1777b7565d44SJeff Roberson /* 1778b7565d44SJeff Roberson * Assert that a phys_avail entry is valid. 1779b7565d44SJeff Roberson */ 1780b7565d44SJeff Roberson static void 1781b7565d44SJeff Roberson vm_phys_avail_check(int i) 1782b7565d44SJeff Roberson { 1783b7565d44SJeff Roberson if (phys_avail[i] & PAGE_MASK) 1784b7565d44SJeff Roberson panic("Unaligned phys_avail[%d]: %#jx", i, 1785b7565d44SJeff Roberson (intmax_t)phys_avail[i]); 1786b7565d44SJeff Roberson if (phys_avail[i+1] & PAGE_MASK) 1787b7565d44SJeff Roberson panic("Unaligned phys_avail[%d + 1]: %#jx", i, 1788b7565d44SJeff Roberson (intmax_t)phys_avail[i]); 1789b7565d44SJeff Roberson if (phys_avail[i + 1] < phys_avail[i]) 1790b7565d44SJeff Roberson panic("phys_avail[%d] start %#jx < end %#jx", i, 1791b7565d44SJeff Roberson (intmax_t)phys_avail[i], (intmax_t)phys_avail[i+1]); 1792b7565d44SJeff Roberson } 1793b7565d44SJeff Roberson 1794b7565d44SJeff Roberson /* 1795b7565d44SJeff Roberson * Return the index of an overlapping phys_avail entry or -1. 1796b7565d44SJeff Roberson */ 1797be3f5f29SJeff Roberson #ifdef NUMA 1798b7565d44SJeff Roberson static int 1799b7565d44SJeff Roberson vm_phys_avail_find(vm_paddr_t pa) 1800b7565d44SJeff Roberson { 1801b7565d44SJeff Roberson int i; 1802b7565d44SJeff Roberson 1803b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) 1804b7565d44SJeff Roberson if (phys_avail[i] <= pa && phys_avail[i + 1] > pa) 1805b7565d44SJeff Roberson return (i); 1806b7565d44SJeff Roberson return (-1); 1807b7565d44SJeff Roberson } 1808be3f5f29SJeff Roberson #endif 1809b7565d44SJeff Roberson 1810b7565d44SJeff Roberson /* 1811b7565d44SJeff Roberson * Return the index of the largest entry. 
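 *
 * For reference, phys_avail[] holds {start, end} pairs of
 * page-aligned physical addresses, terminated by an entry whose
 * end is zero, for example (values purely illustrative):
 *
 *	phys_avail[0] = 0x1000      phys_avail[1] = 0x9f000
 *	phys_avail[2] = 0x100000    phys_avail[3] = 0xbfe00000
 *	phys_avail[4] = 0           (terminator)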
1812b7565d44SJeff Roberson */ 1813b7565d44SJeff Roberson int 1814b7565d44SJeff Roberson vm_phys_avail_largest(void) 1815b7565d44SJeff Roberson { 1816b7565d44SJeff Roberson vm_paddr_t sz, largesz; 1817b7565d44SJeff Roberson int largest; 1818b7565d44SJeff Roberson int i; 1819b7565d44SJeff Roberson 1820b7565d44SJeff Roberson largest = 0; 1821b7565d44SJeff Roberson largesz = 0; 1822b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1]; i += 2) { 1823b7565d44SJeff Roberson sz = vm_phys_avail_size(i); 1824b7565d44SJeff Roberson if (sz > largesz) { 1825b7565d44SJeff Roberson largesz = sz; 1826b7565d44SJeff Roberson largest = i; 1827b7565d44SJeff Roberson } 1828b7565d44SJeff Roberson } 1829b7565d44SJeff Roberson 1830b7565d44SJeff Roberson return (largest); 1831b7565d44SJeff Roberson } 1832b7565d44SJeff Roberson 1833b7565d44SJeff Roberson vm_paddr_t 1834b7565d44SJeff Roberson vm_phys_avail_size(int i) 1835b7565d44SJeff Roberson { 1836b7565d44SJeff Roberson 1837b7565d44SJeff Roberson return (phys_avail[i + 1] - phys_avail[i]); 1838b7565d44SJeff Roberson } 1839b7565d44SJeff Roberson 1840b7565d44SJeff Roberson /* 1841b7565d44SJeff Roberson * Split an entry at the address 'pa'. Return zero on success or errno. 1842b7565d44SJeff Roberson */ 1843b7565d44SJeff Roberson static int 1844b7565d44SJeff Roberson vm_phys_avail_split(vm_paddr_t pa, int i) 1845b7565d44SJeff Roberson { 1846b7565d44SJeff Roberson int cnt; 1847b7565d44SJeff Roberson 1848b7565d44SJeff Roberson vm_phys_avail_check(i); 1849b7565d44SJeff Roberson if (pa <= phys_avail[i] || pa >= phys_avail[i + 1]) 1850b7565d44SJeff Roberson panic("vm_phys_avail_split: invalid address"); 1851b7565d44SJeff Roberson cnt = vm_phys_avail_count(); 1852b7565d44SJeff Roberson if (cnt >= PHYS_AVAIL_ENTRIES) 1853b7565d44SJeff Roberson return (ENOSPC); 1854b7565d44SJeff Roberson memmove(&phys_avail[i + 2], &phys_avail[i], 1855b7565d44SJeff Roberson (cnt - i) * sizeof(phys_avail[0])); 1856b7565d44SJeff Roberson phys_avail[i + 1] = pa; 1857b7565d44SJeff Roberson phys_avail[i + 2] = pa; 1858b7565d44SJeff Roberson vm_phys_avail_check(i); 1859b7565d44SJeff Roberson vm_phys_avail_check(i+2); 1860b7565d44SJeff Roberson 1861b7565d44SJeff Roberson return (0); 1862b7565d44SJeff Roberson } 1863b7565d44SJeff Roberson 186431991a5aSMitchell Horne /* 186531991a5aSMitchell Horne * Check if a given physical address can be included as part of a crash dump. 
186631991a5aSMitchell Horne */ 186731991a5aSMitchell Horne bool 186831991a5aSMitchell Horne vm_phys_is_dumpable(vm_paddr_t pa) 186931991a5aSMitchell Horne { 187031991a5aSMitchell Horne vm_page_t m; 187131991a5aSMitchell Horne int i; 187231991a5aSMitchell Horne 187331991a5aSMitchell Horne if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) 187431991a5aSMitchell Horne return ((m->flags & PG_NODUMP) == 0); 187531991a5aSMitchell Horne 187631991a5aSMitchell Horne for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 187731991a5aSMitchell Horne if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 187831991a5aSMitchell Horne return (true); 187931991a5aSMitchell Horne } 188031991a5aSMitchell Horne return (false); 188131991a5aSMitchell Horne } 188231991a5aSMitchell Horne 188381302f1dSMark Johnston void 188481302f1dSMark Johnston vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end) 188581302f1dSMark Johnston { 188681302f1dSMark Johnston struct vm_phys_seg *seg; 188781302f1dSMark Johnston 188881302f1dSMark Johnston if (vm_phys_early_nsegs == -1) 188981302f1dSMark Johnston panic("%s: called after initialization", __func__); 189081302f1dSMark Johnston if (vm_phys_early_nsegs == nitems(vm_phys_early_segs)) 189181302f1dSMark Johnston panic("%s: ran out of early segments", __func__); 189281302f1dSMark Johnston 189381302f1dSMark Johnston seg = &vm_phys_early_segs[vm_phys_early_nsegs++]; 189481302f1dSMark Johnston seg->start = start; 189581302f1dSMark Johnston seg->end = end; 189681302f1dSMark Johnston } 189781302f1dSMark Johnston 1898b7565d44SJeff Roberson /* 1899b7565d44SJeff Roberson * This routine allocates NUMA node specific memory before the page 1900b7565d44SJeff Roberson * allocator is bootstrapped. 1901b7565d44SJeff Roberson */ 1902b7565d44SJeff Roberson vm_paddr_t 1903b7565d44SJeff Roberson vm_phys_early_alloc(int domain, size_t alloc_size) 1904b7565d44SJeff Roberson { 19052e7838aeSJohn Baldwin #ifdef NUMA 19062e7838aeSJohn Baldwin int mem_index; 19072e7838aeSJohn Baldwin #endif 19082e7838aeSJohn Baldwin int i, biggestone; 1909b7565d44SJeff Roberson vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align; 1910b7565d44SJeff Roberson 191181302f1dSMark Johnston KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains), 191281302f1dSMark Johnston ("%s: invalid domain index %d", __func__, domain)); 1913b7565d44SJeff Roberson 1914b7565d44SJeff Roberson /* 1915b7565d44SJeff Roberson * Search the mem_affinity array for the biggest address 1916b7565d44SJeff Roberson * range in the desired domain. This is used to constrain 1917b7565d44SJeff Roberson * the phys_avail selection below. 
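 * A domain argument of -1 means "any domain"; in that case the
 * loop below simply selects the largest mem_affinity range
 * regardless of which domain it belongs to.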
1918b7565d44SJeff Roberson */ 1919b7565d44SJeff Roberson biggestsize = 0; 1920b7565d44SJeff Roberson mem_start = 0; 1921b7565d44SJeff Roberson mem_end = -1; 1922b7565d44SJeff Roberson #ifdef NUMA 19232e7838aeSJohn Baldwin mem_index = 0; 1924b7565d44SJeff Roberson if (mem_affinity != NULL) { 1925b7565d44SJeff Roberson for (i = 0;; i++) { 1926b7565d44SJeff Roberson size = mem_affinity[i].end - mem_affinity[i].start; 1927b7565d44SJeff Roberson if (size == 0) 1928b7565d44SJeff Roberson break; 192981302f1dSMark Johnston if (domain != -1 && mem_affinity[i].domain != domain) 1930b7565d44SJeff Roberson continue; 1931b7565d44SJeff Roberson if (size > biggestsize) { 1932b7565d44SJeff Roberson mem_index = i; 1933b7565d44SJeff Roberson biggestsize = size; 1934b7565d44SJeff Roberson } 1935b7565d44SJeff Roberson } 1936b7565d44SJeff Roberson mem_start = mem_affinity[mem_index].start; 1937b7565d44SJeff Roberson mem_end = mem_affinity[mem_index].end; 1938b7565d44SJeff Roberson } 1939b7565d44SJeff Roberson #endif 1940b7565d44SJeff Roberson 1941b7565d44SJeff Roberson /* 1942b7565d44SJeff Roberson * Now find biggest physical segment in within the desired 1943b7565d44SJeff Roberson * numa domain. 1944b7565d44SJeff Roberson */ 1945b7565d44SJeff Roberson biggestsize = 0; 1946b7565d44SJeff Roberson biggestone = 0; 1947b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1948b7565d44SJeff Roberson /* skip regions that are out of range */ 1949b7565d44SJeff Roberson if (phys_avail[i+1] - alloc_size < mem_start || 1950b7565d44SJeff Roberson phys_avail[i+1] > mem_end) 1951b7565d44SJeff Roberson continue; 1952b7565d44SJeff Roberson size = vm_phys_avail_size(i); 1953b7565d44SJeff Roberson if (size > biggestsize) { 1954b7565d44SJeff Roberson biggestone = i; 1955b7565d44SJeff Roberson biggestsize = size; 1956b7565d44SJeff Roberson } 1957b7565d44SJeff Roberson } 1958b7565d44SJeff Roberson alloc_size = round_page(alloc_size); 1959b7565d44SJeff Roberson 1960b7565d44SJeff Roberson /* 1961b7565d44SJeff Roberson * Grab single pages from the front to reduce fragmentation. 1962b7565d44SJeff Roberson */ 1963b7565d44SJeff Roberson if (alloc_size == PAGE_SIZE) { 1964b7565d44SJeff Roberson pa = phys_avail[biggestone]; 1965b7565d44SJeff Roberson phys_avail[biggestone] += PAGE_SIZE; 1966b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 1967b7565d44SJeff Roberson return (pa); 1968b7565d44SJeff Roberson } 1969b7565d44SJeff Roberson 1970b7565d44SJeff Roberson /* 1971b7565d44SJeff Roberson * Naturally align large allocations. 1972b7565d44SJeff Roberson */ 1973b7565d44SJeff Roberson align = phys_avail[biggestone + 1] & (alloc_size - 1); 1974b7565d44SJeff Roberson if (alloc_size + align > biggestsize) 1975b7565d44SJeff Roberson panic("cannot find a large enough size\n"); 1976b7565d44SJeff Roberson if (align != 0 && 1977b7565d44SJeff Roberson vm_phys_avail_split(phys_avail[biggestone + 1] - align, 1978b7565d44SJeff Roberson biggestone) != 0) 1979b7565d44SJeff Roberson /* Wasting memory. 
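 * The phys_avail table is full, so the entry cannot be split
 * to keep the unaligned tail available; instead drop those
 * "align" bytes from the entry so that the allocation below
 * remains naturally aligned.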
*/ 1980b7565d44SJeff Roberson phys_avail[biggestone + 1] -= align; 1981b7565d44SJeff Roberson 1982b7565d44SJeff Roberson phys_avail[biggestone + 1] -= alloc_size; 1983b7565d44SJeff Roberson vm_phys_avail_check(biggestone); 1984b7565d44SJeff Roberson pa = phys_avail[biggestone + 1]; 1985b7565d44SJeff Roberson return (pa); 1986b7565d44SJeff Roberson } 1987b7565d44SJeff Roberson 1988b7565d44SJeff Roberson void 1989b7565d44SJeff Roberson vm_phys_early_startup(void) 1990b7565d44SJeff Roberson { 199181302f1dSMark Johnston struct vm_phys_seg *seg; 1992b7565d44SJeff Roberson int i; 1993b7565d44SJeff Roberson 1994b7565d44SJeff Roberson for (i = 0; phys_avail[i + 1] != 0; i += 2) { 1995b7565d44SJeff Roberson phys_avail[i] = round_page(phys_avail[i]); 1996b7565d44SJeff Roberson phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 1997b7565d44SJeff Roberson } 1998b7565d44SJeff Roberson 199981302f1dSMark Johnston for (i = 0; i < vm_phys_early_nsegs; i++) { 200081302f1dSMark Johnston seg = &vm_phys_early_segs[i]; 200181302f1dSMark Johnston vm_phys_add_seg(seg->start, seg->end); 200281302f1dSMark Johnston } 200381302f1dSMark Johnston vm_phys_early_nsegs = -1; 200481302f1dSMark Johnston 2005b7565d44SJeff Roberson #ifdef NUMA 2006b7565d44SJeff Roberson /* Force phys_avail to be split by domain. */ 2007b7565d44SJeff Roberson if (mem_affinity != NULL) { 2008b7565d44SJeff Roberson int idx; 2009b7565d44SJeff Roberson 2010b7565d44SJeff Roberson for (i = 0; mem_affinity[i].end != 0; i++) { 2011b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].start); 2012b7565d44SJeff Roberson if (idx != -1 && 2013b7565d44SJeff Roberson phys_avail[idx] != mem_affinity[i].start) 2014b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].start, idx); 2015b7565d44SJeff Roberson idx = vm_phys_avail_find(mem_affinity[i].end); 2016b7565d44SJeff Roberson if (idx != -1 && 2017b7565d44SJeff Roberson phys_avail[idx] != mem_affinity[i].end) 2018b7565d44SJeff Roberson vm_phys_avail_split(mem_affinity[i].end, idx); 2019b7565d44SJeff Roberson } 2020b7565d44SJeff Roberson } 2021b7565d44SJeff Roberson #endif 2022b7565d44SJeff Roberson } 2023b7565d44SJeff Roberson 202411752d88SAlan Cox #ifdef DDB 202511752d88SAlan Cox /* 202611752d88SAlan Cox * Show the number of physical pages in each of the free lists. 
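 * From the ddb(4) prompt this is invoked as "show freepages"; for
 * each domain and free list it prints one row per order, giving
 * the block size and the number of free blocks in each pool.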
202711752d88SAlan Cox */ 2028c84c5e00SMitchell Horne DB_SHOW_COMMAND_FLAGS(freepages, db_show_freepages, DB_CMD_MEMSAFE) 202911752d88SAlan Cox { 203011752d88SAlan Cox struct vm_freelist *fl; 20317e226537SAttilio Rao int flind, oind, pind, dom; 203211752d88SAlan Cox 20337e226537SAttilio Rao for (dom = 0; dom < vm_ndomains; dom++) { 20347e226537SAttilio Rao db_printf("DOMAIN: %d\n", dom); 203511752d88SAlan Cox for (flind = 0; flind < vm_nfreelists; flind++) { 203611752d88SAlan Cox db_printf("FREE LIST %d:\n" 203711752d88SAlan Cox "\n ORDER (SIZE) | NUMBER" 203811752d88SAlan Cox "\n ", flind); 203911752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 204011752d88SAlan Cox db_printf(" | POOL %d", pind); 204111752d88SAlan Cox db_printf("\n-- "); 204211752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) 204311752d88SAlan Cox db_printf("-- -- "); 204411752d88SAlan Cox db_printf("--\n"); 204511752d88SAlan Cox for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 204611752d88SAlan Cox db_printf(" %2.2d (%6.6dK)", oind, 204711752d88SAlan Cox 1 << (PAGE_SHIFT - 10 + oind)); 204811752d88SAlan Cox for (pind = 0; pind < VM_NFREEPOOL; pind++) { 20497e226537SAttilio Rao fl = vm_phys_free_queues[dom][flind][pind]; 205011752d88SAlan Cox db_printf(" | %6.6d", fl[oind].lcnt); 205111752d88SAlan Cox } 205211752d88SAlan Cox db_printf("\n"); 205311752d88SAlan Cox } 205411752d88SAlan Cox db_printf("\n"); 205511752d88SAlan Cox } 20567e226537SAttilio Rao db_printf("\n"); 20577e226537SAttilio Rao } 205811752d88SAlan Cox } 205911752d88SAlan Cox #endif 2060