/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/vm.h>
#include <sys/exec.h>

#include <sys/exechdr.h>
#include <vm/seg_kmem.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/kdi.h>
#include <sys/cpu_module.h>

#include <vm/hat_sfmmu.h>

#include <sys/memnode.h>

#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <vm/vm_dep.h>
#include <vm/page.h>
#include <sys/platform_module.h>

/*
 * These variables are set by module specific config routines.
 * They are only set by modules which will use physical cache page coloring
 * and/or virtual cache page coloring.
 */
int do_pg_coloring = 0;
int do_virtual_coloring = 0;

/*
 * These variables can be conveniently patched at kernel load time to
 * prevent do_pg_coloring or do_virtual_coloring from being enabled by
 * module specific config routines.
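 * Each is a simple int, so either can be overridden from /etc/system at
 * boot, e.g. a "set use_page_coloring = 0" line keeps physical page
 * coloring off even if a cpu module's config routine would ask for it.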
 */

int use_page_coloring = 1;
int use_virtual_coloring = 1;

/*
 * initialized by page_coloring_init()
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;
int cpu_page_colors;
uint_t vac_colors = 0;
uint_t vac_colors_mask = 0;

/* cpu specific coloring initialization */
extern void page_coloring_init_cpu();
#pragma weak page_coloring_init_cpu

/*
 * get the ecache setsize for the current cpu.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)

plcnt_t		plcnt;		/* page list count */

/*
 * This variable is set by the cpu module to contain the lowest
 * address not affected by the SF_ERRATA_57 workaround.  It should
 * remain 0 if the workaround is not needed.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

extern void page_relocate_hash(page_t *, page_t *);

/*
 * these must be defined in platform specific areas
 */
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
	struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
	caddr_t, size_t, uint_t, struct lgrp *);
/*
 * Convert page frame number to an OBMEM page frame number
 * (i.e. put in the type bits -- zero for this implementation)
 */
pfn_t
impl_obmem_pfnum(pfn_t pf)
{
	return (pf);
}

/*
 * Use physmax to determine the highest physical page of DRAM memory.
 * It is assumed that any physical address above physmax is in IO space.
 * We don't bother checking the low end because we assume that memory space
 * begins at physical page frame 0.
 *
 * Return 1 if the page frame is onboard DRAM memory, else 0.
 * Returns 0 for nvram so it won't be cached.
 */
int
pf_is_memory(pfn_t pf)
{
	/* We must be IO space */
	if (pf > physmax)
		return (0);

	/* We must be memory space */
	return (1);
}

/*
 * Handle a pagefault.
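 *
 * Returns 0 on success or an FC_* fault code on failure.  For a user
 * fault that lands in an unmapped part of the brk or stack gap, a
 * zero-fill-on-demand mapping is created below and the fault is retried.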
 */
faultcode_t
pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
{
	struct as *as;
	struct proc *p;
	faultcode_t res;
	caddr_t base;
	size_t len;
	int err;

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	if (iskernel) {
		as = &kas;
	} else {
		p = curproc;
		as = p->p_as;
#if defined(SF_ERRATA_57)
		/*
		 * Prevent infinite loops due to a segment driver
		 * setting the execute permissions and the sfmmu hat
		 * silently ignoring them.
		 */
		if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
		    addr < errata57_limit) {
			res = FC_NOMAP;
			goto out;
		}
#endif
	}

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(as->a_hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (!(res == FC_NOMAP && iskernel == 0))
		goto out;

	/*
	 * Check to see if we happened to fault on a currently unmapped
	 * part of the UNIX data or stack segments.  If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	base = p->p_brkbase;
	len = p->p_brksize;

	if (addr < base || addr >= base + len) {	/* data seg? */
		base = (caddr_t)(p->p_usrstack - p->p_stksize);
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */
			res = FC_NOMAP;
			goto out;
		}
	}

	/* the rest of this function implements 3.X/4.X/5.X compatibility */
	/* This code is probably not needed anymore */

	/* expand the gap to the page boundaries on each side */
	len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
	    ((uintptr_t)base & PAGEMASK);
	base = (caddr_t)((uintptr_t)base & PAGEMASK);

	as_rangelock(as);
	as_purge(as);
	if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
		err = as_map(as, base, len, segvn_create, zfod_argsp);
		as_rangeunlock(as);
		if (err) {
			res = FC_MAKE_ERR(err);
			goto out;
		}
	} else {
		/*
		 * This page is already mapped by another thread after we
		 * returned from as_fault() above.  We just fall through to
		 * as_fault() below.
		 */
		as_rangeunlock(as);
	}

	res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);

out:

	return (res);
}

/*
 * This is the routine which defines the address limit implied
 * by the flag '_MAP_LOW32'.  USERLIMIT32 matches the highest
 * mappable address in a 32-bit process on this platform (though
 * perhaps we should make it be UINT32_MAX here?)
 */
void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
	struct proc *p = curproc;
	caddr_t userlimit = flags & _MAP_LOW32 ?
	    (caddr_t)USERLIMIT32 : p->p_as->a_userlimit;
	map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
}

/*
 * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
 */
caddr_t	hole_start, hole_end;

/*
 * kpm mapping window
 */
caddr_t kpm_vbase;
size_t	kpm_size;
uchar_t kpm_size_shift;

/*
 * Determine whether [base, base+len] contains a mappable range of
 * addresses at least minlen long.  base and len are adjusted if
 * required to provide a mappable range.
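 *
 * Returns 1 and updates *basep and *lenp when a usable range is found,
 * 0 otherwise.  When the request straddles the VA hole, 'dir' (AH_LO or
 * AH_HI) selects whether the range below or above the hole is preferred.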
 */
/* ARGSUSED */
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	caddr_t hi, lo;

	lo = *basep;
	hi = lo + *lenp;

	/*
	 * If hi rolled over the top, try cutting back.
	 */
	if (hi < lo) {
		size_t newlen = 0 - (uintptr_t)lo - 1l;

		if (newlen + (uintptr_t)hi < minlen)
			return (0);
		if (newlen < minlen)
			return (0);
		*lenp = newlen;
	} else if (hi - lo < minlen)
		return (0);

	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped by the MMU.
	 */
	hi = lo + *lenp;

	if (lo < hole_start) {
		if (hi > hole_start)
			if (hi < hole_end)
				hi = hole_start;
			else
				/* lo < hole_start && hi >= hole_end */
				if (dir == AH_LO) {
					/*
					 * prefer lowest range
					 */
					if (hole_start - lo >= minlen)
						hi = hole_start;
					else if (hi - hole_end >= minlen)
						lo = hole_end;
					else
						return (0);
				} else {
					/*
					 * prefer highest range
					 */
					if (hi - hole_end >= minlen)
						lo = hole_end;
					else if (hole_start - lo >= minlen)
						hi = hole_start;
					else
						return (0);
				}
	} else {
		/* lo >= hole_start */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	}

	if (hi - lo < minlen)
		return (0);

	*basep = lo;
	*lenp = hi - lo;

	return (1);
}

/*
 * Determine whether [addr, addr+len] with protections `prot' are valid
 * for a user address space.
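 *
 * Returns RANGE_OKAY, RANGE_BADADDR if the range is outside the legal
 * user address space or overlaps the VA hole, or RANGE_BADPROT when the
 * SF_ERRATA_57 workaround forbids exec permission in the range.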
 */
/*ARGSUSED*/
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
	caddr_t eaddr = addr + len;

	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

	/*
	 * Determine if the address range falls within an illegal
	 * range of the MMU.
	 */
	if (eaddr > hole_start && addr < hole_end)
		return (RANGE_BADADDR);

#if defined(SF_ERRATA_57)
	/*
	 * Make sure USERLIMIT isn't raised too high
	 */
	ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
	    errata57_limit == 0);

	if (AS_TYPE_64BIT(as) &&
	    (addr < errata57_limit) &&
	    (prot & PROT_EXEC))
		return (RANGE_BADPROT);
#endif /* SF_ERRATA_57 */
	return (RANGE_OKAY);
}

/*
 * Routine used to check to see if an a.out can be executed
 * by the current machine/architecture.
 */
int
chkaout(struct exdata *exp)
{
	if (exp->ux_mach == M_SPARC)
		return (0);
	else
		return (ENOEXEC);
}

/*
 * The following functions return information about an a.out
 * which is used when a program is executed.
 */

/*
 * Return the load memory address for the data segment.
 */
caddr_t
getdmem(struct exec *exp)
{
	/*
	 * XXX - Sparc Reference Hack approaching
	 * Remember that we are loading
	 * 8k executables into a 4k machine
	 * DATA_ALIGN == 2 * PAGESIZE
	 */
	if (exp->a_text)
		return ((caddr_t)(roundup(USRTEXT + exp->a_text, DATA_ALIGN)));
	else
		return ((caddr_t)USRTEXT);
}

/*
 * Return the starting disk address for the data segment.
 */
ulong_t
getdfile(struct exec *exp)
{
	if (exp->a_magic == ZMAGIC)
		return (exp->a_text);
	else
		return (sizeof (struct exec) + exp->a_text);
}

/*
 * Return the load memory address for the text segment.
 */

/*ARGSUSED*/
caddr_t
gettmem(struct exec *exp)
{
	return ((caddr_t)USRTEXT);
}

/*
 * Return the file byte offset for the text segment.
 */
uint_t
gettfile(struct exec *exp)
{
	if (exp->a_magic == ZMAGIC)
		return (0);
	else
		return (sizeof (struct exec));
}

void
getexinfo(
	struct exdata *edp_in,
	struct exdata *edp_out,
	int *pagetext,
	int *pagedata)
{
	*edp_out = *edp_in;	/* structure copy */

	if ((edp_in->ux_mag == ZMAGIC) &&
	    ((edp_in->vp->v_flag & VNOMAP) == 0)) {
		*pagetext = 1;
		*pagedata = 1;
	} else {
		*pagetext = 0;
		*pagedata = 0;
	}
}

/*
 * Return a nonzero value if the address may cause a VAC alias with KPM
 * mappings.  KPM selects an address that is equal to the offset modulo
 * shm_alignment and assumes it can't be in VAC conflict with any larger
 * than PAGESIZE mapping.
 */
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
	if (vac) {
		return (((uintptr_t)addr ^ off) & (shm_alignment - 1));
	} else {
		return (0);
	}
}

/*
 * Sanity control.  Don't use large pages regardless of user
 * settings if there's less than privm_lpg_min_physmem or
 * shm_lpg_min_physmem memory installed.  The units for these
 * variables are 8K pages.
 */
pgcnt_t shm_lpg_min_physmem = 131072;	/* 1GB */
pgcnt_t privm_lpg_min_physmem = 131072;	/* 1GB */

static size_t
map_pgszheap(struct proc *p, caddr_t addr, size_t len)
{
	size_t pgsz = MMU_PAGESIZE;
	int szc;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use at least the default pagesize.
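	 * The loop below then walks the supported page sizes from largest to
	 * smallest and picks the first enabled size that is no larger than
	 * max_uheap_lpsize and no larger than len.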
	 */
	if (len == 0) {
		len = p->p_brkbase + p->p_brksize - p->p_bssbase;
	}
	len = MAX(len, default_uheap_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_uheap_lpsize)
			continue;
		if (len >= pgsz) {
			break;
		}
	}

	/*
	 * If addr == 0 we were called by memcntl() when the
	 * size code is 0.  Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_brkpageszc].hp_size;
	}

	return (pgsz);
}

static size_t
map_pgszstk(struct proc *p, caddr_t addr, size_t len)
{
	size_t pgsz = MMU_PAGESIZE;
	int szc;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use at least the default pagesize.
	 */
	if (len == 0) {
		len = p->p_stksize;
	}
	len = MAX(len, default_ustack_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_ustack_lpsize)
			continue;
		if (len >= pgsz) {
			break;
		}
	}

	/*
	 * If addr == 0 we were called by memcntl() or exec_args() when the
	 * size code is 0.  Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_stkpageszc].hp_size;
	}

	return (pgsz);
}

static size_t
map_pgszism(caddr_t addr, size_t len)
{
	uint_t szc;
	size_t pgsz;

	for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
		if (disable_ism_large_pages & (1 << szc))
			continue;

		pgsz = hw_page_array[szc].hp_size;
		if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
			return (pgsz);
	}

	return (DEFAULT_ISM_PAGESIZE);
}

/*
 * Suggest a page size to be used to map a segment of type maptype and length
 * len.  Returns a page size (not a size code).
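 *
 * maptype is one of MAPPGSZ_ISM, MAPPGSZ_STK or MAPPGSZ_HEAP; MAPPGSZ_VA
 * is not expected here (see the ASSERT below).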
 */
/* ARGSUSED */
size_t
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
	size_t	pgsz = MMU_PAGESIZE;

	ASSERT(maptype != MAPPGSZ_VA);

	if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
		return (MMU_PAGESIZE);
	}

	switch (maptype) {
	case MAPPGSZ_ISM:
		pgsz = map_pgszism(addr, len);
		break;

	case MAPPGSZ_STK:
		if (max_ustack_lpsize > MMU_PAGESIZE) {
			pgsz = map_pgszstk(p, addr, len);
		}
		break;

	case MAPPGSZ_HEAP:
		if (max_uheap_lpsize > MMU_PAGESIZE) {
			pgsz = map_pgszheap(p, addr, len);
		}
		break;
	}
	return (pgsz);
}


/* assumes TTE8K...TTE4M == szc */

static uint_t
map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
    size_t max_lpsize, size_t min_physmem)
{
	caddr_t eaddr = addr + size;
	uint_t szcvec = 0;
	caddr_t raddr;
	caddr_t readdr;
	size_t	pgsz;
	int i;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
		return (0);
	}
	for (i = mmu_page_sizes - 1; i > 0; i--) {
		if (disable_lpgs & (1 << i)) {
			continue;
		}
		pgsz = page_get_pagesize(i);
		if (pgsz > max_lpsize) {
			continue;
		}
		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (raddr < addr || raddr >= readdr) {
			continue;
		}
		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
			continue;
		}
		szcvec |= (1 << i);
		/*
		 * And or in the remaining enabled page sizes.
		 */
		szcvec |= P2PHASE(~disable_lpgs, (1 << i));
		szcvec &= ~1; /* no need to return 8K pagesize */
		break;
	}
	return (szcvec);
}

/*
 * Return a bit vector of large page size codes that
 * can be used to map [addr, addr + len) region.
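 *
 * Bit i set means size code i is usable; the 8K bit (bit 0) is never
 * returned.  The work is delegated to map_szcvec() with the disable mask,
 * maximum large page size and physmem floor appropriate to the mapping
 * type (text, initialized data, shared memory, heap, stack or other
 * private anon memory).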
 */
/* ARGSUSED */
uint_t
map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
    int memcntl)
{
	if (flags & MAP_TEXT) {
		return (map_szcvec(addr, size, off,
		    disable_auto_text_large_pages,
		    max_utext_lpsize, shm_lpg_min_physmem));

	} else if (flags & MAP_INITDATA) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_uidata_lpsize, privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_SHM) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_shm_lpsize, shm_lpg_min_physmem));

	} else if (type == MAPPGSZC_HEAP) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_uheap_lpsize, privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_STACK) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_ustack_lpsize, privm_lpg_min_physmem));

	} else {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_privmap_lpsize, privm_lpg_min_physmem));
	}
}

/*
 * Anchored in the table below are counters used to keep track
 * of free contiguous physical memory.  Each element of the table contains
 * the array of counters, the size of array which is allocated during
 * startup based on physmax and a shift value used to convert a pagenum
 * into a counter array index or vice versa.  The table has page size
 * for rows and region size for columns:
 *
 *	page_counters[page_size][region_size]
 *
 *	page_size:	TTE size code of pages on page_size freelist.
 *
 *	region_size:	TTE size code of a candidate larger page made up
 *			of contiguous free page_size pages.
 *
 * As you go across a page_size row increasing region_size each
 * element keeps track of how many (region_size - 1) size groups
 * made up of page_size free pages can be coalesced into a
 * region_size page.  Yuck!  Let's try an example:
 *
 *	page_counters[1][3] is the table element used for identifying
 *	candidate 4M pages from contiguous pages off the 64K free list.
 *	Each index in the page_counters[1][3].array spans 4M.  It's the
 *	number of free 512K size (region_size - 1) groups of contiguous
 *	64K free pages.  So when page_counters[1][3].counters[n] == 8
 *	we know we have a candidate 4M page made up of 512K size groups
 *	of 64K free pages.
 */

/*
 * Per page size free lists.
 * 3rd (max_mem_nodes) and 4th (page coloring bins) dimensions
 * are allocated dynamically.
 */
page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];

/*
 * For now there is only a single size cache list.
 * Allocated dynamically.
 */
page_t ***page_cachelists[MAX_MEM_TYPES];

kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];

caddr_t
alloc_page_freelists(int mnode, caddr_t alloc_base, int alloc_align)
{
	int	mtype;
	uint_t	szc;

	alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);

	/*
	 * We only support small pages in the cachelist.
	 */
	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
		page_cachelists[mtype][mnode] = (page_t **)alloc_base;
		alloc_base += (sizeof (page_t *) * page_get_pagecolors(0));
		/*
		 * Allocate freelist bins for all
		 * supported page sizes.
		 */
		for (szc = 0; szc < mmu_page_sizes; szc++) {
			page_freelists[szc][mtype][mnode] =
			    (page_t **)alloc_base;
			alloc_base += ((sizeof (page_t *) *
			    page_get_pagecolors(szc)));
		}
	}

	alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);

	return (alloc_base);
}

/*
 * Allocate page_freelists bin headers for a memnode from the
 * nucleus data area.  This is the first time that mmu_page_sizes is
 * used during sun4u bootup, so check mmu_page_sizes initialization.
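 *
 * Returns 0 on success, or -1 if the nucleus data area cannot satisfy
 * the allocation.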
 */
int
ndata_alloc_page_freelists(struct memlist *ndata, int mnode)
{
	size_t alloc_sz;
	caddr_t alloc_base;
	caddr_t end;
	int	mtype;
	uint_t	szc;
	int32_t allp = 0;

	if (&mmu_init_mmu_page_sizes) {
		if (!mmu_init_mmu_page_sizes(allp)) {
			cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
			    mmu_page_sizes);
		}
	}
	ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);

	/* first time called - allocate max_mem_nodes dimension */
	if (mnode == 0) {
		int	i;

		/* page_cachelists */
		alloc_sz = MAX_MEM_TYPES * max_mem_nodes *
		    sizeof (page_t **);

		/* page_freelists */
		alloc_sz += MAX_MEM_TYPES * mmu_page_sizes * max_mem_nodes *
		    sizeof (page_t **);

		/* fpc_mutex and cpc_mutex */
		alloc_sz += 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);

		alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
		if (alloc_base == NULL)
			return (-1);

		ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);

		for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
			page_cachelists[mtype] = (page_t ***)alloc_base;
			alloc_base += (max_mem_nodes * sizeof (page_t **));
			for (szc = 0; szc < mmu_page_sizes; szc++) {
				page_freelists[szc][mtype] =
				    (page_t ***)alloc_base;
				alloc_base += (max_mem_nodes *
				    sizeof (page_t **));
			}
		}
		for (i = 0; i < NPC_MUTEX; i++) {
			fpc_mutex[i] = (kmutex_t *)alloc_base;
			alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
			cpc_mutex[i] = (kmutex_t *)alloc_base;
			alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
		}
		alloc_sz = 0;
	}

	/*
	 * Calculate the size needed by alloc_page_freelists().
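	 * One bin pointer is needed per color for the 8K cachelist and for
	 * each supported freelist page size.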
	 */
	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
		alloc_sz += sizeof (page_t *) * page_get_pagecolors(0);

		for (szc = 0; szc < mmu_page_sizes; szc++)
			alloc_sz += sizeof (page_t *) *
			    page_get_pagecolors(szc);
	}

	alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
	if (alloc_base == NULL)
		return (-1);

	end = alloc_page_freelists(mnode, alloc_base, ecache_alignsize);
	ASSERT((uintptr_t)end == roundup((uintptr_t)alloc_base + alloc_sz,
	    ecache_alignsize));

	return (0);
}

/*
 * To select our starting bin, we stride through the bins with a stride
 * of 337.  Why 337?  It's prime, it's largeish, and it performs well both
 * in simulation and practice for different workloads on varying cache sizes.
 */
uint32_t color_start_current = 0;
uint32_t color_start_stride = 337;
int color_start_random = 0;

/* ARGSUSED */
uint_t
get_color_start(struct as *as)
{
	uint32_t old, new;

	if (consistent_coloring == 2 || color_start_random) {
		return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
		    (hw_page_array[0].hp_colors - 1)));
	}

	do {
		old = color_start_current;
		new = old + (color_start_stride << (vac_shift - MMU_PAGESHIFT));
	} while (cas32(&color_start_current, old, new) != old);

	return ((uint_t)(new));
}

/*
 * Called once at startup from kphysm_init() -- before memialloc()
 * is invoked to do the 1st page_free()/page_freelist_add().
 *
 * initializes page_colors and page_colors_mask based on ecache_setsize.
 *
 * Also initializes the counter locks.
 */
void
page_coloring_init()
{
	int	a, i;
	uint_t colors;

	if (do_pg_coloring == 0) {
		page_colors = 1;
		for (i = 0; i < mmu_page_sizes; i++)
			hw_page_array[i].hp_colors = 1;
		return;
	}

	/*
	 * Calculate page_colors from ecache_setsize.  ecache_setsize contains
	 * the max ecache setsize of all cpus configured in the system or, for
	 * cheetah+ systems, the max possible ecache setsize for all possible
	 * cheetah+ cpus.
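	 *
	 * For example, with the 8K base MMU page size, a 1MB ecache set
	 * size gives page_colors = 128.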
	 */
	page_colors = ecache_setsize / MMU_PAGESIZE;
	page_colors_mask = page_colors - 1;

	vac_colors = vac_size / MMU_PAGESIZE;
	vac_colors_mask = vac_colors - 1;

	page_coloring_shift = 0;
	a = ecache_setsize;
	while (a >>= 1) {
		page_coloring_shift++;
	}

	/* initialize number of colors per page size */
	for (i = 0; i < mmu_page_sizes; i++) {
		hw_page_array[i].hp_colors = (page_colors_mask >>
		    (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
		    + 1;
	}

	/*
	 * initialize cpu_page_colors if ecache setsizes are homogeneous.
	 * cpu_page_colors is set to -1 during DR operation or during startup
	 * if setsizes are heterogeneous.
	 *
	 * The value of cpu_page_colors determines if additional color bins
	 * need to be checked for a particular color in the page_get routines.
	 */
	if ((cpu_page_colors == 0) && (cpu_setsize < ecache_setsize)) {

		cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
		a = lowbit(page_colors) - lowbit(cpu_page_colors);
		ASSERT(a > 0);
		ASSERT(a < 16);

		for (i = 0; i < mmu_page_sizes; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				colorequivszc[i] = 0;
				continue;
			}
			while ((colors >> a) == 0)
				a--;
			ASSERT(a >= 0);

			/* higher 4 bits encode the color equiv mask */
			colorequivszc[i] = (a << 4);
		}
	}

	/* factor in colorequiv to check additional 'equivalent' bins. */
	if (colorequiv > 1 && &page_coloring_init_cpu == NULL) {

		a = lowbit(colorequiv) - 1;

		if (a > 15)
			a = 15;

		for (i = 0; i < mmu_page_sizes; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				continue;
			}
			while ((colors >> a) == 0)
				a--;
			if ((a << 4) > colorequivszc[i]) {
				colorequivszc[i] = (a << 4);
			}
		}
	}

	/* do cpu specific color initialization */
	if (&page_coloring_init_cpu) {
		page_coloring_init_cpu();
	}
}

int
bp_color(struct buf *bp)
{
	int color = -1;

	if (vac) {
		if ((bp->b_flags & B_PAGEIO) != 0) {
			color = sfmmu_get_ppvcolor(bp->b_pages);
		} else if (bp->b_un.b_addr != NULL) {
			color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
		}
	}
	return (color < 0 ? 0 : ptob(color));
}

/*
 * Create & Initialise the pageout scanner thread.  The thread starts
 * at procedure with process pp and priority pri.
 */
void
pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
{
	(void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
}

/*
 * Function for flushing D-cache when performing module relocations
 * to an alternate mapping.  Stubbed out on all platforms except sun4u,
 * at least for now.
 */
void
dcache_flushall()
{
	sfmmu_cache_flushall();
}

static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
	if (va1 < va2 && va1 + sz1 <= va2)
		return (0);

	if (va2 < va1 && va2 + sz2 <= va1)
		return (0);

	return (1);
}

/*
 * Return the number of bytes, relative to the beginning of a given range, that
 * are non-toxic (can be read from and written to with relative impunity).
 */
size_t
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
{
	/* OBP reads are harmless, but we don't want people writing there */
	if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
	    OFW_START_ADDR + 1))
		return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);

	if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
		return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);

	return (sz);	/* no overlap */
}

/*
 * Minimum physmem required for enabling large pages for kernel heap.
 * Currently we do not enable lp for kmem on systems with less
 * than 1GB of memory.  This value can be changed via /etc/system.
 */
size_t segkmem_lpminphysmem = 0x40000000;	/* 1GB */

/*
 * this function chooses large page size for kernel heap
 */
size_t
get_segkmem_lpsize(size_t lpsize)
{
	size_t memtotal = physmem * PAGESIZE;
	size_t mmusz;
	uint_t szc;

	if (memtotal < segkmem_lpminphysmem)
		return (PAGESIZE);

	if (plat_lpkmem_is_supported != NULL &&
	    plat_lpkmem_is_supported() == 0)
		return (PAGESIZE);

	mmusz = mmu_get_kernel_lpsize(lpsize);
	szc = page_szc(mmusz);

	while (szc) {
		if (!(disable_large_pages & (1 << szc)))
			return (page_get_pagesize(szc));
		szc--;
	}
	return (PAGESIZE);
}