10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52251Selowe * Common Development and Distribution License (the "License").
62251Selowe * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*12293SJames.McPherson@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23*12293SJames.McPherson@Sun.COM * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * UNIX machine dependent virtual memory support.
280Sstevel@tonic-gate */
290Sstevel@tonic-gate
300Sstevel@tonic-gate #include <sys/vm.h>
310Sstevel@tonic-gate #include <sys/exec.h>
320Sstevel@tonic-gate
330Sstevel@tonic-gate #include <sys/exechdr.h>
340Sstevel@tonic-gate #include <vm/seg_kmem.h>
350Sstevel@tonic-gate #include <sys/atomic.h>
360Sstevel@tonic-gate #include <sys/archsystm.h>
370Sstevel@tonic-gate #include <sys/machsystm.h>
380Sstevel@tonic-gate #include <sys/kdi.h>
390Sstevel@tonic-gate #include <sys/cpu_module.h>
400Sstevel@tonic-gate
410Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
420Sstevel@tonic-gate
430Sstevel@tonic-gate #include <sys/memnode.h>
440Sstevel@tonic-gate
450Sstevel@tonic-gate #include <sys/mem_config.h>
460Sstevel@tonic-gate #include <sys/mem_cage.h>
470Sstevel@tonic-gate #include <vm/vm_dep.h>
482961Sdp78419 #include <vm/page.h>
490Sstevel@tonic-gate #include <sys/platform_module.h>
500Sstevel@tonic-gate
/*
 * These variables are set by module specific config routines.
 * They are only set by modules which will use physical cache page coloring.
 */
int do_pg_coloring = 0;

/*
 * These variables can be conveniently patched at kernel load time to
 * prevent do_pg_coloring from being enabled by
 * module specific config routines.
 *
 * The default of 1 leaves page coloring permitted.
 */

int use_page_coloring = 1;

/*
 * initialized by page_coloring_init()
 *
 * The first three are defined elsewhere and only referenced here; the
 * remaining three are defined in this file.
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;
int cpu_page_colors;
uint_t vac_colors = 0;
uint_t vac_colors_mask = 0;

/*
 * cpu specific coloring initialization
 *
 * Declared weak, so the symbol may legitimately be absent at link time.
 * NOTE(review): callers presumably test the function's address before
 * calling it -- confirm at the call site.
 */
extern void page_coloring_init_cpu();
#pragma weak page_coloring_init_cpu

/*
 * get the ecache setsize for the current cpu.
 * Indexes cpunodes[] by the cpu_id of the CPU this code is running on.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)

plcnt_t		plcnt;		/* page list count */
850Sstevel@tonic-gate
/*
 * This variable is set by the cpu module to contain the lowest
 * address not affected by the SF_ERRATA_57 workaround.  It should
 * remain 0 if the workaround is not needed.
 *
 * It is consulted by pagefault() and valid_usr_range() to refuse
 * executable access below this address in 64-bit address spaces.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

/* Defined elsewhere; only declared here. */
extern void page_relocate_hash(page_t *, page_t *);

/*
 * these must be defined in platform specific areas
 */
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
	struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
	caddr_t, size_t, uint_t, struct lgrp *);
1040Sstevel@tonic-gate /*
1050Sstevel@tonic-gate * Convert page frame number to an OBMEM page frame number
1060Sstevel@tonic-gate * (i.e. put in the type bits -- zero for this implementation)
1070Sstevel@tonic-gate */
1080Sstevel@tonic-gate pfn_t
impl_obmem_pfnum(pfn_t pf)1090Sstevel@tonic-gate impl_obmem_pfnum(pfn_t pf)
1100Sstevel@tonic-gate {
1110Sstevel@tonic-gate return (pf);
1120Sstevel@tonic-gate }
1130Sstevel@tonic-gate
1140Sstevel@tonic-gate /*
1150Sstevel@tonic-gate * Use physmax to determine the highest physical page of DRAM memory
1160Sstevel@tonic-gate * It is assumed that any physical addresses above physmax is in IO space.
1170Sstevel@tonic-gate * We don't bother checking the low end because we assume that memory space
1180Sstevel@tonic-gate * begins at physical page frame 0.
1190Sstevel@tonic-gate *
1200Sstevel@tonic-gate * Return 1 if the page frame is onboard DRAM memory, else 0.
1210Sstevel@tonic-gate * Returns 0 for nvram so it won't be cached.
1220Sstevel@tonic-gate */
1230Sstevel@tonic-gate int
pf_is_memory(pfn_t pf)1240Sstevel@tonic-gate pf_is_memory(pfn_t pf)
1250Sstevel@tonic-gate {
1260Sstevel@tonic-gate /* We must be IO space */
1270Sstevel@tonic-gate if (pf > physmax)
1280Sstevel@tonic-gate return (0);
1290Sstevel@tonic-gate
1300Sstevel@tonic-gate /* We must be memory space */
1310Sstevel@tonic-gate return (1);
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate
1340Sstevel@tonic-gate /*
1350Sstevel@tonic-gate * Handle a pagefault.
1360Sstevel@tonic-gate */
1370Sstevel@tonic-gate faultcode_t
pagefault(caddr_t addr,enum fault_type type,enum seg_rw rw,int iskernel)1380Sstevel@tonic-gate pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
1390Sstevel@tonic-gate {
1400Sstevel@tonic-gate struct as *as;
1410Sstevel@tonic-gate struct proc *p;
1420Sstevel@tonic-gate faultcode_t res;
1430Sstevel@tonic-gate caddr_t base;
1440Sstevel@tonic-gate size_t len;
1450Sstevel@tonic-gate int err;
1460Sstevel@tonic-gate
1470Sstevel@tonic-gate if (INVALID_VADDR(addr))
1480Sstevel@tonic-gate return (FC_NOMAP);
1490Sstevel@tonic-gate
1500Sstevel@tonic-gate if (iskernel) {
1510Sstevel@tonic-gate as = &kas;
1520Sstevel@tonic-gate } else {
1530Sstevel@tonic-gate p = curproc;
1540Sstevel@tonic-gate as = p->p_as;
1550Sstevel@tonic-gate #if defined(SF_ERRATA_57)
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate * Prevent infinite loops due to a segment driver
1580Sstevel@tonic-gate * setting the execute permissions and the sfmmu hat
1590Sstevel@tonic-gate * silently ignoring them.
1600Sstevel@tonic-gate */
1610Sstevel@tonic-gate if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
1620Sstevel@tonic-gate addr < errata57_limit) {
1630Sstevel@tonic-gate res = FC_NOMAP;
1640Sstevel@tonic-gate goto out;
1650Sstevel@tonic-gate }
1660Sstevel@tonic-gate #endif
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate /*
1700Sstevel@tonic-gate * Dispatch pagefault.
1710Sstevel@tonic-gate */
1720Sstevel@tonic-gate res = as_fault(as->a_hat, as, addr, 1, type, rw);
1730Sstevel@tonic-gate
1740Sstevel@tonic-gate /*
1750Sstevel@tonic-gate * If this isn't a potential unmapped hole in the user's
1760Sstevel@tonic-gate * UNIX data or stack segments, just return status info.
1770Sstevel@tonic-gate */
1780Sstevel@tonic-gate if (!(res == FC_NOMAP && iskernel == 0))
1790Sstevel@tonic-gate goto out;
1800Sstevel@tonic-gate
1810Sstevel@tonic-gate /*
1820Sstevel@tonic-gate * Check to see if we happened to faulted on a currently unmapped
1830Sstevel@tonic-gate * part of the UNIX data or stack segments. If so, create a zfod
1840Sstevel@tonic-gate * mapping there and then try calling the fault routine again.
1850Sstevel@tonic-gate */
1860Sstevel@tonic-gate base = p->p_brkbase;
1870Sstevel@tonic-gate len = p->p_brksize;
1880Sstevel@tonic-gate
1890Sstevel@tonic-gate if (addr < base || addr >= base + len) { /* data seg? */
1900Sstevel@tonic-gate base = (caddr_t)(p->p_usrstack - p->p_stksize);
1910Sstevel@tonic-gate len = p->p_stksize;
1920Sstevel@tonic-gate if (addr < base || addr >= p->p_usrstack) { /* stack seg? */
1930Sstevel@tonic-gate /* not in either UNIX data or stack segments */
1940Sstevel@tonic-gate res = FC_NOMAP;
1950Sstevel@tonic-gate goto out;
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate }
1980Sstevel@tonic-gate
1990Sstevel@tonic-gate /* the rest of this function implements a 3.X 4.X 5.X compatibility */
2000Sstevel@tonic-gate /* This code is probably not needed anymore */
2010Sstevel@tonic-gate
2020Sstevel@tonic-gate /* expand the gap to the page boundaries on each side */
2030Sstevel@tonic-gate len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
2040Sstevel@tonic-gate ((uintptr_t)base & PAGEMASK);
2050Sstevel@tonic-gate base = (caddr_t)((uintptr_t)base & PAGEMASK);
2060Sstevel@tonic-gate
2070Sstevel@tonic-gate as_rangelock(as);
2080Sstevel@tonic-gate as_purge(as);
2090Sstevel@tonic-gate if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
2100Sstevel@tonic-gate err = as_map(as, base, len, segvn_create, zfod_argsp);
2110Sstevel@tonic-gate as_rangeunlock(as);
2120Sstevel@tonic-gate if (err) {
2130Sstevel@tonic-gate res = FC_MAKE_ERR(err);
2140Sstevel@tonic-gate goto out;
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate } else {
2170Sstevel@tonic-gate /*
2180Sstevel@tonic-gate * This page is already mapped by another thread after we
2190Sstevel@tonic-gate * returned from as_fault() above. We just fallthrough
2200Sstevel@tonic-gate * as_fault() below.
2210Sstevel@tonic-gate */
2220Sstevel@tonic-gate as_rangeunlock(as);
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate
2250Sstevel@tonic-gate res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);
2260Sstevel@tonic-gate
2270Sstevel@tonic-gate out:
2280Sstevel@tonic-gate
2290Sstevel@tonic-gate return (res);
2300Sstevel@tonic-gate }
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate /*
2330Sstevel@tonic-gate * This is the routine which defines the address limit implied
2340Sstevel@tonic-gate * by the flag '_MAP_LOW32'. USERLIMIT32 matches the highest
2350Sstevel@tonic-gate * mappable address in a 32-bit process on this platform (though
2360Sstevel@tonic-gate * perhaps we should make it be UINT32_MAX here?)
2370Sstevel@tonic-gate */
2380Sstevel@tonic-gate void
map_addr(caddr_t * addrp,size_t len,offset_t off,int vacalign,uint_t flags)2390Sstevel@tonic-gate map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
2400Sstevel@tonic-gate {
2410Sstevel@tonic-gate struct proc *p = curproc;
2420Sstevel@tonic-gate caddr_t userlimit = flags & _MAP_LOW32 ?
2435648Ssetje (caddr_t)USERLIMIT32 : p->p_as->a_userlimit;
2440Sstevel@tonic-gate map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate
/*
 * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
 *
 * valid_va_range_aligned() and valid_usr_range() treat the addresses
 * from hole_start up to (but not including) hole_end as unmappable.
 */
caddr_t	hole_start, hole_end;

/*
 * kpm mapping window
 */
caddr_t kpm_vbase;
size_t  kpm_size;
uchar_t kpm_size_shift;

/*
 * Diagnostic counter: incremented by valid_va_range_aligned() each time
 * the end of the range it is handed wraps past the top of the address
 * space and has to be cut back.
 */
int valid_va_range_aligned_wraparound;
2600Sstevel@tonic-gate /*
2615668Smec * Determine whether [*basep, *basep + *lenp) contains a mappable range of
2625668Smec * addresses at least "minlen" long, where the base of the range is at "off"
2635668Smec * phase from an "align" boundary and there is space for a "redzone"-sized
2645668Smec * redzone on either side of the range. On success, 1 is returned and *basep
2655668Smec * and *lenp are adjusted to describe the acceptable range (including
2665668Smec * the redzone). On failure, 0 is returned.
2670Sstevel@tonic-gate */
2680Sstevel@tonic-gate int
valid_va_range_aligned(caddr_t * basep,size_t * lenp,size_t minlen,int dir,size_t align,size_t redzone,size_t off)2695668Smec valid_va_range_aligned(caddr_t *basep, size_t *lenp, size_t minlen, int dir,
2705668Smec size_t align, size_t redzone, size_t off)
2710Sstevel@tonic-gate {
2720Sstevel@tonic-gate caddr_t hi, lo;
2735668Smec size_t tot_len;
2745668Smec
2755668Smec ASSERT(align == 0 ? off == 0 : off < align);
2765668Smec ASSERT(ISP2(align));
2775668Smec ASSERT(align == 0 || align >= PAGESIZE);
2780Sstevel@tonic-gate
2790Sstevel@tonic-gate lo = *basep;
2800Sstevel@tonic-gate hi = lo + *lenp;
2815668Smec tot_len = minlen + 2 * redzone; /* need at least this much space */
2820Sstevel@tonic-gate
2835668Smec /* If hi rolled over the top try cutting back. */
2845668Smec if (hi < lo) {
2855668Smec *lenp = 0UL - (uintptr_t)lo - 1UL;
2865668Smec /* Trying to see if this really happens, and then if so, why */
2875668Smec valid_va_range_aligned_wraparound++;
2885668Smec hi = lo + *lenp;
2895668Smec }
2905668Smec if (*lenp < tot_len) {
2910Sstevel@tonic-gate return (0);
2925668Smec }
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate /*
2950Sstevel@tonic-gate * Deal with a possible hole in the address range between
2960Sstevel@tonic-gate * hole_start and hole_end that should never be mapped by the MMU.
2970Sstevel@tonic-gate */
2980Sstevel@tonic-gate
2990Sstevel@tonic-gate if (lo < hole_start) {
3000Sstevel@tonic-gate if (hi > hole_start)
3010Sstevel@tonic-gate if (hi < hole_end)
3020Sstevel@tonic-gate hi = hole_start;
3030Sstevel@tonic-gate else
3040Sstevel@tonic-gate /* lo < hole_start && hi >= hole_end */
3050Sstevel@tonic-gate if (dir == AH_LO) {
3060Sstevel@tonic-gate /*
3070Sstevel@tonic-gate * prefer lowest range
3080Sstevel@tonic-gate */
3095668Smec if (hole_start - lo >= tot_len)
3100Sstevel@tonic-gate hi = hole_start;
3115668Smec else if (hi - hole_end >= tot_len)
3120Sstevel@tonic-gate lo = hole_end;
3130Sstevel@tonic-gate else
3140Sstevel@tonic-gate return (0);
3150Sstevel@tonic-gate } else {
3160Sstevel@tonic-gate /*
3170Sstevel@tonic-gate * prefer highest range
3180Sstevel@tonic-gate */
3195668Smec if (hi - hole_end >= tot_len)
3200Sstevel@tonic-gate lo = hole_end;
3215668Smec else if (hole_start - lo >= tot_len)
3220Sstevel@tonic-gate hi = hole_start;
3230Sstevel@tonic-gate else
3240Sstevel@tonic-gate return (0);
3250Sstevel@tonic-gate }
3260Sstevel@tonic-gate } else {
3270Sstevel@tonic-gate /* lo >= hole_start */
3280Sstevel@tonic-gate if (hi < hole_end)
3290Sstevel@tonic-gate return (0);
3300Sstevel@tonic-gate if (lo < hole_end)
3310Sstevel@tonic-gate lo = hole_end;
3320Sstevel@tonic-gate }
3330Sstevel@tonic-gate
3345668Smec /* Check if remaining length is too small */
3355668Smec if (hi - lo < tot_len) {
3360Sstevel@tonic-gate return (0);
3375668Smec }
3385668Smec if (align > 1) {
3395668Smec caddr_t tlo = lo + redzone;
3405668Smec caddr_t thi = hi - redzone;
3415668Smec tlo = (caddr_t)P2PHASEUP((uintptr_t)tlo, align, off);
3425668Smec if (tlo < lo + redzone) {
3435668Smec return (0);
3445668Smec }
3455668Smec if (thi < tlo || thi - tlo < minlen) {
3465668Smec return (0);
3475668Smec }
3485668Smec }
3490Sstevel@tonic-gate *basep = lo;
3500Sstevel@tonic-gate *lenp = hi - lo;
3515668Smec return (1);
3525668Smec }
3530Sstevel@tonic-gate
3545668Smec /*
3555668Smec * Determine whether [*basep, *basep + *lenp) contains a mappable range of
3565668Smec * addresses at least "minlen" long. On success, 1 is returned and *basep
3575668Smec * and *lenp are adjusted to describe the acceptable range. On failure, 0
3585668Smec * is returned.
3595668Smec */
3605668Smec int
valid_va_range(caddr_t * basep,size_t * lenp,size_t minlen,int dir)3615668Smec valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
3625668Smec {
3635668Smec return (valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0));
3640Sstevel@tonic-gate }
3650Sstevel@tonic-gate
3660Sstevel@tonic-gate /*
3670Sstevel@tonic-gate * Determine whether [addr, addr+len] with protections `prot' are valid
3680Sstevel@tonic-gate * for a user address space.
3690Sstevel@tonic-gate */
3700Sstevel@tonic-gate /*ARGSUSED*/
3710Sstevel@tonic-gate int
valid_usr_range(caddr_t addr,size_t len,uint_t prot,struct as * as,caddr_t userlimit)3720Sstevel@tonic-gate valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
3730Sstevel@tonic-gate caddr_t userlimit)
3740Sstevel@tonic-gate {
3750Sstevel@tonic-gate caddr_t eaddr = addr + len;
3760Sstevel@tonic-gate
3770Sstevel@tonic-gate if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
3780Sstevel@tonic-gate return (RANGE_BADADDR);
3790Sstevel@tonic-gate
3800Sstevel@tonic-gate /*
3810Sstevel@tonic-gate * Determine if the address range falls within an illegal
3820Sstevel@tonic-gate * range of the MMU.
3830Sstevel@tonic-gate */
3840Sstevel@tonic-gate if (eaddr > hole_start && addr < hole_end)
3850Sstevel@tonic-gate return (RANGE_BADADDR);
3860Sstevel@tonic-gate
3870Sstevel@tonic-gate #if defined(SF_ERRATA_57)
3880Sstevel@tonic-gate /*
3890Sstevel@tonic-gate * Make sure USERLIMIT isn't raised too high
3900Sstevel@tonic-gate */
3910Sstevel@tonic-gate ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
3920Sstevel@tonic-gate errata57_limit == 0);
3930Sstevel@tonic-gate
3940Sstevel@tonic-gate if (AS_TYPE_64BIT(as) &&
3950Sstevel@tonic-gate (addr < errata57_limit) &&
3960Sstevel@tonic-gate (prot & PROT_EXEC))
3970Sstevel@tonic-gate return (RANGE_BADPROT);
3980Sstevel@tonic-gate #endif /* SF_ERRATA57 */
3990Sstevel@tonic-gate return (RANGE_OKAY);
4000Sstevel@tonic-gate }
4010Sstevel@tonic-gate
4020Sstevel@tonic-gate /*
4030Sstevel@tonic-gate * Routine used to check to see if an a.out can be executed
4040Sstevel@tonic-gate * by the current machine/architecture.
4050Sstevel@tonic-gate */
4060Sstevel@tonic-gate int
chkaout(struct exdata * exp)4070Sstevel@tonic-gate chkaout(struct exdata *exp)
4080Sstevel@tonic-gate {
4090Sstevel@tonic-gate if (exp->ux_mach == M_SPARC)
4100Sstevel@tonic-gate return (0);
4110Sstevel@tonic-gate else
4120Sstevel@tonic-gate return (ENOEXEC);
4130Sstevel@tonic-gate }
4140Sstevel@tonic-gate
4150Sstevel@tonic-gate /*
4160Sstevel@tonic-gate * The following functions return information about an a.out
4170Sstevel@tonic-gate * which is used when a program is executed.
4180Sstevel@tonic-gate */
4190Sstevel@tonic-gate
4200Sstevel@tonic-gate /*
4210Sstevel@tonic-gate * Return the load memory address for the data segment.
4220Sstevel@tonic-gate */
4230Sstevel@tonic-gate caddr_t
getdmem(struct exec * exp)4240Sstevel@tonic-gate getdmem(struct exec *exp)
4250Sstevel@tonic-gate {
4260Sstevel@tonic-gate /*
4270Sstevel@tonic-gate * XXX - Sparc Reference Hack approaching
4280Sstevel@tonic-gate * Remember that we are loading
4290Sstevel@tonic-gate * 8k executables into a 4k machine
4300Sstevel@tonic-gate * DATA_ALIGN == 2 * PAGESIZE
4310Sstevel@tonic-gate */
4320Sstevel@tonic-gate if (exp->a_text)
4330Sstevel@tonic-gate return ((caddr_t)(roundup(USRTEXT + exp->a_text, DATA_ALIGN)));
4340Sstevel@tonic-gate else
4350Sstevel@tonic-gate return ((caddr_t)USRTEXT);
4360Sstevel@tonic-gate }
4370Sstevel@tonic-gate
4380Sstevel@tonic-gate /*
4390Sstevel@tonic-gate * Return the starting disk address for the data segment.
4400Sstevel@tonic-gate */
4410Sstevel@tonic-gate ulong_t
getdfile(struct exec * exp)4420Sstevel@tonic-gate getdfile(struct exec *exp)
4430Sstevel@tonic-gate {
4440Sstevel@tonic-gate if (exp->a_magic == ZMAGIC)
4450Sstevel@tonic-gate return (exp->a_text);
4460Sstevel@tonic-gate else
4470Sstevel@tonic-gate return (sizeof (struct exec) + exp->a_text);
4480Sstevel@tonic-gate }
4490Sstevel@tonic-gate
4500Sstevel@tonic-gate /*
4510Sstevel@tonic-gate * Return the load memory address for the text segment.
4520Sstevel@tonic-gate */
4530Sstevel@tonic-gate
4540Sstevel@tonic-gate /*ARGSUSED*/
4550Sstevel@tonic-gate caddr_t
gettmem(struct exec * exp)4560Sstevel@tonic-gate gettmem(struct exec *exp)
4570Sstevel@tonic-gate {
4580Sstevel@tonic-gate return ((caddr_t)USRTEXT);
4590Sstevel@tonic-gate }
4600Sstevel@tonic-gate
4610Sstevel@tonic-gate /*
4620Sstevel@tonic-gate * Return the file byte offset for the text segment.
4630Sstevel@tonic-gate */
4640Sstevel@tonic-gate uint_t
gettfile(struct exec * exp)4650Sstevel@tonic-gate gettfile(struct exec *exp)
4660Sstevel@tonic-gate {
4670Sstevel@tonic-gate if (exp->a_magic == ZMAGIC)
4680Sstevel@tonic-gate return (0);
4690Sstevel@tonic-gate else
4700Sstevel@tonic-gate return (sizeof (struct exec));
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate
4730Sstevel@tonic-gate void
getexinfo(struct exdata * edp_in,struct exdata * edp_out,int * pagetext,int * pagedata)4740Sstevel@tonic-gate getexinfo(
4750Sstevel@tonic-gate struct exdata *edp_in,
4760Sstevel@tonic-gate struct exdata *edp_out,
4770Sstevel@tonic-gate int *pagetext,
4780Sstevel@tonic-gate int *pagedata)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate *edp_out = *edp_in; /* structure copy */
4810Sstevel@tonic-gate
4820Sstevel@tonic-gate if ((edp_in->ux_mag == ZMAGIC) &&
4830Sstevel@tonic-gate ((edp_in->vp->v_flag & VNOMAP) == 0)) {
4840Sstevel@tonic-gate *pagetext = 1;
4850Sstevel@tonic-gate *pagedata = 1;
4860Sstevel@tonic-gate } else {
4870Sstevel@tonic-gate *pagetext = 0;
4880Sstevel@tonic-gate *pagedata = 0;
4890Sstevel@tonic-gate }
4900Sstevel@tonic-gate }
4910Sstevel@tonic-gate
4922991Ssusans /*
4932991Ssusans * Return non 0 value if the address may cause a VAC alias with KPM mappings.
4942991Ssusans * KPM selects an address such that it's equal offset modulo shm_alignment and
4952991Ssusans * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping.
4962991Ssusans */
4972991Ssusans int
map_addr_vacalign_check(caddr_t addr,u_offset_t off)4982991Ssusans map_addr_vacalign_check(caddr_t addr, u_offset_t off)
4992991Ssusans {
5002991Ssusans if (vac) {
5012991Ssusans return (((uintptr_t)addr ^ off) & shm_alignment - 1);
5022991Ssusans } else {
5032991Ssusans return (0);
5040Sstevel@tonic-gate }
5052991Ssusans }
5060Sstevel@tonic-gate
/*
 * Sanity control.  Don't use large pages regardless of user
 * settings if there's less than priv or shm_lpg_min_physmem memory installed.
 * These variables are expressed in units of 8K pages, so the default of
 * 131072 corresponds to 1GB.
 *
 * map_pgsz() and map_pgszcvec() compare physmem against these thresholds.
 */
pgcnt_t shm_lpg_min_physmem = 131072;			/* 1GB */
pgcnt_t privm_lpg_min_physmem = 131072;			/* 1GB */
5140Sstevel@tonic-gate
5152659Ssusans static size_t
map_pgszheap(struct proc * p,caddr_t addr,size_t len)5160Sstevel@tonic-gate map_pgszheap(struct proc *p, caddr_t addr, size_t len)
5170Sstevel@tonic-gate {
5182991Ssusans size_t pgsz = MMU_PAGESIZE;
5192991Ssusans int szc;
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate /*
5220Sstevel@tonic-gate * If len is zero, retrieve from proc and don't demote the page size.
5232991Ssusans * Use atleast the default pagesize.
5240Sstevel@tonic-gate */
5250Sstevel@tonic-gate if (len == 0) {
5262991Ssusans len = p->p_brkbase + p->p_brksize - p->p_bssbase;
5272991Ssusans }
5282991Ssusans len = MAX(len, default_uheap_lpsize);
5292991Ssusans
5302991Ssusans for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
5312991Ssusans pgsz = hw_page_array[szc].hp_size;
5322991Ssusans if ((disable_auto_data_large_pages & (1 << szc)) ||
5332991Ssusans pgsz > max_uheap_lpsize)
5342991Ssusans continue;
5352991Ssusans if (len >= pgsz) {
5362991Ssusans break;
5372991Ssusans }
5380Sstevel@tonic-gate }
5390Sstevel@tonic-gate
5400Sstevel@tonic-gate /*
5412991Ssusans * If addr == 0 we were called by memcntl() when the
5420Sstevel@tonic-gate * size code is 0. Don't set pgsz less than current size.
5430Sstevel@tonic-gate */
5440Sstevel@tonic-gate if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
5450Sstevel@tonic-gate pgsz = hw_page_array[p->p_brkpageszc].hp_size;
5460Sstevel@tonic-gate }
5470Sstevel@tonic-gate
5480Sstevel@tonic-gate return (pgsz);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate
5512659Ssusans static size_t
map_pgszstk(struct proc * p,caddr_t addr,size_t len)5520Sstevel@tonic-gate map_pgszstk(struct proc *p, caddr_t addr, size_t len)
5530Sstevel@tonic-gate {
5542991Ssusans size_t pgsz = MMU_PAGESIZE;
5552991Ssusans int szc;
5560Sstevel@tonic-gate
5570Sstevel@tonic-gate /*
5580Sstevel@tonic-gate * If len is zero, retrieve from proc and don't demote the page size.
5592991Ssusans * Use atleast the default pagesize.
5600Sstevel@tonic-gate */
5610Sstevel@tonic-gate if (len == 0) {
5620Sstevel@tonic-gate len = p->p_stksize;
5630Sstevel@tonic-gate }
5642991Ssusans len = MAX(len, default_ustack_lpsize);
5650Sstevel@tonic-gate
5662991Ssusans for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
5672991Ssusans pgsz = hw_page_array[szc].hp_size;
5682991Ssusans if ((disable_auto_data_large_pages & (1 << szc)) ||
5692991Ssusans pgsz > max_ustack_lpsize)
5702991Ssusans continue;
5712991Ssusans if (len >= pgsz) {
5722991Ssusans break;
5732991Ssusans }
5740Sstevel@tonic-gate }
5750Sstevel@tonic-gate
5760Sstevel@tonic-gate /*
5770Sstevel@tonic-gate * If addr == 0 we were called by memcntl() or exec_args() when the
5780Sstevel@tonic-gate * size code is 0. Don't set pgsz less than current size.
5790Sstevel@tonic-gate */
5800Sstevel@tonic-gate if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
5810Sstevel@tonic-gate pgsz = hw_page_array[p->p_stkpageszc].hp_size;
5820Sstevel@tonic-gate }
5830Sstevel@tonic-gate
5840Sstevel@tonic-gate return (pgsz);
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate
5872659Ssusans static size_t
map_pgszism(caddr_t addr,size_t len)5882659Ssusans map_pgszism(caddr_t addr, size_t len)
5892659Ssusans {
5902659Ssusans uint_t szc;
5912659Ssusans size_t pgsz;
5922659Ssusans
5932659Ssusans for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
5942659Ssusans if (disable_ism_large_pages & (1 << szc))
5952659Ssusans continue;
5962659Ssusans
5972659Ssusans pgsz = hw_page_array[szc].hp_size;
5982659Ssusans if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
5992659Ssusans return (pgsz);
6002659Ssusans }
6012991Ssusans
6022659Ssusans return (DEFAULT_ISM_PAGESIZE);
6032659Ssusans }
6042659Ssusans
6052659Ssusans /*
6062659Ssusans * Suggest a page size to be used to map a segment of type maptype and length
6072659Ssusans * len. Returns a page size (not a size code).
6082659Ssusans */
6092991Ssusans /* ARGSUSED */
6102659Ssusans size_t
map_pgsz(int maptype,struct proc * p,caddr_t addr,size_t len,int memcntl)6112991Ssusans map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
6122659Ssusans {
6132991Ssusans size_t pgsz = MMU_PAGESIZE;
6142991Ssusans
6152991Ssusans ASSERT(maptype != MAPPGSZ_VA);
6162659Ssusans
6172991Ssusans if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
6182991Ssusans return (MMU_PAGESIZE);
6192991Ssusans }
6202659Ssusans
6212659Ssusans switch (maptype) {
6222659Ssusans case MAPPGSZ_ISM:
6232659Ssusans pgsz = map_pgszism(addr, len);
6242659Ssusans break;
6252659Ssusans
6262659Ssusans case MAPPGSZ_STK:
6272991Ssusans if (max_ustack_lpsize > MMU_PAGESIZE) {
6282991Ssusans pgsz = map_pgszstk(p, addr, len);
6292991Ssusans }
6302659Ssusans break;
6312659Ssusans
6322659Ssusans case MAPPGSZ_HEAP:
6332991Ssusans if (max_uheap_lpsize > MMU_PAGESIZE) {
6342991Ssusans pgsz = map_pgszheap(p, addr, len);
6352991Ssusans }
6362659Ssusans break;
6372659Ssusans }
6382659Ssusans return (pgsz);
6392659Ssusans }
6400Sstevel@tonic-gate
6410Sstevel@tonic-gate
6420Sstevel@tonic-gate /* assumes TTE8K...TTE4M == szc */
6430Sstevel@tonic-gate
6440Sstevel@tonic-gate static uint_t
map_szcvec(caddr_t addr,size_t size,uintptr_t off,int disable_lpgs,size_t max_lpsize,size_t min_physmem)6452991Ssusans map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
6462991Ssusans size_t max_lpsize, size_t min_physmem)
6472414Saguzovsk {
6482414Saguzovsk caddr_t eaddr = addr + size;
6492414Saguzovsk uint_t szcvec = 0;
6502414Saguzovsk caddr_t raddr;
6512414Saguzovsk caddr_t readdr;
6522414Saguzovsk size_t pgsz;
6532991Ssusans int i;
6542414Saguzovsk
6552991Ssusans if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
6562414Saguzovsk return (0);
6572414Saguzovsk }
6582414Saguzovsk for (i = mmu_page_sizes - 1; i > 0; i--) {
6592991Ssusans if (disable_lpgs & (1 << i)) {
6602414Saguzovsk continue;
6612414Saguzovsk }
6622414Saguzovsk pgsz = page_get_pagesize(i);
6632991Ssusans if (pgsz > max_lpsize) {
6642414Saguzovsk continue;
6652414Saguzovsk }
6662414Saguzovsk raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
6672414Saguzovsk readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
6682414Saguzovsk if (raddr < addr || raddr >= readdr) {
6692414Saguzovsk continue;
6702414Saguzovsk }
6712414Saguzovsk if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
6722414Saguzovsk continue;
6732414Saguzovsk }
6742414Saguzovsk szcvec |= (1 << i);
6752414Saguzovsk /*
6762414Saguzovsk * And or in the remaining enabled page sizes.
6772414Saguzovsk */
6782991Ssusans szcvec |= P2PHASE(~disable_lpgs, (1 << i));
6792414Saguzovsk szcvec &= ~1; /* no need to return 8K pagesize */
6802414Saguzovsk break;
6812414Saguzovsk }
6822414Saguzovsk return (szcvec);
6832414Saguzovsk }
6842414Saguzovsk
6850Sstevel@tonic-gate /*
6862991Ssusans * Return a bit vector of large page size codes that
6872991Ssusans * can be used to map [addr, addr + len) region.
6882991Ssusans */
6892991Ssusans /* ARGSUSED */
6902991Ssusans uint_t
map_pgszcvec(caddr_t addr,size_t size,uintptr_t off,int flags,int type,int memcntl)6912991Ssusans map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
6922991Ssusans int memcntl)
6932991Ssusans {
6942991Ssusans if (flags & MAP_TEXT) {
6955648Ssetje return (map_szcvec(addr, size, off,
6965648Ssetje disable_auto_text_large_pages,
6972991Ssusans max_utext_lpsize, shm_lpg_min_physmem));
6982991Ssusans
6992991Ssusans } else if (flags & MAP_INITDATA) {
7005648Ssetje return (map_szcvec(addr, size, off,
7015648Ssetje disable_auto_data_large_pages,
7022991Ssusans max_uidata_lpsize, privm_lpg_min_physmem));
7032991Ssusans
7042991Ssusans } else if (type == MAPPGSZC_SHM) {
7055648Ssetje return (map_szcvec(addr, size, off,
7065648Ssetje disable_auto_data_large_pages,
7072991Ssusans max_shm_lpsize, shm_lpg_min_physmem));
7082991Ssusans
7092991Ssusans } else if (type == MAPPGSZC_HEAP) {
7105648Ssetje return (map_szcvec(addr, size, off,
7115648Ssetje disable_auto_data_large_pages,
7122991Ssusans max_uheap_lpsize, privm_lpg_min_physmem));
7132991Ssusans
7142991Ssusans } else if (type == MAPPGSZC_STACK) {
7155648Ssetje return (map_szcvec(addr, size, off,
7165648Ssetje disable_auto_data_large_pages,
7172991Ssusans max_ustack_lpsize, privm_lpg_min_physmem));
7182991Ssusans
7192991Ssusans } else {
7205648Ssetje return (map_szcvec(addr, size, off,
7215648Ssetje disable_auto_data_large_pages,
7222991Ssusans max_privmap_lpsize, privm_lpg_min_physmem));
7232991Ssusans }
7242991Ssusans }
7252991Ssusans
/*
 * Anchored in the table below are counters used to keep track
 * of free contiguous physical memory.  Each element of the table contains
 * the array of counters, the size of the array (which is allocated during
 * startup based on physmax) and a shift value used to convert a pagenum
 * into a counter array index or vice versa.  The table has page size
 * for rows and region size for columns:
 *
 *	page_counters[page_size][region_size]
 *
 *	page_size:	TTE size code of pages on page_size freelist.
 *
 *	region_size:	TTE size code of a candidate larger page made up
 *			of contiguous free page_size pages.
 *
 * As you go across a page_size row increasing region_size each
 * element keeps track of how many (region_size - 1) size groups
 * made up of page_size free pages can be coalesced into a
 * region_size page.  Yuck!  Let's try an example:
 *
 *	page_counters[1][3] is the table element used for identifying
 *	candidate 4M pages from contiguous pages off the 64K free list.
 *	Each index in the page_counters[1][3].array spans 4M.  It's the
 *	number of free 512K size (region_size - 1) groups of contiguous
 *	64K free pages.  So when page_counters[1][3].counters[n] == 8
 *	we know we have a candidate 4M page made up of 512K size groups
 *	of 64K free pages.
 */
754*12293SJames.McPherson@Sun.COM
755*12293SJames.McPherson@Sun.COM /*
756*12293SJames.McPherson@Sun.COM * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins)
757*12293SJames.McPherson@Sun.COM * dimensions are allocated dynamically.
758*12293SJames.McPherson@Sun.COM */
759*12293SJames.McPherson@Sun.COM page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
760*12293SJames.McPherson@Sun.COM
761*12293SJames.McPherson@Sun.COM /*
7620Sstevel@tonic-gate * For now there is only a single size cache list.
7630Sstevel@tonic-gate * Allocated dynamically.
7640Sstevel@tonic-gate */
7650Sstevel@tonic-gate page_t ***page_cachelists[MAX_MEM_TYPES];
7660Sstevel@tonic-gate
7670Sstevel@tonic-gate kmutex_t *fpc_mutex[NPC_MUTEX];
7680Sstevel@tonic-gate kmutex_t *cpc_mutex[NPC_MUTEX];
7690Sstevel@tonic-gate
7705648Ssetje /*
7715648Ssetje * Calculate space needed for page freelists and counters
7725648Ssetje */
7735648Ssetje size_t
calc_free_pagelist_sz(void)7745648Ssetje calc_free_pagelist_sz(void)
7750Sstevel@tonic-gate {
7765648Ssetje int szc;
7775648Ssetje size_t alloc_sz, cache_sz, free_sz;
7785648Ssetje
7795648Ssetje /*
7805648Ssetje * one cachelist per color, node, and type
7815648Ssetje */
7825648Ssetje cache_sz = (page_get_pagecolors(0) * sizeof (page_t *)) +
7835648Ssetje sizeof (page_t **);
7845648Ssetje cache_sz *= max_mem_nodes * MAX_MEM_TYPES;
7850Sstevel@tonic-gate
7865648Ssetje /*
7875648Ssetje * one freelist per size, color, node, and type
7885648Ssetje */
7895648Ssetje free_sz = sizeof (page_t **);
7905648Ssetje for (szc = 0; szc < mmu_page_sizes; szc++)
7915648Ssetje free_sz += sizeof (page_t *) * page_get_pagecolors(szc);
7925648Ssetje free_sz *= max_mem_nodes * MAX_MEM_TYPES;
7935648Ssetje
7945648Ssetje alloc_sz = cache_sz + free_sz + page_ctrs_sz();
7955648Ssetje return (alloc_sz);
7965648Ssetje }
7975648Ssetje
7985648Ssetje caddr_t
alloc_page_freelists(caddr_t alloc_base)7995648Ssetje alloc_page_freelists(caddr_t alloc_base)
8005648Ssetje {
8015648Ssetje int mnode, mtype;
8025648Ssetje int szc, clrs;
8030Sstevel@tonic-gate
8040Sstevel@tonic-gate /*
8050Sstevel@tonic-gate * We only support small pages in the cachelist.
8060Sstevel@tonic-gate */
8070Sstevel@tonic-gate for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
8085648Ssetje page_cachelists[mtype] = (page_t ***)alloc_base;
8095648Ssetje alloc_base += (max_mem_nodes * sizeof (page_t **));
8105648Ssetje for (mnode = 0; mnode < max_mem_nodes; mnode++) {
8115648Ssetje page_cachelists[mtype][mnode] = (page_t **)alloc_base;
8125648Ssetje alloc_base +=
8135648Ssetje (page_get_pagecolors(0) * sizeof (page_t *));
8140Sstevel@tonic-gate }
8150Sstevel@tonic-gate }
8160Sstevel@tonic-gate
8175648Ssetje /*
8185648Ssetje * Allocate freelists bins for all
8195648Ssetje * supported page sizes.
8205648Ssetje */
8215648Ssetje for (szc = 0; szc < mmu_page_sizes; szc++) {
8225648Ssetje clrs = page_get_pagecolors(szc);
8235648Ssetje for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
824*12293SJames.McPherson@Sun.COM page_freelists[szc][mtype] = (page_t ***)alloc_base;
8255648Ssetje alloc_base += (max_mem_nodes * sizeof (page_t **));
8265648Ssetje for (mnode = 0; mnode < max_mem_nodes; mnode++) {
827*12293SJames.McPherson@Sun.COM page_freelists[szc][mtype][mnode] =
8285648Ssetje (page_t **)alloc_base;
8295648Ssetje alloc_base += (clrs * (sizeof (page_t *)));
8305648Ssetje }
8315648Ssetje }
8325648Ssetje }
8330Sstevel@tonic-gate
8345648Ssetje alloc_base = page_ctrs_alloc(alloc_base);
8350Sstevel@tonic-gate return (alloc_base);
8360Sstevel@tonic-gate }
8370Sstevel@tonic-gate
8380Sstevel@tonic-gate /*
8395648Ssetje * Allocate page_freelists locks for a memnode from the nucleus data
8405648Ssetje * area. This is the first time that mmu_page_sizes is used during
8415648Ssetje * bootup, so check mmu_page_sizes initialization.
8420Sstevel@tonic-gate */
8430Sstevel@tonic-gate int
ndata_alloc_page_mutexs(struct memlist * ndata)8445648Ssetje ndata_alloc_page_mutexs(struct memlist *ndata)
8450Sstevel@tonic-gate {
8460Sstevel@tonic-gate size_t alloc_sz;
8470Sstevel@tonic-gate caddr_t alloc_base;
8485648Ssetje int i;
8495648Ssetje void page_coloring_init();
8500Sstevel@tonic-gate
8515648Ssetje page_coloring_init();
8520Sstevel@tonic-gate if (&mmu_init_mmu_page_sizes) {
8535648Ssetje if (!mmu_init_mmu_page_sizes(0)) {
8540Sstevel@tonic-gate cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
8550Sstevel@tonic-gate mmu_page_sizes);
8560Sstevel@tonic-gate }
8570Sstevel@tonic-gate }
8580Sstevel@tonic-gate ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);
8590Sstevel@tonic-gate
8605648Ssetje /* fpc_mutex and cpc_mutex */
8615648Ssetje alloc_sz = 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);
8620Sstevel@tonic-gate
8630Sstevel@tonic-gate alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
8640Sstevel@tonic-gate if (alloc_base == NULL)
8650Sstevel@tonic-gate return (-1);
8660Sstevel@tonic-gate
8675648Ssetje ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);
8680Sstevel@tonic-gate
8695648Ssetje for (i = 0; i < NPC_MUTEX; i++) {
8705648Ssetje fpc_mutex[i] = (kmutex_t *)alloc_base;
8715648Ssetje alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
8725648Ssetje cpc_mutex[i] = (kmutex_t *)alloc_base;
8735648Ssetje alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
8745648Ssetje }
8750Sstevel@tonic-gate return (0);
8760Sstevel@tonic-gate }
8770Sstevel@tonic-gate
8780Sstevel@tonic-gate /*
8790Sstevel@tonic-gate * To select our starting bin, we stride through the bins with a stride
8800Sstevel@tonic-gate * of 337. Why 337? It's prime, it's largeish, and it performs well both
8810Sstevel@tonic-gate * in simulation and practice for different workloads on varying cache sizes.
8820Sstevel@tonic-gate */
8830Sstevel@tonic-gate uint32_t color_start_current = 0;
8840Sstevel@tonic-gate uint32_t color_start_stride = 337;
8850Sstevel@tonic-gate int color_start_random = 0;
8860Sstevel@tonic-gate
8870Sstevel@tonic-gate /* ARGSUSED */
8880Sstevel@tonic-gate uint_t
get_color_start(struct as * as)8890Sstevel@tonic-gate get_color_start(struct as *as)
8900Sstevel@tonic-gate {
8910Sstevel@tonic-gate uint32_t old, new;
8920Sstevel@tonic-gate
8930Sstevel@tonic-gate if (consistent_coloring == 2 || color_start_random) {
8940Sstevel@tonic-gate return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
8952961Sdp78419 (hw_page_array[0].hp_colors - 1)));
8960Sstevel@tonic-gate }
8970Sstevel@tonic-gate
8980Sstevel@tonic-gate do {
8990Sstevel@tonic-gate old = color_start_current;
9000Sstevel@tonic-gate new = old + (color_start_stride << (vac_shift - MMU_PAGESHIFT));
9010Sstevel@tonic-gate } while (cas32(&color_start_current, old, new) != old);
9020Sstevel@tonic-gate
9030Sstevel@tonic-gate return ((uint_t)(new));
9040Sstevel@tonic-gate }
9050Sstevel@tonic-gate
9060Sstevel@tonic-gate /*
9070Sstevel@tonic-gate * Called once at startup from kphysm_init() -- before memialloc()
9080Sstevel@tonic-gate * is invoked to do the 1st page_free()/page_freelist_add().
9090Sstevel@tonic-gate *
9100Sstevel@tonic-gate * initializes page_colors and page_colors_mask based on ecache_setsize.
9110Sstevel@tonic-gate *
9120Sstevel@tonic-gate * Also initializes the counter locks.
9130Sstevel@tonic-gate */
9140Sstevel@tonic-gate void
page_coloring_init()9150Sstevel@tonic-gate page_coloring_init()
9160Sstevel@tonic-gate {
9172961Sdp78419 int a, i;
9182961Sdp78419 uint_t colors;
9190Sstevel@tonic-gate
9200Sstevel@tonic-gate if (do_pg_coloring == 0) {
9210Sstevel@tonic-gate page_colors = 1;
9223177Sdp78419 for (i = 0; i < mmu_page_sizes; i++) {
9233177Sdp78419 colorequivszc[i] = 0;
9242961Sdp78419 hw_page_array[i].hp_colors = 1;
9253177Sdp78419 }
9260Sstevel@tonic-gate return;
9270Sstevel@tonic-gate }
9280Sstevel@tonic-gate
9290Sstevel@tonic-gate /*
9300Sstevel@tonic-gate * Calculate page_colors from ecache_setsize. ecache_setsize contains
9310Sstevel@tonic-gate * the max ecache setsize of all cpus configured in the system or, for
9320Sstevel@tonic-gate * cheetah+ systems, the max possible ecache setsize for all possible
9330Sstevel@tonic-gate * cheetah+ cpus.
9340Sstevel@tonic-gate */
9350Sstevel@tonic-gate page_colors = ecache_setsize / MMU_PAGESIZE;
9360Sstevel@tonic-gate page_colors_mask = page_colors - 1;
9370Sstevel@tonic-gate
9382961Sdp78419 vac_colors = vac_size / MMU_PAGESIZE;
9392961Sdp78419 vac_colors_mask = vac_colors -1;
9402961Sdp78419
9412961Sdp78419 page_coloring_shift = 0;
9422961Sdp78419 a = ecache_setsize;
9432961Sdp78419 while (a >>= 1) {
9442961Sdp78419 page_coloring_shift++;
9452961Sdp78419 }
9462961Sdp78419
9472961Sdp78419 /* initialize number of colors per page size */
9482961Sdp78419 for (i = 0; i < mmu_page_sizes; i++) {
9492961Sdp78419 hw_page_array[i].hp_colors = (page_colors_mask >>
9502961Sdp78419 (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
9512961Sdp78419 + 1;
9523177Sdp78419 colorequivszc[i] = 0;
9532961Sdp78419 }
9542961Sdp78419
9550Sstevel@tonic-gate /*
9560Sstevel@tonic-gate * initialize cpu_page_colors if ecache setsizes are homogenous.
9570Sstevel@tonic-gate * cpu_page_colors set to -1 during DR operation or during startup
9580Sstevel@tonic-gate * if setsizes are heterogenous.
9590Sstevel@tonic-gate *
9600Sstevel@tonic-gate * The value of cpu_page_colors determines if additional color bins
9610Sstevel@tonic-gate * need to be checked for a particular color in the page_get routines.
9620Sstevel@tonic-gate */
9633177Sdp78419 if (cpu_setsize > 0 && cpu_page_colors == 0 &&
9643177Sdp78419 cpu_setsize < ecache_setsize) {
9650Sstevel@tonic-gate cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
9662961Sdp78419 a = lowbit(page_colors) - lowbit(cpu_page_colors);
9672961Sdp78419 ASSERT(a > 0);
9682961Sdp78419 ASSERT(a < 16);
9692961Sdp78419
9702961Sdp78419 for (i = 0; i < mmu_page_sizes; i++) {
9712961Sdp78419 if ((colors = hw_page_array[i].hp_colors) <= 1) {
9722961Sdp78419 continue;
9732961Sdp78419 }
9742961Sdp78419 while ((colors >> a) == 0)
9752961Sdp78419 a--;
9762961Sdp78419 ASSERT(a >= 0);
9772961Sdp78419
9782961Sdp78419 /* higher 4 bits encodes color equiv mask */
9792961Sdp78419 colorequivszc[i] = (a << 4);
9802961Sdp78419 }
9812961Sdp78419 }
9820Sstevel@tonic-gate
9832961Sdp78419 /* do cpu specific color initialization */
9842961Sdp78419 if (&page_coloring_init_cpu) {
9852961Sdp78419 page_coloring_init_cpu();
9860Sstevel@tonic-gate }
9870Sstevel@tonic-gate }
9880Sstevel@tonic-gate
9890Sstevel@tonic-gate int
bp_color(struct buf * bp)9900Sstevel@tonic-gate bp_color(struct buf *bp)
9910Sstevel@tonic-gate {
9920Sstevel@tonic-gate int color = -1;
9930Sstevel@tonic-gate
9940Sstevel@tonic-gate if (vac) {
9950Sstevel@tonic-gate if ((bp->b_flags & B_PAGEIO) != 0) {
9960Sstevel@tonic-gate color = sfmmu_get_ppvcolor(bp->b_pages);
9970Sstevel@tonic-gate } else if (bp->b_un.b_addr != NULL) {
9980Sstevel@tonic-gate color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
9990Sstevel@tonic-gate }
10000Sstevel@tonic-gate }
10010Sstevel@tonic-gate return (color < 0 ? 0 : ptob(color));
10020Sstevel@tonic-gate }
10030Sstevel@tonic-gate
/*
 * Flush the D-cache when performing module relocations to an alternate
 * mapping.  Stubbed out on all platforms except sun4u, at least for now.
 * Here it simply hands off to sfmmu_cache_flushall().
 */
void
dcache_flushall()
{
	sfmmu_cache_flushall();
}
10140Sstevel@tonic-gate
/*
 * Report whether the ranges [va1, va1 + sz1) and [va2, va2 + sz2) overlap.
 *
 * Returns 1 if they overlap and 0 if they are disjoint.  Two ranges that
 * start at the same address are always reported as overlapping, whatever
 * their sizes.
 *
 * The comparison subtracts the lower base from the higher one instead of
 * adding a size to a base.  The subtraction cannot wrap, so a range that
 * reaches (or runs past) the top of the address space is still compared
 * correctly rather than appearing to end at a low address.
 */
static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
	/* range 1 starts first: it overlaps iff it extends past va2 */
	if (va1 < va2)
		return (va2 - va1 < sz1);

	/* range 2 starts first: it overlaps iff it extends past va1 */
	if (va2 < va1)
		return (va1 - va2 < sz2);

	/* identical start addresses */
	return (1);
}
10260Sstevel@tonic-gate
10270Sstevel@tonic-gate /*
10280Sstevel@tonic-gate * Return the number of bytes, relative to the beginning of a given range, that
10290Sstevel@tonic-gate * are non-toxic (can be read from and written to with relative impunity).
10300Sstevel@tonic-gate */
10310Sstevel@tonic-gate size_t
kdi_range_is_nontoxic(uintptr_t va,size_t sz,int write)10320Sstevel@tonic-gate kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
10330Sstevel@tonic-gate {
10340Sstevel@tonic-gate /* OBP reads are harmless, but we don't want people writing there */
10350Sstevel@tonic-gate if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
10360Sstevel@tonic-gate OFW_START_ADDR + 1))
10370Sstevel@tonic-gate return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);
10380Sstevel@tonic-gate
10390Sstevel@tonic-gate if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
10400Sstevel@tonic-gate return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);
10410Sstevel@tonic-gate
10420Sstevel@tonic-gate return (sz); /* no overlap */
10430Sstevel@tonic-gate }
10440Sstevel@tonic-gate
10450Sstevel@tonic-gate /*
10460Sstevel@tonic-gate * Minimum physmem required for enabling large pages for kernel heap
10470Sstevel@tonic-gate * Currently we do not enable lp for kmem on systems with less
10480Sstevel@tonic-gate * than 1GB of memory. This value can be changed via /etc/system
10490Sstevel@tonic-gate */
10500Sstevel@tonic-gate size_t segkmem_lpminphysmem = 0x40000000; /* 1GB */
10510Sstevel@tonic-gate
10520Sstevel@tonic-gate /*
10530Sstevel@tonic-gate * this function chooses large page size for kernel heap
10540Sstevel@tonic-gate */
10550Sstevel@tonic-gate size_t
get_segkmem_lpsize(size_t lpsize)10560Sstevel@tonic-gate get_segkmem_lpsize(size_t lpsize)
10570Sstevel@tonic-gate {
10580Sstevel@tonic-gate size_t memtotal = physmem * PAGESIZE;
10592251Selowe size_t mmusz;
10602251Selowe uint_t szc;
10610Sstevel@tonic-gate
10620Sstevel@tonic-gate if (memtotal < segkmem_lpminphysmem)
10630Sstevel@tonic-gate return (PAGESIZE);
10640Sstevel@tonic-gate
10650Sstevel@tonic-gate if (plat_lpkmem_is_supported != NULL &&
10660Sstevel@tonic-gate plat_lpkmem_is_supported() == 0)
10670Sstevel@tonic-gate return (PAGESIZE);
10680Sstevel@tonic-gate
10692251Selowe mmusz = mmu_get_kernel_lpsize(lpsize);
10702251Selowe szc = page_szc(mmusz);
10712251Selowe
10722251Selowe while (szc) {
10732251Selowe if (!(disable_large_pages & (1 << szc)))
10742251Selowe return (page_get_pagesize(szc));
10752251Selowe szc--;
10762251Selowe }
10772251Selowe return (PAGESIZE);
10780Sstevel@tonic-gate }
1079