xref: /onnv-gate/usr/src/uts/sun4/vm/vm_dep.c (revision 2991:4b13d6c49c6b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52251Selowe  * Common Development and Distribution License (the "License").
62251Selowe  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
222251Selowe  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * UNIX machine dependent virtual memory support.
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/vm.h>
330Sstevel@tonic-gate #include <sys/exec.h>
340Sstevel@tonic-gate 
350Sstevel@tonic-gate #include <sys/exechdr.h>
360Sstevel@tonic-gate #include <vm/seg_kmem.h>
370Sstevel@tonic-gate #include <sys/atomic.h>
380Sstevel@tonic-gate #include <sys/archsystm.h>
390Sstevel@tonic-gate #include <sys/machsystm.h>
400Sstevel@tonic-gate #include <sys/kdi.h>
410Sstevel@tonic-gate #include <sys/cpu_module.h>
420Sstevel@tonic-gate 
430Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
440Sstevel@tonic-gate 
450Sstevel@tonic-gate #include <sys/memnode.h>
460Sstevel@tonic-gate 
470Sstevel@tonic-gate #include <sys/mem_config.h>
480Sstevel@tonic-gate #include <sys/mem_cage.h>
490Sstevel@tonic-gate #include <vm/vm_dep.h>
502961Sdp78419 #include <vm/page.h>
510Sstevel@tonic-gate #include <sys/platform_module.h>
520Sstevel@tonic-gate 
/*
 * These variables are set by module specific config routines.
 * They are only set by modules which will use physical cache page coloring
 * and/or virtual cache page coloring.
 */
int do_pg_coloring = 0;
int do_virtual_coloring = 0;

/*
 * These variables can be conveniently patched at kernel load time to
 * prevent do_pg_coloring or do_virtual_coloring from being enabled by
 * module specific config routines.
 */

int use_page_coloring = 1;
int use_virtual_coloring = 1;

/*
 * initialized by page_coloring_init()
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;
int cpu_page_colors;
uint_t vac_colors = 0;		/* number of VAC colors; 0 until configured */
uint_t vac_colors_mask = 0;	/* vac_colors - 1, for masking page numbers */

/* cpu specific coloring initialization (optional; weak so it may be absent) */
extern void page_coloring_init_cpu();
#pragma weak page_coloring_init_cpu

/*
 * get the ecache setsize for the current cpu.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)

plcnt_t		plcnt;		/* page list count */

/*
 * This variable is set by the cpu module to contain the lowest
 * address not affected by the SF_ERRATA_57 workaround.  It should
 * remain 0 if the workaround is not needed.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

extern void page_relocate_hash(page_t *, page_t *);

/*
 * these must be defined in platform specific areas
 */
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
	struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
	caddr_t, size_t, uint_t, struct lgrp *);
1090Sstevel@tonic-gate /*
1100Sstevel@tonic-gate  * Convert page frame number to an OBMEM page frame number
1110Sstevel@tonic-gate  * (i.e. put in the type bits -- zero for this implementation)
1120Sstevel@tonic-gate  */
1130Sstevel@tonic-gate pfn_t
1140Sstevel@tonic-gate impl_obmem_pfnum(pfn_t pf)
1150Sstevel@tonic-gate {
1160Sstevel@tonic-gate 	return (pf);
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * Use physmax to determine the highest physical page of DRAM memory
1210Sstevel@tonic-gate  * It is assumed that any physical addresses above physmax is in IO space.
1220Sstevel@tonic-gate  * We don't bother checking the low end because we assume that memory space
1230Sstevel@tonic-gate  * begins at physical page frame 0.
1240Sstevel@tonic-gate  *
1250Sstevel@tonic-gate  * Return 1 if the page frame is onboard DRAM memory, else 0.
1260Sstevel@tonic-gate  * Returns 0 for nvram so it won't be cached.
1270Sstevel@tonic-gate  */
1280Sstevel@tonic-gate int
1290Sstevel@tonic-gate pf_is_memory(pfn_t pf)
1300Sstevel@tonic-gate {
1310Sstevel@tonic-gate 	/* We must be IO space */
1320Sstevel@tonic-gate 	if (pf > physmax)
1330Sstevel@tonic-gate 		return (0);
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate 	/* We must be memory space */
1360Sstevel@tonic-gate 	return (1);
1370Sstevel@tonic-gate }
1380Sstevel@tonic-gate 
/*
 * Handle a pagefault.
 *
 * Resolves a fault at `addr' of kind `type' (e.g. F_INVAL, F_PROT) for
 * access `rw'.  If `iskernel' the fault is charged to the kernel address
 * space, otherwise to the current process.  On a FC_NOMAP result for a
 * user fault, the address is checked against the UNIX data (brk) and
 * stack segments; if it falls in an unmapped hole there, a zero-fill
 * mapping is created and the fault retried.  Returns 0 on success or a
 * faultcode_t error (FC_NOMAP, FC_MAKE_ERR(errno), ...).
 */
faultcode_t
pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
{
	struct as *as;
	struct proc *p;
	faultcode_t res;
	caddr_t base;
	size_t len;
	int err;

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	if (iskernel) {
		as = &kas;
	} else {
		/* p is only used on user faults (see iskernel == 0 below) */
		p = curproc;
		as = p->p_as;
#if defined(SF_ERRATA_57)
		/*
		 * Prevent infinite loops due to a segment driver
		 * setting the execute permissions and the sfmmu hat
		 * silently ignoring them.
		 */
		if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
		    addr < errata57_limit) {
			res = FC_NOMAP;
			goto out;
		}
#endif
	}

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(as->a_hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (!(res == FC_NOMAP && iskernel == 0))
		goto out;

	/*
	 * Check to see if we happened to faulted on a currently unmapped
	 * part of the UNIX data or stack segments.  If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	base = p->p_brkbase;
	len = p->p_brksize;

	if (addr < base || addr >= base + len) {		/* data seg? */
		base = (caddr_t)(p->p_usrstack - p->p_stksize);
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */
			res = FC_NOMAP;
			goto out;
		}
	}

	/* the rest of this function implements a 3.X 4.X 5.X compatibility */
	/* This code is probably not needed anymore */

	/* expand the gap to the page boundaries on each side */
	len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
	    ((uintptr_t)base & PAGEMASK);
	base = (caddr_t)((uintptr_t)base & PAGEMASK);

	/*
	 * Hold the as range lock across the gap check and the zfod
	 * mapping so no other thread can map the range in between.
	 */
	as_rangelock(as);
	as_purge(as);
	if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
		err = as_map(as, base, len, segvn_create, zfod_argsp);
		as_rangeunlock(as);
		if (err) {
			res = FC_MAKE_ERR(err);
			goto out;
		}
	} else {
		/*
		 * This page is already mapped by another thread after we
		 * returned from as_fault() above.  We just fallthrough
		 * as_fault() below.
		 */
		as_rangeunlock(as);
	}

	/* Retry now that the zfod mapping (ours or another thread's) exists. */
	res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);

out:

	return (res);
}
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate /*
2380Sstevel@tonic-gate  * This is the routine which defines the address limit implied
2390Sstevel@tonic-gate  * by the flag '_MAP_LOW32'.  USERLIMIT32 matches the highest
2400Sstevel@tonic-gate  * mappable address in a 32-bit process on this platform (though
2410Sstevel@tonic-gate  * perhaps we should make it be UINT32_MAX here?)
2420Sstevel@tonic-gate  */
2430Sstevel@tonic-gate void
2440Sstevel@tonic-gate map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
2450Sstevel@tonic-gate {
2460Sstevel@tonic-gate 	struct proc *p = curproc;
2470Sstevel@tonic-gate 	caddr_t userlimit = flags & _MAP_LOW32 ?
2480Sstevel@tonic-gate 		(caddr_t)USERLIMIT32 : p->p_as->a_userlimit;
2490Sstevel@tonic-gate 	map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
2500Sstevel@tonic-gate }
2510Sstevel@tonic-gate 
/*
 * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
 * [hole_start, hole_end) must never be mapped by the MMU; both remain 0
 * on CPUs without a hole.
 */
caddr_t	hole_start, hole_end;

/*
 * kpm mapping window
 */
caddr_t kpm_vbase;	/* base VA of the kpm segment */
size_t  kpm_size;	/* size of the kpm mapping window */
uchar_t kpm_size_shift;	/* log2(kpm_size) */
2630Sstevel@tonic-gate 
/*
 * Determine whether [base, base+len] contains a mapable range of
 * addresses at least minlen long. base and len are adjusted if
 * required to provide a mapable range.
 *
 * `dir' (AH_LO/AH_HI) selects which side of the VA hole to prefer when
 * the range straddles it.  Returns 1 with *basep/*lenp updated on
 * success, 0 if no suitable sub-range exists.
 */
/* ARGSUSED */
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	caddr_t hi, lo;

	lo = *basep;
	hi = lo + *lenp;

	/*
	 * If hi rolled over the top, try cutting back.
	 */
	if (hi < lo) {
		/* newlen = distance from lo to the top of the VA space */
		size_t newlen = 0 - (uintptr_t)lo - 1l;

		if (newlen + (uintptr_t)hi < minlen)
			return (0);
		if (newlen < minlen)
			return (0);
		*lenp = newlen;
	} else if (hi - lo < minlen)
		return (0);

	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped by the MMU.
	 */
	hi = lo + *lenp;

	if (lo < hole_start) {
		/*
		 * NOTE: the `else' below binds to the inner
		 * `if (hi < hole_end)', which is the intended pairing:
		 * it handles ranges spanning the entire hole.
		 */
		if (hi > hole_start)
			if (hi < hole_end)
				hi = hole_start;
			else
				/* lo < hole_start && hi >= hole_end */
				if (dir == AH_LO) {
					/*
					 * prefer lowest range
					 */
					if (hole_start - lo >= minlen)
						hi = hole_start;
					else if (hi - hole_end >= minlen)
						lo = hole_end;
					else
						return (0);
				} else {
					/*
					 * prefer highest range
					 */
					if (hi - hole_end >= minlen)
						lo = hole_end;
					else if (hole_start - lo >= minlen)
						hi = hole_start;
					else
						return (0);
				}
	} else {
		/* lo >= hole_start */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	}

	if (hi - lo < minlen)
		return (0);

	*basep = lo;
	*lenp = hi - lo;

	return (1);
}
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate /*
3430Sstevel@tonic-gate  * Determine whether [addr, addr+len] with protections `prot' are valid
3440Sstevel@tonic-gate  * for a user address space.
3450Sstevel@tonic-gate  */
3460Sstevel@tonic-gate /*ARGSUSED*/
3470Sstevel@tonic-gate int
3480Sstevel@tonic-gate valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
3490Sstevel@tonic-gate     caddr_t userlimit)
3500Sstevel@tonic-gate {
3510Sstevel@tonic-gate 	caddr_t eaddr = addr + len;
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
3540Sstevel@tonic-gate 		return (RANGE_BADADDR);
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate 	/*
3570Sstevel@tonic-gate 	 * Determine if the address range falls within an illegal
3580Sstevel@tonic-gate 	 * range of the MMU.
3590Sstevel@tonic-gate 	 */
3600Sstevel@tonic-gate 	if (eaddr > hole_start && addr < hole_end)
3610Sstevel@tonic-gate 		return (RANGE_BADADDR);
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate #if defined(SF_ERRATA_57)
3640Sstevel@tonic-gate 	/*
3650Sstevel@tonic-gate 	 * Make sure USERLIMIT isn't raised too high
3660Sstevel@tonic-gate 	 */
3670Sstevel@tonic-gate 	ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
3680Sstevel@tonic-gate 	    errata57_limit == 0);
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate 	if (AS_TYPE_64BIT(as) &&
3710Sstevel@tonic-gate 	    (addr < errata57_limit) &&
3720Sstevel@tonic-gate 	    (prot & PROT_EXEC))
3730Sstevel@tonic-gate 		return (RANGE_BADPROT);
3740Sstevel@tonic-gate #endif /* SF_ERRATA57 */
3750Sstevel@tonic-gate 	return (RANGE_OKAY);
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate 
3780Sstevel@tonic-gate /*
3790Sstevel@tonic-gate  * Routine used to check to see if an a.out can be executed
3800Sstevel@tonic-gate  * by the current machine/architecture.
3810Sstevel@tonic-gate  */
3820Sstevel@tonic-gate int
3830Sstevel@tonic-gate chkaout(struct exdata *exp)
3840Sstevel@tonic-gate {
3850Sstevel@tonic-gate 	if (exp->ux_mach == M_SPARC)
3860Sstevel@tonic-gate 		return (0);
3870Sstevel@tonic-gate 	else
3880Sstevel@tonic-gate 		return (ENOEXEC);
3890Sstevel@tonic-gate }
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate /*
3920Sstevel@tonic-gate  * The following functions return information about an a.out
3930Sstevel@tonic-gate  * which is used when a program is executed.
3940Sstevel@tonic-gate  */
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate /*
3970Sstevel@tonic-gate  * Return the load memory address for the data segment.
3980Sstevel@tonic-gate  */
3990Sstevel@tonic-gate caddr_t
4000Sstevel@tonic-gate getdmem(struct exec *exp)
4010Sstevel@tonic-gate {
4020Sstevel@tonic-gate 	/*
4030Sstevel@tonic-gate 	 * XXX - Sparc Reference Hack approaching
4040Sstevel@tonic-gate 	 * Remember that we are loading
4050Sstevel@tonic-gate 	 * 8k executables into a 4k machine
4060Sstevel@tonic-gate 	 * DATA_ALIGN == 2 * PAGESIZE
4070Sstevel@tonic-gate 	 */
4080Sstevel@tonic-gate 	if (exp->a_text)
4090Sstevel@tonic-gate 		return ((caddr_t)(roundup(USRTEXT + exp->a_text, DATA_ALIGN)));
4100Sstevel@tonic-gate 	else
4110Sstevel@tonic-gate 		return ((caddr_t)USRTEXT);
4120Sstevel@tonic-gate }
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate /*
4150Sstevel@tonic-gate  * Return the starting disk address for the data segment.
4160Sstevel@tonic-gate  */
4170Sstevel@tonic-gate ulong_t
4180Sstevel@tonic-gate getdfile(struct exec *exp)
4190Sstevel@tonic-gate {
4200Sstevel@tonic-gate 	if (exp->a_magic == ZMAGIC)
4210Sstevel@tonic-gate 		return (exp->a_text);
4220Sstevel@tonic-gate 	else
4230Sstevel@tonic-gate 		return (sizeof (struct exec) + exp->a_text);
4240Sstevel@tonic-gate }
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate /*
4270Sstevel@tonic-gate  * Return the load memory address for the text segment.
4280Sstevel@tonic-gate  */
4290Sstevel@tonic-gate 
4300Sstevel@tonic-gate /*ARGSUSED*/
4310Sstevel@tonic-gate caddr_t
4320Sstevel@tonic-gate gettmem(struct exec *exp)
4330Sstevel@tonic-gate {
4340Sstevel@tonic-gate 	return ((caddr_t)USRTEXT);
4350Sstevel@tonic-gate }
4360Sstevel@tonic-gate 
4370Sstevel@tonic-gate /*
4380Sstevel@tonic-gate  * Return the file byte offset for the text segment.
4390Sstevel@tonic-gate  */
4400Sstevel@tonic-gate uint_t
4410Sstevel@tonic-gate gettfile(struct exec *exp)
4420Sstevel@tonic-gate {
4430Sstevel@tonic-gate 	if (exp->a_magic == ZMAGIC)
4440Sstevel@tonic-gate 		return (0);
4450Sstevel@tonic-gate 	else
4460Sstevel@tonic-gate 		return (sizeof (struct exec));
4470Sstevel@tonic-gate }
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate void
4500Sstevel@tonic-gate getexinfo(
4510Sstevel@tonic-gate 	struct exdata *edp_in,
4520Sstevel@tonic-gate 	struct exdata *edp_out,
4530Sstevel@tonic-gate 	int *pagetext,
4540Sstevel@tonic-gate 	int *pagedata)
4550Sstevel@tonic-gate {
4560Sstevel@tonic-gate 	*edp_out = *edp_in;	/* structure copy */
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 	if ((edp_in->ux_mag == ZMAGIC) &&
4590Sstevel@tonic-gate 	    ((edp_in->vp->v_flag & VNOMAP) == 0)) {
4600Sstevel@tonic-gate 		*pagetext = 1;
4610Sstevel@tonic-gate 		*pagedata = 1;
4620Sstevel@tonic-gate 	} else {
4630Sstevel@tonic-gate 		*pagetext = 0;
4640Sstevel@tonic-gate 		*pagedata = 0;
4650Sstevel@tonic-gate 	}
4660Sstevel@tonic-gate }
4670Sstevel@tonic-gate 
468*2991Ssusans /*
469*2991Ssusans  * Return non 0 value if the address may cause a VAC alias with KPM mappings.
470*2991Ssusans  * KPM selects an address such that it's equal offset modulo shm_alignment and
471*2991Ssusans  * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping.
472*2991Ssusans  */
473*2991Ssusans int
474*2991Ssusans map_addr_vacalign_check(caddr_t addr, u_offset_t off)
475*2991Ssusans {
476*2991Ssusans 	if (vac) {
477*2991Ssusans 		return (((uintptr_t)addr ^ off) & shm_alignment - 1);
478*2991Ssusans 	} else {
479*2991Ssusans 		return (0);
4800Sstevel@tonic-gate 	}
481*2991Ssusans }
4820Sstevel@tonic-gate 
/*
 * Sanity control. Don't use large pages regardless of user settings if
 * fewer than privm_lpg_min_physmem (private mappings) or
 * shm_lpg_min_physmem (shared mappings) pages of physical memory are
 * installed.  The units for these variables are 8K pages.
 */
pgcnt_t shm_lpg_min_physmem = 131072;			/* 1GB */
pgcnt_t privm_lpg_min_physmem = 131072;			/* 1GB */
4900Sstevel@tonic-gate 
/*
 * Pick a preferred page size for a heap (brk) mapping of `len' bytes at
 * `addr' in process `p'.  Returns a page size in bytes, never smaller
 * than MMU_PAGESIZE.
 */
static size_t
map_pgszheap(struct proc *p, caddr_t addr, size_t len)
{
	size_t		pgsz = MMU_PAGESIZE;
	int		szc;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use atleast the default pagesize.
	 */
	if (len == 0) {
		len = p->p_brkbase + p->p_brksize - p->p_bssbase;
	}
	len = MAX(len, default_uheap_lpsize);

	/*
	 * Walk sizes from largest to smallest; stop at the first enabled
	 * size (not auto-disabled, not above max_uheap_lpsize) that fits
	 * in len.  If none fits, pgsz falls through as the smallest size.
	 */
	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_uheap_lpsize)
			continue;
		if (len >= pgsz) {
			break;
		}
	}

	/*
	 * If addr == 0 we were called by memcntl() when the
	 * size code is 0.  Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_brkpageszc].hp_size;
	}

	return (pgsz);
}
5260Sstevel@tonic-gate 
/*
 * Pick a preferred page size for a stack mapping of `len' bytes at
 * `addr' in process `p'.  Returns a page size in bytes, never smaller
 * than MMU_PAGESIZE.
 */
static size_t
map_pgszstk(struct proc *p, caddr_t addr, size_t len)
{
	size_t		pgsz = MMU_PAGESIZE;
	int		szc;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use atleast the default pagesize.
	 */
	if (len == 0) {
		len = p->p_stksize;
	}
	len = MAX(len, default_ustack_lpsize);

	/*
	 * Largest-to-smallest scan, mirroring map_pgszheap(); note the
	 * data large-page disable vector is applied to the stack as well.
	 */
	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_ustack_lpsize)
			continue;
		if (len >= pgsz) {
			break;
		}
	}

	/*
	 * If addr == 0 we were called by memcntl() or exec_args() when the
	 * size code is 0.  Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_stkpageszc].hp_size;
	}

	return (pgsz);
}
5620Sstevel@tonic-gate 
5632659Ssusans static size_t
5642659Ssusans map_pgszism(caddr_t addr, size_t len)
5652659Ssusans {
5662659Ssusans 	uint_t szc;
5672659Ssusans 	size_t pgsz;
5682659Ssusans 
5692659Ssusans 	for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
5702659Ssusans 		if (disable_ism_large_pages & (1 << szc))
5712659Ssusans 			continue;
5722659Ssusans 
5732659Ssusans 		pgsz = hw_page_array[szc].hp_size;
5742659Ssusans 		if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
5752659Ssusans 			return (pgsz);
5762659Ssusans 	}
577*2991Ssusans 
5782659Ssusans 	return (DEFAULT_ISM_PAGESIZE);
5792659Ssusans }
5802659Ssusans 
5812659Ssusans /*
5822659Ssusans  * Suggest a page size to be used to map a segment of type maptype and length
5832659Ssusans  * len.  Returns a page size (not a size code).
5842659Ssusans  */
585*2991Ssusans /* ARGSUSED */
5862659Ssusans size_t
587*2991Ssusans map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
5882659Ssusans {
589*2991Ssusans 	size_t	pgsz = MMU_PAGESIZE;
590*2991Ssusans 
591*2991Ssusans 	ASSERT(maptype != MAPPGSZ_VA);
5922659Ssusans 
593*2991Ssusans 	if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
594*2991Ssusans 		return (MMU_PAGESIZE);
595*2991Ssusans 	}
5962659Ssusans 
5972659Ssusans 	switch (maptype) {
5982659Ssusans 	case MAPPGSZ_ISM:
5992659Ssusans 		pgsz = map_pgszism(addr, len);
6002659Ssusans 		break;
6012659Ssusans 
6022659Ssusans 	case MAPPGSZ_STK:
603*2991Ssusans 		if (max_ustack_lpsize > MMU_PAGESIZE) {
604*2991Ssusans 			pgsz = map_pgszstk(p, addr, len);
605*2991Ssusans 		}
6062659Ssusans 		break;
6072659Ssusans 
6082659Ssusans 	case MAPPGSZ_HEAP:
609*2991Ssusans 		if (max_uheap_lpsize > MMU_PAGESIZE) {
610*2991Ssusans 			pgsz = map_pgszheap(p, addr, len);
611*2991Ssusans 		}
6122659Ssusans 		break;
6132659Ssusans 	}
6142659Ssusans 	return (pgsz);
6152659Ssusans }
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 
/* assumes TTE8K...TTE4M == szc */

/*
 * Build a bit vector of usable large page size codes for the region
 * [addr, addr + size) at file/backing offset `off'.
 *
 * disable_lpgs is a bit vector of size codes to skip, max_lpsize caps
 * the page size, and min_physmem is the installed-memory floor below
 * which no large pages are used.  Bit i of the result is set when size
 * code i can map at least one fully-aligned page inside the region.
 * Returns 0 when no large page applies; bit 0 (8K) is never returned.
 */
static uint_t
map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
    size_t max_lpsize, size_t min_physmem)
{
	caddr_t eaddr = addr + size;
	uint_t szcvec = 0;
	caddr_t raddr;
	caddr_t readdr;
	size_t pgsz;
	int i;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
		return (0);
	}
	/* Find the largest enabled size with a fully-aligned subrange. */
	for (i = mmu_page_sizes - 1; i > 0; i--) {
		if (disable_lpgs & (1 << i)) {
			continue;
		}
		pgsz = page_get_pagesize(i);
		if (pgsz > max_lpsize) {
			continue;
		}
		/* Round the region inward to pgsz; skip if nothing remains. */
		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (raddr < addr || raddr >= readdr) {
			continue;
		}
		/* addr and off must agree modulo pgsz for VAC/relocation. */
		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
			continue;
		}
		szcvec |= (1 << i);
		/*
		 * And or in the remaining enabled page sizes.
		 */
		szcvec |= P2PHASE(~disable_lpgs, (1 << i));
		szcvec &= ~1; /* no need to return 8K pagesize */
		break;
	}
	return (szcvec);
}
6602414Saguzovsk 
6610Sstevel@tonic-gate /*
662*2991Ssusans  * Return a bit vector of large page size codes that
663*2991Ssusans  * can be used to map [addr, addr + len) region.
664*2991Ssusans  */
665*2991Ssusans /* ARGSUSED */
666*2991Ssusans uint_t
667*2991Ssusans map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
668*2991Ssusans     int memcntl)
669*2991Ssusans {
670*2991Ssusans 	if (flags & MAP_TEXT) {
671*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_text_large_pages,
672*2991Ssusans 		    max_utext_lpsize, shm_lpg_min_physmem));
673*2991Ssusans 
674*2991Ssusans 	} else if (flags & MAP_INITDATA) {
675*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
676*2991Ssusans 		    max_uidata_lpsize, privm_lpg_min_physmem));
677*2991Ssusans 
678*2991Ssusans 	} else if (type == MAPPGSZC_SHM) {
679*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
680*2991Ssusans 		    max_shm_lpsize, shm_lpg_min_physmem));
681*2991Ssusans 
682*2991Ssusans 	} else if (type == MAPPGSZC_HEAP) {
683*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
684*2991Ssusans 		    max_uheap_lpsize, privm_lpg_min_physmem));
685*2991Ssusans 
686*2991Ssusans 	} else if (type == MAPPGSZC_STACK) {
687*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
688*2991Ssusans 		    max_ustack_lpsize, privm_lpg_min_physmem));
689*2991Ssusans 
690*2991Ssusans 	} else {
691*2991Ssusans 	    return (map_szcvec(addr, size, off, disable_auto_data_large_pages,
692*2991Ssusans 		    max_privmap_lpsize, privm_lpg_min_physmem));
693*2991Ssusans 	}
694*2991Ssusans }
695*2991Ssusans 
696*2991Ssusans /*
6970Sstevel@tonic-gate  * Anchored in the table below are counters used to keep track
6980Sstevel@tonic-gate  * of free contiguous physical memory. Each element of the table contains
6990Sstevel@tonic-gate  * the array of counters, the size of array which is allocated during
7000Sstevel@tonic-gate  * startup based on physmax and a shift value used to convert a pagenum
7010Sstevel@tonic-gate  * into a counter array index or vice versa. The table has page size
7020Sstevel@tonic-gate  * for rows and region size for columns:
7030Sstevel@tonic-gate  *
7040Sstevel@tonic-gate  *	page_counters[page_size][region_size]
7050Sstevel@tonic-gate  *
7060Sstevel@tonic-gate  *	page_size: 	TTE size code of pages on page_size freelist.
7070Sstevel@tonic-gate  *
7080Sstevel@tonic-gate  *	region_size:	TTE size code of a candidate larger page made up
7090Sstevel@tonic-gate  *			made up of contiguous free page_size pages.
7100Sstevel@tonic-gate  *
7110Sstevel@tonic-gate  * As you go across a page_size row increasing region_size each
7120Sstevel@tonic-gate  * element keeps track of how many (region_size - 1) size groups
7130Sstevel@tonic-gate  * made up of page_size free pages can be coalesced into a
7140Sstevel@tonic-gate  * regsion_size page. Yuck! Lets try an example:
7150Sstevel@tonic-gate  *
7160Sstevel@tonic-gate  * 	page_counters[1][3] is the table element used for identifying
7170Sstevel@tonic-gate  *	candidate 4M pages from contiguous pages off the 64K free list.
7180Sstevel@tonic-gate  *	Each index in the page_counters[1][3].array spans 4M. Its the
 7190Sstevel@tonic-gate  *	number of free 512K size (region_size - 1) groups of contiguous
7200Sstevel@tonic-gate  *	64K free pages.	So when page_counters[1][3].counters[n] == 8
7210Sstevel@tonic-gate  *	we know we have a candidate 4M page made up of 512K size groups
7220Sstevel@tonic-gate  *	of 64K free pages.
7230Sstevel@tonic-gate  */
7240Sstevel@tonic-gate 
/*
 * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins)
 * dimensions are allocated dynamically.
 */
page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];

/*
 * For now there is only a single size cache list.
 * Allocated dynamically.
 */
page_t ***page_cachelists[MAX_MEM_TYPES];

/*
 * Lock arrays protecting the freelists (fpc) and cachelists (cpc);
 * the per-memnode dimension is allocated in ndata_alloc_page_freelists().
 */
kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];
7390Sstevel@tonic-gate 
7400Sstevel@tonic-gate caddr_t
7410Sstevel@tonic-gate alloc_page_freelists(int mnode, caddr_t alloc_base, int alloc_align)
7420Sstevel@tonic-gate {
7430Sstevel@tonic-gate 	int	mtype;
7440Sstevel@tonic-gate 	uint_t	szc;
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate 	/*
7490Sstevel@tonic-gate 	 * We only support small pages in the cachelist.
7500Sstevel@tonic-gate 	 */
7510Sstevel@tonic-gate 	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
7520Sstevel@tonic-gate 		page_cachelists[mtype][mnode] = (page_t **)alloc_base;
7532961Sdp78419 		alloc_base += (sizeof (page_t *) * page_get_pagecolors(0));
7540Sstevel@tonic-gate 		/*
7550Sstevel@tonic-gate 		 * Allocate freelists bins for all
7560Sstevel@tonic-gate 		 * supported page sizes.
7570Sstevel@tonic-gate 		 */
7580Sstevel@tonic-gate 		for (szc = 0; szc < mmu_page_sizes; szc++) {
7590Sstevel@tonic-gate 			page_freelists[szc][mtype][mnode] =
7600Sstevel@tonic-gate 			    (page_t **)alloc_base;
7610Sstevel@tonic-gate 			alloc_base += ((sizeof (page_t *) *
7620Sstevel@tonic-gate 			    page_get_pagecolors(szc)));
7630Sstevel@tonic-gate 		}
7640Sstevel@tonic-gate 	}
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, alloc_align);
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	return (alloc_base);
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate /*
7720Sstevel@tonic-gate  * Allocate page_freelists bin headers for a memnode from the
7730Sstevel@tonic-gate  * nucleus data area. This is the first time that mmu_page_sizes is
7740Sstevel@tonic-gate  * used during sun4u bootup, so check mmu_page_sizes initialization.
7750Sstevel@tonic-gate  */
7760Sstevel@tonic-gate int
7770Sstevel@tonic-gate ndata_alloc_page_freelists(struct memlist *ndata, int mnode)
7780Sstevel@tonic-gate {
7790Sstevel@tonic-gate 	size_t alloc_sz;
7800Sstevel@tonic-gate 	caddr_t alloc_base;
7810Sstevel@tonic-gate 	caddr_t end;
7820Sstevel@tonic-gate 	int	mtype;
7830Sstevel@tonic-gate 	uint_t	szc;
7840Sstevel@tonic-gate 	int32_t allp = 0;
7850Sstevel@tonic-gate 
7860Sstevel@tonic-gate 	if (&mmu_init_mmu_page_sizes) {
7870Sstevel@tonic-gate 		if (!mmu_init_mmu_page_sizes(allp)) {
7880Sstevel@tonic-gate 			cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
7890Sstevel@tonic-gate 			    mmu_page_sizes);
7900Sstevel@tonic-gate 		}
7910Sstevel@tonic-gate 	}
7920Sstevel@tonic-gate 	ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);
7930Sstevel@tonic-gate 
7940Sstevel@tonic-gate 	/* first time called - allocate max_mem_nodes dimension */
7950Sstevel@tonic-gate 	if (mnode == 0) {
7960Sstevel@tonic-gate 		int	i;
7970Sstevel@tonic-gate 
7980Sstevel@tonic-gate 		/* page_cachelists */
7990Sstevel@tonic-gate 		alloc_sz = MAX_MEM_TYPES * max_mem_nodes *
8000Sstevel@tonic-gate 		    sizeof (page_t **);
8010Sstevel@tonic-gate 
8020Sstevel@tonic-gate 		/* page_freelists */
8030Sstevel@tonic-gate 		alloc_sz += MAX_MEM_TYPES * mmu_page_sizes * max_mem_nodes *
8040Sstevel@tonic-gate 		    sizeof (page_t **);
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate 		/* fpc_mutex and cpc_mutex */
8070Sstevel@tonic-gate 		alloc_sz += 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 		alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
8100Sstevel@tonic-gate 		if (alloc_base == NULL)
8110Sstevel@tonic-gate 			return (-1);
8120Sstevel@tonic-gate 
8130Sstevel@tonic-gate 		ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 		for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
8160Sstevel@tonic-gate 			page_cachelists[mtype] = (page_t ***)alloc_base;
8170Sstevel@tonic-gate 			alloc_base += (max_mem_nodes * sizeof (page_t **));
8180Sstevel@tonic-gate 			for (szc = 0; szc < mmu_page_sizes; szc++) {
8190Sstevel@tonic-gate 				page_freelists[szc][mtype] =
8200Sstevel@tonic-gate 				    (page_t ***)alloc_base;
8210Sstevel@tonic-gate 				alloc_base += (max_mem_nodes *
8220Sstevel@tonic-gate 				    sizeof (page_t **));
8230Sstevel@tonic-gate 			}
8240Sstevel@tonic-gate 		}
8250Sstevel@tonic-gate 		for (i = 0; i < NPC_MUTEX; i++) {
8260Sstevel@tonic-gate 			fpc_mutex[i] = (kmutex_t *)alloc_base;
8270Sstevel@tonic-gate 			alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
8280Sstevel@tonic-gate 			cpc_mutex[i] = (kmutex_t *)alloc_base;
8290Sstevel@tonic-gate 			alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
8300Sstevel@tonic-gate 		}
8310Sstevel@tonic-gate 		alloc_sz = 0;
8320Sstevel@tonic-gate 	}
8330Sstevel@tonic-gate 
8340Sstevel@tonic-gate 	/*
8350Sstevel@tonic-gate 	 * Calculate the size needed by alloc_page_freelists().
8360Sstevel@tonic-gate 	 */
8370Sstevel@tonic-gate 	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
8382961Sdp78419 		alloc_sz += sizeof (page_t *) * page_get_pagecolors(0);
8390Sstevel@tonic-gate 
8400Sstevel@tonic-gate 		for (szc = 0; szc < mmu_page_sizes; szc++)
8410Sstevel@tonic-gate 			alloc_sz += sizeof (page_t *) *
8420Sstevel@tonic-gate 			    page_get_pagecolors(szc);
8430Sstevel@tonic-gate 	}
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 	alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
8460Sstevel@tonic-gate 	if (alloc_base == NULL)
8470Sstevel@tonic-gate 		return (-1);
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate 	end = alloc_page_freelists(mnode, alloc_base, ecache_alignsize);
8500Sstevel@tonic-gate 	ASSERT((uintptr_t)end == roundup((uintptr_t)alloc_base + alloc_sz,
8510Sstevel@tonic-gate 	    ecache_alignsize));
8520Sstevel@tonic-gate 
8530Sstevel@tonic-gate 	return (0);
8540Sstevel@tonic-gate }
8550Sstevel@tonic-gate 
/*
 * To select our starting bin, we stride through the bins with a stride
 * of 337.  Why 337?  It's prime, it's largeish, and it performs well both
 * in simulation and practice for different workloads on varying cache sizes.
 */
uint32_t color_start_current = 0;	/* shared cursor, advanced via cas32 */
uint32_t color_start_stride = 337;	/* prime stride between starting bins */
int color_start_random = 0;		/* nonzero: derive bin from gettick() */
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate /* ARGSUSED */
8660Sstevel@tonic-gate uint_t
8670Sstevel@tonic-gate get_color_start(struct as *as)
8680Sstevel@tonic-gate {
8690Sstevel@tonic-gate 	uint32_t old, new;
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	if (consistent_coloring == 2 || color_start_random) {
8720Sstevel@tonic-gate 		return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
8732961Sdp78419 		    (hw_page_array[0].hp_colors - 1)));
8740Sstevel@tonic-gate 	}
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 	do {
8770Sstevel@tonic-gate 		old = color_start_current;
8780Sstevel@tonic-gate 		new = old + (color_start_stride << (vac_shift - MMU_PAGESHIFT));
8790Sstevel@tonic-gate 	} while (cas32(&color_start_current, old, new) != old);
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 	return ((uint_t)(new));
8820Sstevel@tonic-gate }
8830Sstevel@tonic-gate 
/*
 * Called once at startup from kphysm_init() -- before memialloc()
 * is invoked to do the 1st page_free()/page_freelist_add().
 *
 * initializes page_colors and page_colors_mask based on ecache_setsize.
 *
 * Also initializes the counter locks.
 */
void
page_coloring_init()
{
	int	a, i;
	uint_t colors;

	/* Coloring disabled: a single bin per page size. */
	if (do_pg_coloring == 0) {
		page_colors = 1;
		for (i = 0; i < mmu_page_sizes; i++)
			hw_page_array[i].hp_colors = 1;
		return;
	}

	/*
	 * Calculate page_colors from ecache_setsize. ecache_setsize contains
	 * the max ecache setsize of all cpus configured in the system or, for
	 * cheetah+ systems, the max possible ecache setsize for all possible
	 * cheetah+ cpus.
	 */
	page_colors = ecache_setsize / MMU_PAGESIZE;
	page_colors_mask = page_colors - 1;

	vac_colors = vac_size / MMU_PAGESIZE;
	vac_colors_mask = vac_colors -1;

	/* page_coloring_shift = log2(ecache_setsize) */
	page_coloring_shift = 0;
	a = ecache_setsize;
	while (a >>= 1) {
		page_coloring_shift++;
	}

	/* initialize number of colors per page size */
	for (i = 0; i < mmu_page_sizes; i++) {
		/* larger pages span proportionally fewer colors */
		hw_page_array[i].hp_colors = (page_colors_mask >>
		    (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
		    + 1;
	}

	/*
	 * initialize cpu_page_colors if ecache setsizes are homogenous.
	 * cpu_page_colors set to -1 during DR operation or during startup
	 * if setsizes are heterogenous.
	 *
	 * The value of cpu_page_colors determines if additional color bins
	 * need to be checked for a particular color in the page_get routines.
	 */
	if ((cpu_page_colors == 0) && (cpu_setsize < ecache_setsize)) {

		cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
		/* log2 ratio of system colors to per-cpu colors */
		a = lowbit(page_colors) - lowbit(cpu_page_colors);
		ASSERT(a > 0);
		ASSERT(a < 16);

		for (i = 0; i < mmu_page_sizes; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				colorequivszc[i] = 0;
				continue;
			}
			/* clamp the shift so at least one bin remains */
			while ((colors >> a) == 0)
				a--;
			ASSERT(a >= 0);

			/* higher 4 bits encodes color equiv mask */
			colorequivszc[i] = (a << 4);
		}
	}

	/* factor in colorequiv to check additional 'equivalent' bins. */
	/* (only when no cpu-specific init routine is linked in — weak sym) */
	if (colorequiv > 1 && &page_coloring_init_cpu == NULL) {

		a = lowbit(colorequiv) - 1;

		if (a > 15)
			a = 15;

		for (i = 0; i < mmu_page_sizes; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				continue;
			}
			/* again, never shift away every bin */
			while ((colors >> a) == 0)
				a--;
			/* keep the larger equivalence shift per size code */
			if ((a << 4) > colorequivszc[i]) {
				colorequivszc[i] = (a << 4);
			}
		}
	}

	/* do cpu specific color initialization */
	if (&page_coloring_init_cpu) {
		page_coloring_init_cpu();
	}
}
9840Sstevel@tonic-gate 
9850Sstevel@tonic-gate int
9860Sstevel@tonic-gate bp_color(struct buf *bp)
9870Sstevel@tonic-gate {
9880Sstevel@tonic-gate 	int color = -1;
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 	if (vac) {
9910Sstevel@tonic-gate 		if ((bp->b_flags & B_PAGEIO) != 0) {
9920Sstevel@tonic-gate 			color = sfmmu_get_ppvcolor(bp->b_pages);
9930Sstevel@tonic-gate 		} else if (bp->b_un.b_addr != NULL) {
9940Sstevel@tonic-gate 			color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
9950Sstevel@tonic-gate 		}
9960Sstevel@tonic-gate 	}
9970Sstevel@tonic-gate 	return (color < 0 ? 0 : ptob(color));
9980Sstevel@tonic-gate }
9990Sstevel@tonic-gate 
/*
 * Create & Initialise pageout scanner thread. The thread has to
 * start at procedure with process pp and priority pri.
 */
void
pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
{
	/* default stack, no argument, runnable immediately at pri */
	(void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
}
10090Sstevel@tonic-gate 
10100Sstevel@tonic-gate /*
/*
 * Function for flushing D-cache when performing module relocations
 * to an alternate mapping.  Stubbed out on all platforms except sun4u,
 * at least for now.
 */
void
dcache_flushall()
{
	sfmmu_cache_flushall();	/* HAT layer performs the platform flush */
}
10200Sstevel@tonic-gate 
/*
 * Return nonzero iff the half-open ranges [va1, va1 + sz1) and
 * [va2, va2 + sz2) share at least one byte.
 */
static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
	uintptr_t end1 = va1 + sz1;
	uintptr_t end2 = va2 + sz2;

	/* disjoint iff one range ends at or before the other begins */
	if ((va1 < va2 && end1 <= va2) || (va2 < va1 && end2 <= va1))
		return (0);

	return (1);
}
10320Sstevel@tonic-gate 
10330Sstevel@tonic-gate /*
10340Sstevel@tonic-gate  * Return the number of bytes, relative to the beginning of a given range, that
10350Sstevel@tonic-gate  * are non-toxic (can be read from and written to with relative impunity).
10360Sstevel@tonic-gate  */
10370Sstevel@tonic-gate size_t
10380Sstevel@tonic-gate kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
10390Sstevel@tonic-gate {
10400Sstevel@tonic-gate 	/* OBP reads are harmless, but we don't want people writing there */
10410Sstevel@tonic-gate 	if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
10420Sstevel@tonic-gate 	    OFW_START_ADDR + 1))
10430Sstevel@tonic-gate 		return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);
10440Sstevel@tonic-gate 
10450Sstevel@tonic-gate 	if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
10460Sstevel@tonic-gate 		return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);
10470Sstevel@tonic-gate 
10480Sstevel@tonic-gate 	return (sz); /* no overlap */
10490Sstevel@tonic-gate }
10500Sstevel@tonic-gate 
/*
 * Minimum physmem required for enabling large pages for kernel heap
 * Currently we do not enable lp for kmem on systems with less
 * than 1GB of memory. This value can be changed via /etc/system
 * (compared against physmem * PAGESIZE in get_segkmem_lpsize()).
 */
size_t segkmem_lpminphysmem = 0x40000000;	/* 1GB */
10570Sstevel@tonic-gate 
10580Sstevel@tonic-gate /*
10590Sstevel@tonic-gate  * this function chooses large page size for kernel heap
10600Sstevel@tonic-gate  */
10610Sstevel@tonic-gate size_t
10620Sstevel@tonic-gate get_segkmem_lpsize(size_t lpsize)
10630Sstevel@tonic-gate {
10640Sstevel@tonic-gate 	size_t memtotal = physmem * PAGESIZE;
10652251Selowe 	size_t mmusz;
10662251Selowe 	uint_t szc;
10670Sstevel@tonic-gate 
10680Sstevel@tonic-gate 	if (memtotal < segkmem_lpminphysmem)
10690Sstevel@tonic-gate 		return (PAGESIZE);
10700Sstevel@tonic-gate 
10710Sstevel@tonic-gate 	if (plat_lpkmem_is_supported != NULL &&
10720Sstevel@tonic-gate 	    plat_lpkmem_is_supported() == 0)
10730Sstevel@tonic-gate 		return (PAGESIZE);
10740Sstevel@tonic-gate 
10752251Selowe 	mmusz = mmu_get_kernel_lpsize(lpsize);
10762251Selowe 	szc = page_szc(mmusz);
10772251Selowe 
10782251Selowe 	while (szc) {
10792251Selowe 		if (!(disable_large_pages & (1 << szc)))
10802251Selowe 			return (page_get_pagesize(szc));
10812251Selowe 		szc--;
10822251Selowe 	}
10832251Selowe 	return (PAGESIZE);
10840Sstevel@tonic-gate }
1085