xref: /onnv-gate/usr/src/uts/i86pc/vm/vm_dep.h (revision 12908:80a39220b451)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52855Skchow  * Common Development and Distribution License (the "License").
62855Skchow  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*12908SPavel.Tatashin@Sun.COM  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
2412004Sjiang.liu@intel.com /*
2512004Sjiang.liu@intel.com  * Copyright (c) 2010, Intel Corporation.
2612004Sjiang.liu@intel.com  * All rights reserved.
2712004Sjiang.liu@intel.com  */
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * UNIX machine dependent virtual memory support.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #ifndef	_VM_DEP_H
340Sstevel@tonic-gate #define	_VM_DEP_H
350Sstevel@tonic-gate 
360Sstevel@tonic-gate 
370Sstevel@tonic-gate #ifdef	__cplusplus
380Sstevel@tonic-gate extern "C" {
390Sstevel@tonic-gate #endif
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #include <sys/clock.h>
420Sstevel@tonic-gate #include <vm/hat_pte.h>
433446Smrj #include <sys/param.h>
444769Sdp78419 #include <sys/memnode.h>
450Sstevel@tonic-gate 
460Sstevel@tonic-gate /*
475084Sjohnlev  * WARNING: vm_dep.h is included by files in common.
480Sstevel@tonic-gate  */
490Sstevel@tonic-gate 
500Sstevel@tonic-gate #define	GETTICK()	tsc_read()
516880Sdv142724 /*
526880Sdv142724  * Do not use this function for obtaining clock tick.  This
536880Sdv142724  * is called by callers who do not need to have a guarenteed
546880Sdv142724  * correct tick value.  The proper routine to use is tsc_read().
556880Sdv142724  */
560Sstevel@tonic-gate 
57*12908SPavel.Tatashin@Sun.COM extern u_longlong_t	randtick();
585084Sjohnlev extern uint_t page_create_update_flags_x86(uint_t);
595084Sjohnlev 
605084Sjohnlev extern size_t plcnt_sz(size_t);
615084Sjohnlev #define	PLCNT_SZ(ctrs_sz) (ctrs_sz = plcnt_sz(ctrs_sz))
620Sstevel@tonic-gate 
635084Sjohnlev extern caddr_t plcnt_init(caddr_t);
645084Sjohnlev #define	PLCNT_INIT(addr) (addr = plcnt_init(addr))
650Sstevel@tonic-gate 
665084Sjohnlev extern void plcnt_inc_dec(page_t *, int, int, long, int);
/*
 * Page list counter updates.  Both macros call plcnt_inc_dec() with a
 * count of (1 << PAGE_BSZS_SHIFT(szc)); PLCNT_INCR() passes it positive
 * and PLCNT_DECR() passes it negated.  The mnode argument is accepted
 * but not used in the expansion.
 *
 * The decrement negates the shifted value rather than shifting -1,
 * because left-shifting a negative value is undefined behavior in C.
 */
#define	PLCNT_INCR(pp, mnode, mtype, szc, flags)			\
	plcnt_inc_dec(pp, mtype, szc, 1l << PAGE_BSZS_SHIFT(szc), flags)
#define	PLCNT_DECR(pp, mnode, mtype, szc, flags)			\
	plcnt_inc_dec(pp, mtype, szc, -(1l << PAGE_BSZS_SHIFT(szc)), flags)
711385Skchow 
/*
 * Page list max count hooks.
 *
 * PLCNT_XFER_NORELOC() is a no-op on x86.
 *
 * PLCNT_MODIFY_MAX() modifies the page list max counts when memory is
 * added to the page lists during startup (add_physmem) or during a DR
 * operation when memory is added (kphysm_add_memory_dynamic) or deleted
 * (kphysm_del_cleanup).  It forwards both arguments to
 * mtype_modify_max().
 */
#define	PLCNT_XFER_NORELOC(pp)
#define	PLCNT_MODIFY_MAX(pfn, cnt)	mtype_modify_max((pfn), (cnt))
8412004Sjiang.liu@intel.com 
855084Sjohnlev extern int memrange_num(pfn_t);
865084Sjohnlev extern int pfn_2_mtype(pfn_t);
875084Sjohnlev extern int mtype_func(int, int, uint_t);
885084Sjohnlev extern void mtype_modify_max(pfn_t, long);
895084Sjohnlev extern int mnode_pgcnt(int);
905084Sjohnlev extern int mnode_range_cnt(int);
910Sstevel@tonic-gate 
/*
 * candidate counters in vm_pagelist.c are indexed by color and range
 */
#define	NUM_MEM_RANGES		4		/* memory range types */
#define	MAX_MNODE_MRANGES	NUM_MEM_RANGES
#define	MNODE_RANGE_CNT(mnode)	mnode_range_cnt(mnode)
#define	MNODE_MAX_MRANGE(mnode)						\
	memrange_num(mem_node_config[(mnode)].physbase)

/*
 * This was really badly defined, it implicitly uses mnode_maxmrange[]
 * which is a static in vm_pagelist.c, so the macro is only usable where
 * that array is in scope.
 */
extern int mtype_2_mrange(int mtype);
#define	MTYPE_2_MRANGE(mnode, mtype)					\
	(mnode_maxmrange[(mnode)] - mtype_2_mrange(mtype))
1072961Sdp78419 
1082961Sdp78419 /*
10912293SJames.McPherson@Sun.COM  * Per page size free lists. Allocated dynamically.
1100Sstevel@tonic-gate  * dimensions [mtype][mmu_page_sizes][colors]
1110Sstevel@tonic-gate  *
1120Sstevel@tonic-gate  * mtype specifies a physical memory range with a unique mnode.
1130Sstevel@tonic-gate  */
1140Sstevel@tonic-gate 
11512293SJames.McPherson@Sun.COM extern page_t ****page_freelists;
1160Sstevel@tonic-gate 
11712293SJames.McPherson@Sun.COM #define	PAGE_FREELISTS(mnode, szc, color, mtype)		\
11812293SJames.McPherson@Sun.COM 	(*(page_freelists[mtype][szc] + (color)))
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate /*
1210Sstevel@tonic-gate  * For now there is only a single size cache list. Allocated dynamically.
1220Sstevel@tonic-gate  * dimensions [mtype][colors]
1230Sstevel@tonic-gate  *
1240Sstevel@tonic-gate  * mtype specifies a physical memory range with a unique mnode.
1250Sstevel@tonic-gate  */
1260Sstevel@tonic-gate extern page_t ***page_cachelists;
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate #define	PAGE_CACHELISTS(mnode, color, mtype) 		\
1290Sstevel@tonic-gate 	(*(page_cachelists[mtype] + (color)))
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate /*
13212293SJames.McPherson@Sun.COM  * There are mutexes for both the page freelist
1330Sstevel@tonic-gate  * and the page cachelist.  We want enough locks to make contention
1340Sstevel@tonic-gate  * reasonable, but not too many -- otherwise page_freelist_lock() gets
1350Sstevel@tonic-gate  * so expensive that it becomes the bottleneck!
1360Sstevel@tonic-gate  */
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate #define	NPC_MUTEX	16
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate extern kmutex_t	*fpc_mutex[NPC_MUTEX];
1410Sstevel@tonic-gate extern kmutex_t	*cpc_mutex[NPC_MUTEX];
1420Sstevel@tonic-gate 
14312293SJames.McPherson@Sun.COM extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t);
1440Sstevel@tonic-gate extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
1450Sstevel@tonic-gate 
/* mem node iterator is not used on x86; both hooks expand to nothing */
#define	MEM_NODE_ITERATOR_DECL(it)
#define	MEM_NODE_ITERATOR_INIT(pfn, mnode, szc, it)

/*
 * interleaved_mnodes mode is never set on x86, therefore,
 * simply return the limits of the given mnode, which then
 * determines the length of hpm_counters array for the mnode.
 *
 * The output parameters are deliberately NOT named physbase/physmax:
 * a macro parameter with the same spelling as the struct member would
 * also be substituted after the '.', so the macro would only compile
 * when the caller's variables happened to carry those exact names.
 */
#define	HPM_COUNTERS_LIMITS(mnode, pbase_out, pmax_out, first)	\
	{							\
		(pbase_out) = mem_node_config[(mnode)].physbase;	\
		(pmax_out) = mem_node_config[(mnode)].physmax;	\
		(first) = (mnode);				\
	}
1614769Sdp78419 
/*
 * Writer locking for an mnode's page counters.  Locking takes
 * page_ctrs_rwlock[mnode] as writer and then calls page_freelist_lock();
 * unlocking releases the two in the reverse order.
 */
#define	PAGE_CTRS_WRITE_LOCK(mnode) {					\
	rw_enter(&page_ctrs_rwlock[(mnode)], RW_WRITER);		\
	page_freelist_lock(mnode);					\
}

#define	PAGE_CTRS_WRITE_UNLOCK(mnode) {					\
	page_freelist_unlock(mnode);					\
	rw_exit(&page_ctrs_rwlock[(mnode)]);				\
}
1734769Sdp78419 
/*
 * macro to call page_ctrs_adjust() when memory is added
 * during a DR operation.
 *
 * Walks [pfn, pfn + cnt) one memory node at a time: each pass looks up
 * the node of the current pfn, advances to the lesser of the range end
 * and that node's physmax + 1, and calls page_ctrs_adjust() for the
 * node.  The walk stops at the first non-zero result, which is left in
 * rv.  rv is not written when the range is empty.
 */
#define	PAGE_CTRS_ADJUST(pfn, cnt, rv) {				\
	spgcnt_t _npgs = (spgcnt_t)(cnt);				\
	pfn_t _cur = (pfn);						\
	pfn_t _end = _cur + _npgs;					\
	int _node;							\
	while (_cur < _end) {						\
		_node = PFN_2_MEM_NODE(_cur);				\
		_cur = MIN(_end, mem_node_config[_node].physmax + 1);	\
		if ((rv = page_ctrs_adjust(_node)) != 0)		\
			break;						\
	}								\
}
19212004Sjiang.liu@intel.com 
/*
 * Accessors for hw_page_array[], indexed by page size code (szc).
 */
#define	PAGE_GET_SHIFT(szc)						\
	(hw_page_array[(szc)].hp_shift)
#define	PAGE_GET_PAGECOLORS(szc)					\
	(hw_page_array[(szc)].hp_colors)
#define	PNUM_SIZE(szc)							\
	(hw_page_array[(szc)].hp_pgcnt)

/* hp_shift of szc relative to the hp_shift of size code 0 */
#define	PNUM_SHIFT(szc)							\
	(PAGE_GET_SHIFT(szc) - PAGE_GET_SHIFT(0))

/* difference in hp_shift between size codes nszc and szc */
#define	PAGE_GET_COLOR_SHIFT(szc, nszc)					\
	(PAGE_GET_SHIFT(nszc) - PAGE_GET_SHIFT(szc))

#define	PAGE_CONVERT_COLOR(ncolor, szc, nszc)				\
	((ncolor) << PAGE_GET_COLOR_SHIFT((szc), (nszc)))

/* mask the pfn with page_colors_mask, then drop PNUM_SHIFT(szc) bits */
#define	PFN_2_COLOR(pfn, szc, it)					\
	(((pfn) & page_colors_mask) >> PNUM_SHIFT(szc))
2112961Sdp78419 
/*
 * This macro calculates the next sequential pfn with the specified
 * color using color equivalency mask.
 *
 * pfn is updated in place and must be an lvalue.  color must have no
 * bits outside ceq_mask, and ceq_mask + 1 must be a power of two (both
 * are ASSERTed).  color_mask and it are not used in the expansion.
 *
 * The macro-local variables carry a _pnp_ prefix so that they cannot
 * capture a caller variable that appears in one of the arguments (the
 * previous names pfn_shift, spfn and stride were plausible caller
 * names, and an argument mentioning one of them would have read the
 * macro's own uninitialized local).
 */
#define	PAGE_NEXT_PFN_FOR_COLOR(pfn, szc, color, ceq_mask, color_mask, it)    \
	{								      \
		uint_t	_pnp_shift = PAGE_BSZS_SHIFT(szc);                    \
		pfn_t	_pnp_spfn = (pfn) >> _pnp_shift;                      \
		pfn_t	_pnp_stride = (ceq_mask) + 1;                         \
		ASSERT(((color) & ~(ceq_mask)) == 0);                         \
		ASSERT((((ceq_mask) + 1) & (ceq_mask)) == 0);                 \
		if (((_pnp_spfn ^ (color)) & (ceq_mask)) == 0) {              \
			/* already on the color: step one full stride */     \
			(pfn) += _pnp_stride << _pnp_shift;                   \
		} else {                                                      \
			/* snap to the color, moving forward only */          \
			(pfn) = (_pnp_spfn & ~(pfn_t)(ceq_mask)) | (color);   \
			(pfn) = ((pfn) > _pnp_spfn ? (pfn) :                  \
			    (pfn) + _pnp_stride) << _pnp_shift;               \
		}                                                             \
	}
2302961Sdp78419 
/*
 * Convert a color equivalency mask, or a color, of size code szc to the
 * next larger size code by shifting out the difference between the two
 * page shifts.
 */

/* get the color equivalency mask for the next szc */
#define	PAGE_GET_NSZ_MASK(szc, mask)                                         \
	((mask) >> PAGE_GET_COLOR_SHIFT((szc), (szc) + 1))

/* get the color of the next szc */
#define	PAGE_GET_NSZ_COLOR(szc, color)                                       \
	((color) >> PAGE_GET_COLOR_SHIFT((szc), (szc) + 1))
2382961Sdp78419 
/*
 * page_t accessors.  Every use of a macro argument is parenthesized so
 * that expression arguments such as (pp + 1) or (a | b) expand with the
 * intended precedence.
 */

/* Find the bin for the given page if it was of size szc */
#define	PP_2_BIN_SZC(pp, szc)	(PFN_2_COLOR((pp)->p_pagenum, szc, NULL))

#define	PP_2_BIN(pp)		(PP_2_BIN_SZC(pp, (pp)->p_szc))

#define	PP_2_MEM_NODE(pp)	(PFN_2_MEM_NODE((pp)->p_pagenum))
#define	PP_2_MTYPE(pp)		(pfn_2_mtype((pp)->p_pagenum))
#define	PP_2_SZC(pp)		((pp)->p_szc)

/* number of base pages in a page of size code szc */
#define	SZCPAGES(szc)		(1 << PAGE_BSZS_SHIFT(szc))
/* pfnum with the low PAGE_BSZS_SHIFT(szc) bits cleared */
#define	PFN_BASE(pfnum, szc)	((pfnum) & ~(SZCPAGES(szc) - 1))
2500Sstevel@tonic-gate 
25112293SJames.McPherson@Sun.COM /*
25212293SJames.McPherson@Sun.COM  * this structure is used for walking free page lists
25312293SJames.McPherson@Sun.COM  * controls when to split large pages into smaller pages,
25412293SJames.McPherson@Sun.COM  * and when to coalesce smaller pages into larger pages
25512293SJames.McPherson@Sun.COM  */
25612293SJames.McPherson@Sun.COM typedef struct page_list_walker {
25712293SJames.McPherson@Sun.COM 	uint_t	plw_colors;		/* num of colors for szc */
25812293SJames.McPherson@Sun.COM 	uint_t  plw_color_mask;		/* colors-1 */
25912293SJames.McPherson@Sun.COM 	uint_t	plw_bin_step;		/* next bin: 1 or 2 */
26012293SJames.McPherson@Sun.COM 	uint_t  plw_count;		/* loop count */
26112293SJames.McPherson@Sun.COM 	uint_t	plw_bin0;		/* starting bin */
26212293SJames.McPherson@Sun.COM 	uint_t  plw_bin_marker;		/* bin after initial jump */
26312293SJames.McPherson@Sun.COM 	uint_t  plw_bin_split_prev;	/* last bin we tried to split */
26412293SJames.McPherson@Sun.COM 	uint_t  plw_do_split;		/* set if OK to split */
26512293SJames.McPherson@Sun.COM 	uint_t  plw_split_next;		/* next bin to split */
26612293SJames.McPherson@Sun.COM 	uint_t	plw_ceq_dif;		/* number of different color groups */
26712293SJames.McPherson@Sun.COM 					/* to check */
26812293SJames.McPherson@Sun.COM 	uint_t	plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
26912293SJames.McPherson@Sun.COM 	uint_t	plw_bins[MMU_PAGE_SIZES + 1];	/* num of bins */
27012293SJames.McPherson@Sun.COM } page_list_walker_t;
27112293SJames.McPherson@Sun.COM 
2722961Sdp78419 void	page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
2732961Sdp78419     int can_split, int use_ceq, page_list_walker_t *plw);
2742961Sdp78419 
2752961Sdp78419 uint_t	page_list_walk_next_bin(uchar_t szc, uint_t bin,
2762961Sdp78419     page_list_walker_t *plw);
2772961Sdp78419 
278414Skchow extern struct cpu	cpus[];
279414Skchow #define	CPU0		cpus
280414Skchow 
2815084Sjohnlev extern int mtype_init(vnode_t *, caddr_t, uint_t *, size_t);
2825084Sjohnlev #define	MTYPE_INIT(mtype, vp, vaddr, flags, pgsz)		\
2835084Sjohnlev 	(mtype = mtype_init(vp, vaddr, &(flags), pgsz))
2840Sstevel@tonic-gate 
/*
 * macros to loop through the mtype range (page_get_mnode_{free,cache,any}list,
 * and page_get_contig_pages)
 *
 * MTYPE_START sets the initial mtype. -1 if the mtype range specified does
 * not contain mnode.
 *
 * MTYPE_NEXT sets the next mtype. -1 if there are no more valid
 * mtype in the range.
 *
 * mtype is assigned to and must be an lvalue.  flags is parenthesized
 * at every use so that a compound argument (e.g. a | b) is tested
 * against PGI_MT_RANGE, and OR-ed with PGI_MT_NEXT, as a whole.
 */

#define	MTYPE_START(mnode, mtype, flags)				\
	((mtype) = mtype_func((mnode), (mtype), (flags)))

#define	MTYPE_NEXT(mnode, mtype, flags) {				\
	if ((flags) & PGI_MT_RANGE) {					\
		(mtype) = mtype_func((mnode), (mtype),			\
		    (flags) | PGI_MT_NEXT);				\
	} else {							\
		(mtype) = -1;						\
	}								\
}
3060Sstevel@tonic-gate 
3075084Sjohnlev extern int mtype_pgr_init(int *, page_t *, int, pgcnt_t);
3085084Sjohnlev #define	MTYPE_PGR_INIT(mtype, flags, pp, mnode, pgcnt)			\
3095084Sjohnlev 	(mtype = mtype_pgr_init(&flags, pp, mnode, pgcnt))
3100Sstevel@tonic-gate 
311414Skchow #define	MNODE_PGCNT(mnode)		mnode_pgcnt(mnode)
312414Skchow 
3135084Sjohnlev extern void mnodetype_2_pfn(int, int, pfn_t *, pfn_t *);
3140Sstevel@tonic-gate #define	MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi)			\
3155084Sjohnlev 	mnodetype_2_pfn(mnode, mtype, &pfnlo, &pfnhi)
3160Sstevel@tonic-gate 
/*
 * Select the page list mutex for (mnode, bin).  PC_BIN_MUTEX() picks
 * from fpc_mutex[] when PG_FREE_LIST is set in flags and from
 * cpc_mutex[] otherwise; the bin is folded into the NPC_MUTEX slots by
 * masking with (NPC_MUTEX - 1).  flags is parenthesized so a compound
 * argument is tested as a whole.
 */
#define	PC_BIN_MUTEX(mnode, bin, flags) (((flags) & PG_FREE_LIST) ?	\
	&fpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)] :			\
	&cpc_mutex[(bin) & (NPC_MUTEX - 1)][(mnode)])

/* direct access by slot index i */
#define	FPC_MUTEX(mnode, i)	(&fpc_mutex[(i)][(mnode)])
#define	CPC_MUTEX(mnode, i)	(&cpc_mutex[(i)][(mnode)])
3230Sstevel@tonic-gate 
/*
 * CHK_LPG() calls chk_lpg() when DEBUG is defined and expands to
 * nothing (its arguments are not evaluated) otherwise.
 */
#ifndef DEBUG
#define	CHK_LPG(pp, szc)
#else
extern void	chk_lpg(page_t *, uchar_t);
#define	CHK_LPG(pp, szc)	chk_lpg(pp, szc)
#endif
3300Sstevel@tonic-gate 
/* LEVEL_SIZE(rg_szc) expressed in units of the next smaller level */
#define	FULL_REGION_CNT(rg_szc)	\
	(LEVEL_SIZE(rg_szc) >> LEVEL_SHIFT((rg_szc) - 1))

/*
 * Return the leader for this mapping size: step back from pp by the
 * offset of its p_pagenum within a SZCPAGES(szc)-page group.
 */
#define	PP_GROUPLEADER(pp, szc) \
	((pp) - (int)((pp)->p_pagenum & (SZCPAGES(szc) - 1)))

/* Return the root page for this page based on p_szc */
#define	PP_PAGEROOT(pp) ((pp)->p_szc != 0 ? \
	PP_GROUPLEADER((pp), (pp)->p_szc) : (pp))
3410Sstevel@tonic-gate 
3420Sstevel@tonic-gate /*
3430Sstevel@tonic-gate  * The counter base must be per page_counter element to prevent
3440Sstevel@tonic-gate  * races when re-indexing, and the base page size element should
3450Sstevel@tonic-gate  * be aligned on a boundary of the given region size.
3460Sstevel@tonic-gate  *
3470Sstevel@tonic-gate  * We also round up the number of pages spanned by the counters
3480Sstevel@tonic-gate  * for a given region to PC_BASE_ALIGN in certain situations to simplify
3490Sstevel@tonic-gate  * the coding for some non-performance critical routines.
3500Sstevel@tonic-gate  */
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate #define	PC_BASE_ALIGN		((pfn_t)1 << PAGE_BSZS_SHIFT(MMU_PAGE_SIZES-1))
3530Sstevel@tonic-gate #define	PC_BASE_ALIGN_MASK	(PC_BASE_ALIGN - 1)
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate /*
3560Sstevel@tonic-gate  * cpu/mmu-dependent vm variables
3570Sstevel@tonic-gate  */
3580Sstevel@tonic-gate extern uint_t mmu_page_sizes;
3590Sstevel@tonic-gate extern uint_t mmu_exported_page_sizes;
/*
 * page sizes that legacy applications can see via getpagesizes(3c).
 * Used to prevent legacy applications from inadvertently using the
 * 'new' large pagesizes (1g and above).
 */
3655349Skchow extern uint_t mmu_legacy_page_sizes;
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate /* For x86, userszc is the same as the kernel's szc */
3680Sstevel@tonic-gate #define	USERSZC_2_SZC(userszc)	(userszc)
3690Sstevel@tonic-gate #define	SZC_2_USERSZC(szc)	(szc)
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate /*
3720Sstevel@tonic-gate  * for hw_page_map_t, sized to hold the ratio of large page to base
3730Sstevel@tonic-gate  * pagesize (1024 max)
3740Sstevel@tonic-gate  */
3750Sstevel@tonic-gate typedef	short	hpmctr_t;
3760Sstevel@tonic-gate 
/*
 * get the setsize of the current cpu - assume homogeneous for x86
 *
 * CPUSETSIZE() is l2cache_sz / l2cache_assoc, or MMU_PAGESIZE when
 * l2cache_assoc is zero.
 */
extern int	l2cache_sz, l2cache_linesz, l2cache_assoc;

#define	L2CACHE_ALIGN		l2cache_linesz
#define	L2CACHE_ALIGN_MAX	64
#define	CPUSETSIZE()		\
	((l2cache_assoc == 0) ? MMU_PAGESIZE : (l2cache_sz / l2cache_assoc))
3860Sstevel@tonic-gate 
3870Sstevel@tonic-gate /*
3880Sstevel@tonic-gate  * Return the log2(pagesize(szc) / MMU_PAGESIZE) --- or the shift count
3890Sstevel@tonic-gate  * for the number of base pages in this pagesize
3900Sstevel@tonic-gate  */
3910Sstevel@tonic-gate #define	PAGE_BSZS_SHIFT(szc) (LEVEL_SHIFT(szc) - MMU_PAGESHIFT)
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate /*
3940Sstevel@tonic-gate  * Internal PG_ flags.
3950Sstevel@tonic-gate  */
3960Sstevel@tonic-gate #define	PGI_RELOCONLY	0x010000	/* opposite of PG_NORELOC */
3970Sstevel@tonic-gate #define	PGI_NOCAGE	0x020000	/* cage is disabled */
3980Sstevel@tonic-gate #define	PGI_PGCPHIPRI	0x040000	/* page_get_contig_page pri alloc */
3990Sstevel@tonic-gate #define	PGI_PGCPSZC0	0x080000	/* relocate base pagesize page */
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate /*
4020Sstevel@tonic-gate  * PGI range flags - should not overlap PGI flags
4030Sstevel@tonic-gate  */
4040Sstevel@tonic-gate #define	PGI_MT_RANGE0	0x1000000	/* mtype range to 0 */
4055084Sjohnlev #define	PGI_MT_RANGE16M 0x2000000	/* mtype range to 16m */
4061385Skchow #define	PGI_MT_RANGE4G	0x4000000	/* mtype range to 4g */
4071385Skchow #define	PGI_MT_NEXT	0x8000000	/* get next mtype */
4081385Skchow #define	PGI_MT_RANGE	(PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G)
4090Sstevel@tonic-gate 
4105084Sjohnlev 
4110Sstevel@tonic-gate /*
4122991Ssusans  * Maximum and default values for user heap, stack, private and shared
4132991Ssusans  * anonymous memory, and user text and initialized data.
4142991Ssusans  * Used by map_pgsz*() routines.
4152991Ssusans  */
4162991Ssusans extern size_t max_uheap_lpsize;
4172991Ssusans extern size_t default_uheap_lpsize;
4182991Ssusans extern size_t max_ustack_lpsize;
4192991Ssusans extern size_t default_ustack_lpsize;
4202991Ssusans extern size_t max_privmap_lpsize;
4212991Ssusans extern size_t max_uidata_lpsize;
4222991Ssusans extern size_t max_utext_lpsize;
4232991Ssusans extern size_t max_shm_lpsize;
4242991Ssusans extern size_t mcntl0_lpsize;
4252991Ssusans 
4262991Ssusans /*
4272991Ssusans  * Sanity control. Don't use large pages regardless of user
4282991Ssusans  * settings if there's less than priv or shm_lpg_min_physmem memory installed.
4292991Ssusans  * The units for this variable are 8K pages.
4302991Ssusans  */
4312991Ssusans extern pgcnt_t privm_lpg_min_physmem;
4322991Ssusans extern pgcnt_t shm_lpg_min_physmem;
4332991Ssusans 
/*
 * hash as and addr to get a bin.
 *
 * The page number of addr is added to the as pointer shifted right by
 * 4, masked with page_colors_mask, and scaled down by PNUM_SHIFT(szc).
 * The result is assigned to bin, which must be an lvalue.  seg and vp
 * are not used in the expansion.
 */
#define	AS_2_BIN(as, seg, vp, addr, bin, szc)				    \
	((bin) = (((((uintptr_t)(addr) >> PAGESHIFT) +			    \
	    ((uintptr_t)(as) >> 4)) & page_colors_mask) >> PNUM_SHIFT(szc)))
4420Sstevel@tonic-gate 
/*
 * cpu private vm data - accessed thru CPU->cpu_vm_data
 */
typedef struct {
	/* tracks last memseg visited in page_numtopp_nolock() */
	struct memseg	*vc_pnum_memseg;
	/* tracks last memseg visited in page_nextn() */
	struct memseg	*vc_pnext_memseg;
	/* original unaligned kmem pointer for this vm_cpu_data_t */
	void		*vc_kmptr;
	/* original kmem size for this vm_cpu_data_t */
	size_t		vc_kmsize;
} vm_cpu_data_t;

/* allocation size to ensure vm_cpu_data_t resides in its own cache line */
#define	VM_CPU_DATA_PADSIZE						\
	(P2ROUNDUP(sizeof (vm_cpu_data_t), L2CACHE_ALIGN_MAX))

/* for boot cpu before kmem is initialized */
extern char	vm_cpu_data0[];
464414Skchow 
465414Skchow /*
4660Sstevel@tonic-gate  * When a bin is empty, and we can't satisfy a color request correctly,
4670Sstevel@tonic-gate  * we scan.  If we assume that the programs have reasonable spatial
4680Sstevel@tonic-gate  * behavior, then it will not be a good idea to use the adjacent color.
4690Sstevel@tonic-gate  * Using the adjacent color would result in virtually adjacent addresses
4700Sstevel@tonic-gate  * mapping into the same spot in the cache.  So, if we stumble across
4710Sstevel@tonic-gate  * an empty bin, skip a bunch before looking.  After the first skip,
4720Sstevel@tonic-gate  * then just look one bin at a time so we don't miss our cache on
4730Sstevel@tonic-gate  * every look. Be sure to check every bin.  Page_create() will panic
4740Sstevel@tonic-gate  * if we miss a page.
4750Sstevel@tonic-gate  *
 * This also explains the `<=' in the for loops in both page_get_freelist()
 * and page_get_cachelist().  Since we checked the target bin, skipped
 * a bunch, then continued one at a time, we wind up checking the target bin
 * twice to make sure we get all of the bins.
4800Sstevel@tonic-gate  */
4810Sstevel@tonic-gate #define	BIN_STEP	19
4820Sstevel@tonic-gate 
4830Sstevel@tonic-gate #ifdef VM_STATS
4840Sstevel@tonic-gate struct vmm_vmstats_str {
48512293SJames.McPherson@Sun.COM 	ulong_t pgf_alloc[MMU_PAGE_SIZES];	/* page_get_freelist */
48612293SJames.McPherson@Sun.COM 	ulong_t pgf_allocok[MMU_PAGE_SIZES];
48712293SJames.McPherson@Sun.COM 	ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
48812293SJames.McPherson@Sun.COM 	ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
4890Sstevel@tonic-gate 	ulong_t	pgf_allocdeferred;
49012293SJames.McPherson@Sun.COM 	ulong_t	pgf_allocretry[MMU_PAGE_SIZES];
491414Skchow 	ulong_t pgc_alloc;			/* page_get_cachelist */
4920Sstevel@tonic-gate 	ulong_t pgc_allocok;
4930Sstevel@tonic-gate 	ulong_t pgc_allocokrem;
4940Sstevel@tonic-gate 	ulong_t pgc_allocokdeferred;
4950Sstevel@tonic-gate 	ulong_t pgc_allocfailed;
496414Skchow 	ulong_t	pgcp_alloc[MMU_PAGE_SIZES];	/* page_get_contig_pages */
4970Sstevel@tonic-gate 	ulong_t	pgcp_allocfailed[MMU_PAGE_SIZES];
4980Sstevel@tonic-gate 	ulong_t	pgcp_allocempty[MMU_PAGE_SIZES];
4990Sstevel@tonic-gate 	ulong_t	pgcp_allocok[MMU_PAGE_SIZES];
500414Skchow 	ulong_t	ptcp[MMU_PAGE_SIZES];		/* page_trylock_contig_pages */
5010Sstevel@tonic-gate 	ulong_t	ptcpfreethresh[MMU_PAGE_SIZES];
5020Sstevel@tonic-gate 	ulong_t	ptcpfailexcl[MMU_PAGE_SIZES];
5030Sstevel@tonic-gate 	ulong_t	ptcpfailszc[MMU_PAGE_SIZES];
5040Sstevel@tonic-gate 	ulong_t	ptcpfailcage[MMU_PAGE_SIZES];
5050Sstevel@tonic-gate 	ulong_t	ptcpok[MMU_PAGE_SIZES];
506414Skchow 	ulong_t	pgmf_alloc[MMU_PAGE_SIZES];	/* page_get_mnode_freelist */
5070Sstevel@tonic-gate 	ulong_t	pgmf_allocfailed[MMU_PAGE_SIZES];
5080Sstevel@tonic-gate 	ulong_t	pgmf_allocempty[MMU_PAGE_SIZES];
5090Sstevel@tonic-gate 	ulong_t	pgmf_allocok[MMU_PAGE_SIZES];
510414Skchow 	ulong_t	pgmc_alloc;			/* page_get_mnode_cachelist */
5110Sstevel@tonic-gate 	ulong_t	pgmc_allocfailed;
5120Sstevel@tonic-gate 	ulong_t	pgmc_allocempty;
5130Sstevel@tonic-gate 	ulong_t	pgmc_allocok;
514414Skchow 	ulong_t	pladd_free[MMU_PAGE_SIZES];	/* page_list_add/sub */
515414Skchow 	ulong_t	plsub_free[MMU_PAGE_SIZES];
516414Skchow 	ulong_t	pladd_cache;
517414Skchow 	ulong_t	plsub_cache;
518414Skchow 	ulong_t	plsubpages_szcbig;
519414Skchow 	ulong_t	plsubpages_szc0;
5202961Sdp78419 	ulong_t	pfs_req[MMU_PAGE_SIZES];	/* page_freelist_split */
5212961Sdp78419 	ulong_t	pfs_demote[MMU_PAGE_SIZES];
5222961Sdp78419 	ulong_t	pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
523414Skchow 	ulong_t	ppr_reloc[MMU_PAGE_SIZES];	/* page_relocate */
5240Sstevel@tonic-gate 	ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
5250Sstevel@tonic-gate 	ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
5260Sstevel@tonic-gate 	ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
5270Sstevel@tonic-gate 	ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
5280Sstevel@tonic-gate 	ulong_t ppr_relocok[MMU_PAGE_SIZES];
5293253Smec 	ulong_t ppr_copyfail;
5302961Sdp78419 	/* page coalesce counter */
5312961Sdp78419 	ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
5322961Sdp78419 	/* candidates useful */
5332961Sdp78419 	ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
5342961Sdp78419 	/* ctrs changed after locking */
5352961Sdp78419 	ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
5362961Sdp78419 	/* page_freelist_coalesce failed */
5372961Sdp78419 	ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
5380Sstevel@tonic-gate 	ulong_t page_ctrs_coalesce_all;	/* page coalesce all counter */
5390Sstevel@tonic-gate 	ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */
5400Sstevel@tonic-gate 	ulong_t	restrict4gcnt;
5411385Skchow 	ulong_t	unrestrict16mcnt;	/* non-DMA 16m allocs allowed */
5422855Skchow 	ulong_t	pgpanicalloc;		/* PG_PANIC allocation */
5435466Skchow 	ulong_t	pcf_deny[MMU_PAGE_SIZES];	/* page_chk_freelist */
5445466Skchow 	ulong_t	pcf_allow[MMU_PAGE_SIZES];
5450Sstevel@tonic-gate };
5460Sstevel@tonic-gate extern struct vmm_vmstats_str vmm_vmstats;
5470Sstevel@tonic-gate #endif	/* VM_STATS */
5480Sstevel@tonic-gate 
5490Sstevel@tonic-gate extern size_t page_ctrs_sz(void);
5500Sstevel@tonic-gate extern caddr_t page_ctrs_alloc(caddr_t);
551414Skchow extern void page_ctr_sub(int, int, page_t *, int);
5522961Sdp78419 extern page_t *page_freelist_split(uchar_t,
5537656SSherry.Moore@Sun.COM     uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *);
5542961Sdp78419 extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int,
5552961Sdp78419     pfn_t);
55612004Sjiang.liu@intel.com extern void page_freelist_coalesce_all(int);
5570Sstevel@tonic-gate extern uint_t page_get_pagecolors(uint_t);
5585262Srscott extern void pfnzero(pfn_t, uint_t, uint_t);
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate #ifdef	__cplusplus
5610Sstevel@tonic-gate }
5620Sstevel@tonic-gate #endif
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate #endif	/* _VM_DEP_H */
565