1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * Kernel Physical Mapping (kpm) segment driver (segkpm).
31*0Sstevel@tonic-gate  *
32*0Sstevel@tonic-gate  * This driver delivers along with the hat_kpm* interfaces an alternative
33*0Sstevel@tonic-gate  * mechanism for kernel mappings within the 64-bit Solaris operating system,
34*0Sstevel@tonic-gate  * which allows the mapping of all physical memory into the kernel address
35*0Sstevel@tonic-gate  * space at once. This is feasible in 64 bit kernels, e.g. for Ultrasparc II
36*0Sstevel@tonic-gate  * and beyond processors, since the available VA range is much larger than
 * possible physical memory. Currently all physical memory that is
 * represented by the list of memory segments (memsegs) is supported.
39*0Sstevel@tonic-gate  *
 * Segkpm mappings also have very low overhead, and large pages are used
 * (when possible) to minimize the TLB and TSB footprint. It is also
 * extensible to architectures other than Sparc (e.g. AMD64). The main
 * advantage is the avoidance of the TLB-shootdown X-calls, which are
 * normally needed when a kernel (global) mapping has to be removed.
45*0Sstevel@tonic-gate  *
 * The first example of a kernel facility that uses the segkpm mapping
 * scheme is seg_map, where it is used as an alternative to hat_memload().
 * See also the hat layer for more information about the hat_kpm* routines.
 * The kpm facility can be turned off at boot time (e.g. /etc/system).
50*0Sstevel@tonic-gate  */
51*0Sstevel@tonic-gate 
52*0Sstevel@tonic-gate #include <sys/types.h>
53*0Sstevel@tonic-gate #include <sys/param.h>
54*0Sstevel@tonic-gate #include <sys/sysmacros.h>
55*0Sstevel@tonic-gate #include <sys/systm.h>
56*0Sstevel@tonic-gate #include <sys/vnode.h>
57*0Sstevel@tonic-gate #include <sys/cmn_err.h>
58*0Sstevel@tonic-gate #include <sys/debug.h>
59*0Sstevel@tonic-gate #include <sys/thread.h>
60*0Sstevel@tonic-gate #include <sys/cpuvar.h>
61*0Sstevel@tonic-gate #include <sys/bitmap.h>
62*0Sstevel@tonic-gate #include <sys/atomic.h>
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate #include <vm/seg_kmem.h>
65*0Sstevel@tonic-gate #include <vm/seg_kpm.h>
66*0Sstevel@tonic-gate #include <vm/hat.h>
67*0Sstevel@tonic-gate #include <vm/as.h>
68*0Sstevel@tonic-gate #include <vm/seg.h>
69*0Sstevel@tonic-gate #include <vm/page.h>
70*0Sstevel@tonic-gate 
/*
 * Global kpm tunables.
 * Platform and mmu specific code provides further controls.
 *
 * kpm_enable
 *	Master on/off switch for segkpm.
 *	- Defaults to on for 64bit platforms with kpm support.
 *	- The platform layer clears it when kpm is not supported.
 *	- May be cleared via /etc/system.
 *
 * kpm_smallpages
 *	Restrict kpm mappings to the regular/system pagesize.
 *	- Handy for critical debugging of kpm clients.
 *	- Defaults to zero on platforms that support kpm large pages.
 *	  Large pages shrink the kpm meta data footprint and bring the
 *	  usual large page benefits (e.g. fewer TLB misses).
 *	- Defaults to set on platforms that have no kpm large page
 *	  support, or that cannot use large pages for other reasons
 *	  (e.g. only a few fully associative TLB entries are available
 *	  for large pages).
 *
 * segmap_kpm
 *	Separate on/off switch for segmap's use of segkpm.
 *	- Defaults to on.
 *	- Disabled when kpm_enable is zero.
 *	- Disabled when MAXBSIZE != PAGESIZE.
 *	- May be cleared via /etc/system.
 */
int kpm_enable = 1;		/* segkpm globally enabled */
int kpm_smallpages = 0;		/* nonzero: system pagesize mappings only */
int segmap_kpm = 1;		/* segmap may use segkpm */
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate /*
102*0Sstevel@tonic-gate  * Private seg op routines.
103*0Sstevel@tonic-gate  */
104*0Sstevel@tonic-gate faultcode_t segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr,
105*0Sstevel@tonic-gate 			size_t len, enum fault_type type, enum seg_rw rw);
106*0Sstevel@tonic-gate static void	segkpm_dump(struct seg *);
107*0Sstevel@tonic-gate static void	segkpm_badop(void);
108*0Sstevel@tonic-gate static int	segkpm_notsup(void);
109*0Sstevel@tonic-gate 
110*0Sstevel@tonic-gate #define	SEGKPM_BADOP(t)	(t(*)())segkpm_badop
111*0Sstevel@tonic-gate #define	SEGKPM_NOTSUP	(int(*)())segkpm_notsup
112*0Sstevel@tonic-gate 
113*0Sstevel@tonic-gate static struct seg_ops segkpm_ops = {
114*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* dup */
115*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* unmap */
116*0Sstevel@tonic-gate 	SEGKPM_BADOP(void),	/* free */
117*0Sstevel@tonic-gate 	segkpm_fault,
118*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* faulta */
119*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* setprot */
120*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* checkprot */
121*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* kluster */
122*0Sstevel@tonic-gate 	SEGKPM_BADOP(size_t),	/* swapout */
123*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* sync */
124*0Sstevel@tonic-gate 	SEGKPM_BADOP(size_t),	/* incore */
125*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* lockop */
126*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* getprot */
127*0Sstevel@tonic-gate 	SEGKPM_BADOP(u_offset_t), /* getoffset */
128*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* gettype */
129*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* getvp */
130*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* advise */
131*0Sstevel@tonic-gate 	segkpm_dump,		/* dump */
132*0Sstevel@tonic-gate 	SEGKPM_NOTSUP,		/* pagelock */
133*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* setpgsz */
134*0Sstevel@tonic-gate 	SEGKPM_BADOP(int),	/* getmemid */
135*0Sstevel@tonic-gate };
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate /*
138*0Sstevel@tonic-gate  * kpm_pgsz and kpm_pgshft are set by platform layer.
139*0Sstevel@tonic-gate  */
140*0Sstevel@tonic-gate size_t		kpm_pgsz;	/* kpm page size */
141*0Sstevel@tonic-gate uint_t		kpm_pgshft;	/* kpm page shift */
142*0Sstevel@tonic-gate u_offset_t	kpm_pgoff;	/* kpm page offset mask */
143*0Sstevel@tonic-gate uint_t		kpmp2pshft;	/* kpm page to page shift */
144*0Sstevel@tonic-gate pgcnt_t		kpmpnpgs;	/* how many pages per kpm page */
145*0Sstevel@tonic-gate 
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate #ifdef	SEGKPM_SUPPORT
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate int
150*0Sstevel@tonic-gate segkpm_create(struct seg *seg, void *argsp)
151*0Sstevel@tonic-gate {
152*0Sstevel@tonic-gate 	struct segkpm_data *skd;
153*0Sstevel@tonic-gate 	struct segkpm_crargs *b = (struct segkpm_crargs *)argsp;
154*0Sstevel@tonic-gate 	ushort_t *p;
155*0Sstevel@tonic-gate 	int i, j;
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate 	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
158*0Sstevel@tonic-gate 	ASSERT(btokpmp(seg->s_size) >= 1 &&
159*0Sstevel@tonic-gate 		kpmpageoff((uintptr_t)seg->s_base) == 0 &&
160*0Sstevel@tonic-gate 		kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0);
161*0Sstevel@tonic-gate 
162*0Sstevel@tonic-gate 	skd = kmem_zalloc(sizeof (struct segkpm_data), KM_SLEEP);
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate 	seg->s_data = (void *)skd;
165*0Sstevel@tonic-gate 	seg->s_ops = &segkpm_ops;
166*0Sstevel@tonic-gate 	skd->skd_prot = b->prot;
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate 	/*
169*0Sstevel@tonic-gate 	 * (1) Segkpm virtual addresses are based on physical adresses.
170*0Sstevel@tonic-gate 	 * From this and in opposite to other segment drivers it is
171*0Sstevel@tonic-gate 	 * often required to allocate a page first to be able to
172*0Sstevel@tonic-gate 	 * calculate the final segkpm virtual address.
173*0Sstevel@tonic-gate 	 * (2) Page  allocation is done by calling page_create_va(),
174*0Sstevel@tonic-gate 	 * one important input argument is a virtual address (also
175*0Sstevel@tonic-gate 	 * expressed by the "va" in the function name). This function
176*0Sstevel@tonic-gate 	 * is highly optimized to select the right page for an optimal
177*0Sstevel@tonic-gate 	 * processor and platform support (e.g. virtual addressed
178*0Sstevel@tonic-gate 	 * caches (VAC), physical addressed caches, NUMA).
179*0Sstevel@tonic-gate 	 *
180*0Sstevel@tonic-gate 	 * Because of (1) the approach is to generate a faked virtual
181*0Sstevel@tonic-gate 	 * address for calling page_create_va(). In order to exploit
182*0Sstevel@tonic-gate 	 * the abilities of (2), especially to utilize the cache
183*0Sstevel@tonic-gate 	 * hierarchy (3) and to avoid VAC alias conflicts (4) the
184*0Sstevel@tonic-gate 	 * selection has to be done carefully. For each virtual color
185*0Sstevel@tonic-gate 	 * a separate counter is provided (4). The count values are
186*0Sstevel@tonic-gate 	 * used for the utilization of all cache lines (3) and are
187*0Sstevel@tonic-gate 	 * corresponding to the cache bins.
188*0Sstevel@tonic-gate 	 */
189*0Sstevel@tonic-gate 	skd->skd_nvcolors = b->nvcolors;
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate 	p = skd->skd_va_select =
192*0Sstevel@tonic-gate 		kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP);
193*0Sstevel@tonic-gate 
194*0Sstevel@tonic-gate 	for (i = 0; i < NCPU; i++)
195*0Sstevel@tonic-gate 		for (j = 0; j < b->nvcolors; j++, p++)
196*0Sstevel@tonic-gate 			*p = j;
197*0Sstevel@tonic-gate 
198*0Sstevel@tonic-gate 	return (0);
199*0Sstevel@tonic-gate }
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate /*
202*0Sstevel@tonic-gate  * This routine is called via a machine specific fault handling
203*0Sstevel@tonic-gate  * routine.
204*0Sstevel@tonic-gate  */
205*0Sstevel@tonic-gate /* ARGSUSED */
206*0Sstevel@tonic-gate faultcode_t
207*0Sstevel@tonic-gate segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
208*0Sstevel@tonic-gate 	enum fault_type type, enum seg_rw rw)
209*0Sstevel@tonic-gate {
210*0Sstevel@tonic-gate 	faultcode_t error;
211*0Sstevel@tonic-gate 
212*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
213*0Sstevel@tonic-gate 
214*0Sstevel@tonic-gate 	error = hat_kpm_fault(hat, addr);
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 	return (error);
217*0Sstevel@tonic-gate }
218*0Sstevel@tonic-gate 
/*
 * Virtual color of addr: the page index (addr >> PAGESHIFT) masked with
 * (vcolors - 1).  The mask form equals "page index modulo vcolors" only
 * when vcolors is a power of two.  Both arguments are parenthesized in
 * the expansion so that expression arguments (e.g. "a + b") are
 * evaluated as a unit.
 */
#define	addr_to_vcolor(addr, vcolors) \
	((int)(((uintptr_t)(addr) & (((vcolors) << PAGESHIFT) - 1)) >> \
	    PAGESHIFT))
221*0Sstevel@tonic-gate 
222*0Sstevel@tonic-gate /*
223*0Sstevel@tonic-gate  * Create a virtual address that can be used for invocations of
224*0Sstevel@tonic-gate  * page_create_va. Goal is to utilize the cache hierarchy (round
225*0Sstevel@tonic-gate  * robin bins) and to select the right color for virtual indexed
226*0Sstevel@tonic-gate  * caches. It isn't exact since we also increment the bin counter
227*0Sstevel@tonic-gate  * when the caller uses VOP_GETPAGE and gets a hit in the page
228*0Sstevel@tonic-gate  * cache, but we keep the bins turning for cache distribution
229*0Sstevel@tonic-gate  * (see also segkpm_create block comment).
230*0Sstevel@tonic-gate  */
231*0Sstevel@tonic-gate caddr_t
232*0Sstevel@tonic-gate segkpm_create_va(u_offset_t off)
233*0Sstevel@tonic-gate {
234*0Sstevel@tonic-gate 	int vcolor;
235*0Sstevel@tonic-gate 	ushort_t *p;
236*0Sstevel@tonic-gate 	struct segkpm_data *skd = (struct segkpm_data *)segkpm->s_data;
237*0Sstevel@tonic-gate 	int nvcolors = skd->skd_nvcolors;
238*0Sstevel@tonic-gate 	caddr_t	va;
239*0Sstevel@tonic-gate 
240*0Sstevel@tonic-gate 	vcolor = (nvcolors > 1) ? addr_to_vcolor(off, nvcolors) : 0;
241*0Sstevel@tonic-gate 	p = &skd->skd_va_select[(CPU->cpu_id * nvcolors) + vcolor];
242*0Sstevel@tonic-gate 	va = (caddr_t)ptob(*p);
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate 	atomic_add_16(p, nvcolors);
245*0Sstevel@tonic-gate 
246*0Sstevel@tonic-gate 	return (va);
247*0Sstevel@tonic-gate }
248*0Sstevel@tonic-gate 
249*0Sstevel@tonic-gate /*
250*0Sstevel@tonic-gate  * Unload mapping if the instance has an active kpm mapping.
251*0Sstevel@tonic-gate  */
252*0Sstevel@tonic-gate void
253*0Sstevel@tonic-gate segkpm_mapout_validkpme(struct kpme *kpme)
254*0Sstevel@tonic-gate {
255*0Sstevel@tonic-gate 	caddr_t vaddr;
256*0Sstevel@tonic-gate 	page_t *pp;
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate retry:
259*0Sstevel@tonic-gate 	if ((pp = kpme->kpe_page) == NULL) {
260*0Sstevel@tonic-gate 		return;
261*0Sstevel@tonic-gate 	}
262*0Sstevel@tonic-gate 
263*0Sstevel@tonic-gate 	if (page_lock(pp, SE_SHARED, (kmutex_t *)NULL, P_RECLAIM) == 0)
264*0Sstevel@tonic-gate 		goto retry;
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate 	/*
267*0Sstevel@tonic-gate 	 * Check if segkpm mapping is not unloaded in the meantime
268*0Sstevel@tonic-gate 	 */
269*0Sstevel@tonic-gate 	if (kpme->kpe_page == NULL) {
270*0Sstevel@tonic-gate 		page_unlock(pp);
271*0Sstevel@tonic-gate 		return;
272*0Sstevel@tonic-gate 	}
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate 	vaddr = hat_kpm_page2va(pp, 1);
275*0Sstevel@tonic-gate 	hat_kpm_mapout(pp, kpme, vaddr);
276*0Sstevel@tonic-gate 	page_unlock(pp);
277*0Sstevel@tonic-gate }
278*0Sstevel@tonic-gate 
/*
 * Handler for the seg ops that segkpm does not implement: panics.
 */
static void
segkpm_badop(void)
{
	panic("segkpm_badop");
}
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate #else	/* SEGKPM_SUPPORT */
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate /* segkpm stubs */
288*0Sstevel@tonic-gate 
/*
 * Stub for builds without SEGKPM_SUPPORT: ignores both arguments and
 * returns 0.
 */
/*ARGSUSED*/
int
segkpm_create(struct seg *seg, void *argsp)
{
	return (0);
}
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate /* ARGSUSED */
293*0Sstevel@tonic-gate faultcode_t
294*0Sstevel@tonic-gate segkpm_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
295*0Sstevel@tonic-gate 	enum fault_type type, enum seg_rw rw)
296*0Sstevel@tonic-gate {
297*0Sstevel@tonic-gate 	return ((faultcode_t)0);
298*0Sstevel@tonic-gate }
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate /* ARGSUSED */
301*0Sstevel@tonic-gate caddr_t segkpm_create_va(u_offset_t off) { return (NULL); }
302*0Sstevel@tonic-gate 
/*
 * Stub for builds without SEGKPM_SUPPORT: does nothing.
 */
/* ARGSUSED */
void
segkpm_mapout_validkpme(struct kpme *kpme)
{
}
305*0Sstevel@tonic-gate 
/*
 * Stub for builds without SEGKPM_SUPPORT: does nothing.
 */
static void
segkpm_badop(void)
{
}
308*0Sstevel@tonic-gate 
309*0Sstevel@tonic-gate #endif	/* SEGKPM_SUPPORT */
310*0Sstevel@tonic-gate 
/*
 * Handler for seg ops that segkpm reports as unsupported: returns
 * ENOTSUP.
 */
static int
segkpm_notsup(void)
{
	int err = ENOTSUP;

	return (err);
}
316*0Sstevel@tonic-gate 
/*
 * Dump handler of the segkpm segment.  segkpm pages are not dumped, so
 * there is nothing to do here.
 */
/*ARGSUSED*/
static void
segkpm_dump(struct seg *seg)
{
}
324