xref: /netbsd-src/sys/arch/amd64/include/pmap.h (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$NetBSD: pmap.h,v 1.28 2011/11/06 15:18:18 cherry Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright (c) 2001 Wasabi Systems, Inc.
30  * All rights reserved.
31  *
32  * Written by Frank van der Linden for Wasabi Systems, Inc.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  * 3. All advertising materials mentioning features or use of this software
43  *    must display the following acknowledgement:
44  *      This product includes software developed for the NetBSD Project by
45  *      Wasabi Systems, Inc.
46  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
47  *    or promote products derived from this software without specific prior
48  *    written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
52  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
53  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
54  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  */
62 
63 #ifndef	_AMD64_PMAP_H_
64 #define	_AMD64_PMAP_H_
65 
66 #ifdef __x86_64__
67 
68 #if defined(_KERNEL_OPT)
69 #include "opt_xen.h"
70 #endif
71 
72 #include <sys/atomic.h>
73 
74 #include <machine/pte.h>
75 #include <machine/segments.h>
76 #ifdef _KERNEL
77 #include <machine/cpufunc.h>
78 #endif
79 
80 #include <uvm/uvm_object.h>
81 #ifdef XEN
82 #include <xen/xenfunc.h>
83 #include <xen/xenpmap.h>
84 #endif /* XEN */
85 
86 /*
87  * The x86_64 pmap module closely resembles the i386 one. It uses
88  * the same recursive entry scheme, and the same alternate area
89  * trick for accessing non-current pmaps. See the i386 pmap.h
 * for a description. The obvious difference is that 2 extra
91  * levels of page table need to be dealt with. The level 1 page
92  * table pages are at:
93  *
94  * l1: 0x00007f8000000000 - 0x00007fffffffffff     (39 bits, needs PML4 entry)
95  *
96  * The alternate space is at:
97  *
98  * l1: 0xffffff8000000000 - 0xffffffffffffffff     (39 bits, needs PML4 entry)
99  *
100  * The rest is kept as physical pages in 3 UVM objects, and is
101  * temporarily mapped for virtual access when needed.
102  *
103  * Note that address space is signed, so the layout for 48 bits is:
104  *
105  *  +---------------------------------+ 0xffffffffffffffff
106  *  |                                 |
107  *  |    alt.L1 table (PTE pages)     |
108  *  |                                 |
109  *  +---------------------------------+ 0xffffff8000000000
110  *  ~                                 ~
111  *  |                                 |
112  *  |         Kernel Space            |
113  *  |                                 |
114  *  |                                 |
115  *  +---------------------------------+ 0xffff800000000000 = 0x0000800000000000
116  *  |                                 |
 *  |      L1 table (PTE pages)       |
118  *  |                                 |
119  *  +---------------------------------+ 0x00007f8000000000
120  *  ~                                 ~
121  *  |                                 |
122  *  |         User Space              |
123  *  |                                 |
124  *  |                                 |
125  *  +---------------------------------+ 0x0000000000000000
126  *
127  * In other words, there is a 'VA hole' at 0x0000800000000000 -
128  * 0xffff800000000000 which will trap, just as on, for example,
129  * sparcv9.
130  *
131  * The unused space can be used if needed, but it adds a little more
132  * complexity to the calculations.
133  */
134 
135 /*
136  * The first generation of Hammer processors can use 48 bits of
137  * virtual memory, and 40 bits of physical memory. This will be
138  * more for later generations. These defines can be changed to
139  * variable names containing the # of bits, extracted from an
140  * extended cpuid instruction (variables are harder to use during
141  * bootstrap, though)
142  */
143 #define VIRT_BITS	48
144 #define PHYS_BITS	40
145 
146 /*
147  * Mask to get rid of the sign-extended part of addresses.
148  */
149 #define VA_SIGN_MASK		0xffff000000000000
150 #define VA_SIGN_NEG(va)		((va) | VA_SIGN_MASK)
151 /*
152  * XXXfvdl this one's not right.
153  */
154 #define VA_SIGN_POS(va)		((va) & ~VA_SIGN_MASK)
155 
156 #define L4_SLOT_PTE		255
157 #ifndef XEN
158 #define L4_SLOT_KERN		256
159 #else
160 /* Xen use slots 256-272, let's move farther */
161 #define L4_SLOT_KERN		320
162 #endif
163 #define L4_SLOT_KERNBASE	511
164 #define L4_SLOT_APTE		510
165 
166 #define PDIR_SLOT_KERN	L4_SLOT_KERN
167 #define PDIR_SLOT_PTE	L4_SLOT_PTE
168 #define PDIR_SLOT_APTE	L4_SLOT_APTE
169 
170 /*
171  * the following defines give the virtual addresses of various MMU
172  * data structures:
173  * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
 * PDP_BASE and APDP_BASE: the base VA of the recursive mapping of the PDP
175  * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
176  *
177  */
178 
179 #define PTE_BASE  ((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
180 #define KERN_BASE  ((pt_entry_t *) (L4_SLOT_KERN * NBPD_L4))
181 #define APTE_BASE ((pt_entry_t *) (VA_SIGN_NEG((L4_SLOT_APTE * NBPD_L4))))
182 
183 #define L1_BASE		PTE_BASE
184 #define AL1_BASE	APTE_BASE
185 
186 #define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
187 #define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
188 #define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
189 
190 #define AL2_BASE ((pd_entry_t *)((char *)AL1_BASE + L4_SLOT_PTE * NBPD_L3))
191 #define AL3_BASE ((pd_entry_t *)((char *)AL2_BASE + L4_SLOT_PTE * NBPD_L2))
192 #define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
193 
194 #define PDP_PDE		(L4_BASE + PDIR_SLOT_PTE)
195 #define APDP_PDE	(&curcpu()->ci_kpm_pdir[PDIR_SLOT_APTE])
196 #define APDP_PDE_SHADOW	(L4_BASE + PDIR_SLOT_APTE)
197 
198 #define PDP_BASE	L4_BASE
199 #define APDP_BASE	AL4_BASE
200 
/*
 * Upper bounds on the number of kernel page table entries per level.
 * One L4 entry is allowed, and every lower level has 512 times as many
 * entries as the level above it.
 *
 * Each expansion is fully parenthesized so that the unsigned long cast
 * cannot be separated from its operand by the surrounding expression
 * (for example, `sizeof NKL4_MAX_ENTRIES' would otherwise not parse).
 */
#define NKL4_MAX_ENTRIES	((unsigned long)1)
#define NKL3_MAX_ENTRIES	((unsigned long)(NKL4_MAX_ENTRIES * 512))
#define NKL2_MAX_ENTRIES	((unsigned long)(NKL3_MAX_ENTRIES * 512))
#define NKL1_MAX_ENTRIES	((unsigned long)(NKL2_MAX_ENTRIES * 512))
205 
206 #define NKL4_KIMG_ENTRIES	1
207 #define NKL3_KIMG_ENTRIES	1
208 #define NKL2_KIMG_ENTRIES	10
209 
210 /*
211  * Since kva space is below the kernel in its entirety, we start off
212  * with zero entries on each level.
213  */
214 #define NKL4_START_ENTRIES	0
215 #define NKL3_START_ENTRIES	0
216 #define NKL2_START_ENTRIES	0
217 #define NKL1_START_ENTRIES	0	/* XXX */
218 
219 #define NTOPLEVEL_PDES		(PAGE_SIZE / (sizeof (pd_entry_t)))
220 
221 #define NPDPG			(PAGE_SIZE / sizeof (pd_entry_t))
222 
223 #define PTP_MASK_INITIALIZER	{ L1_FRAME, L2_FRAME, L3_FRAME, L4_FRAME }
224 #define PTP_SHIFT_INITIALIZER	{ L1_SHIFT, L2_SHIFT, L3_SHIFT, L4_SHIFT }
225 #define NKPTP_INITIALIZER	{ NKL1_START_ENTRIES, NKL2_START_ENTRIES, \
226 				  NKL3_START_ENTRIES, NKL4_START_ENTRIES }
227 #define NKPTPMAX_INITIALIZER	{ NKL1_MAX_ENTRIES, NKL2_MAX_ENTRIES, \
228 				  NKL3_MAX_ENTRIES, NKL4_MAX_ENTRIES }
229 #define NBPD_INITIALIZER	{ NBPD_L1, NBPD_L2, NBPD_L3, NBPD_L4 }
230 #define PDES_INITIALIZER	{ L2_BASE, L3_BASE, L4_BASE }
231 #define APDES_INITIALIZER	{ AL2_BASE, AL3_BASE, AL4_BASE }
232 
233 #define PTP_LEVELS	4
234 
235 /*
236  * PG_AVAIL usage: we make use of the ignored bits of the PTE
237  */
238 
239 #define PG_W		PG_AVAIL1	/* "wired" mapping */
240 #define PG_PVLIST	PG_AVAIL2	/* mapping has entry on pvlist */
241 /* PG_AVAIL3 not used */
242 
243 #define	PG_X		0		/* XXX dummy */
244 
245 /*
246  * Number of PTE's per cache line.  8 byte pte, 64-byte cache line
247  * Used to avoid false sharing of cache lines.
248  */
249 #define NPTECL		8
250 
251 #include <x86/pmap.h>
252 
253 #ifndef XEN
/*
 * Native (non-Xen) PTE accessors.
 *
 * pmap_pa2pte(a)		yields `a' unchanged.
 * pmap_pte2pa(a)		masks `a' with PG_FRAME.
 * pmap_pte_set(p, n)		plain store of `n' through `p'.
 * pmap_pte_cas(p, o, n)	atomic_cas_64() on `p'.
 * pmap_pte_testset(p, n)	atomic_swap_ulong() on `p'.
 * pmap_pte_setbits(p, b)	atomic_or_ulong() of `b' into `*p'.
 * pmap_pte_clearbits(p, b)	atomic_and_ulong() of `~b' into `*p'.
 * pmap_pte_flush()		expands to nothing.
 *
 * Every macro argument is parenthesized at its point of use.  Without
 * that, the (volatile unsigned long *) cast would bind only to the first
 * operand of an argument such as `base + off', and the pointer arithmetic
 * would then be done on the wrong type.
 */
#define pmap_pa2pte(a)			(a)
#define pmap_pte2pa(a)			((a) & PG_FRAME)
#define pmap_pte_set(p, n)		do { *(p) = (n); } while (0)
#define pmap_pte_cas(p, o, n)		atomic_cas_64((p), (o), (n))
#define pmap_pte_testset(p, n)		\
    atomic_swap_ulong((volatile unsigned long *)(p), (n))
#define pmap_pte_setbits(p, b)		\
    atomic_or_ulong((volatile unsigned long *)(p), (b))
#define pmap_pte_clearbits(p, b)	\
    atomic_and_ulong((volatile unsigned long *)(p), ~(b))
#define pmap_pte_flush()		/* nothing */
265 #else
266 static __inline pt_entry_t
267 pmap_pa2pte(paddr_t pa)
268 {
269 	return (pt_entry_t)xpmap_ptom_masked(pa);
270 }
271 
272 static __inline paddr_t
273 pmap_pte2pa(pt_entry_t pte)
274 {
275 	return xpmap_mtop_masked(pte & PG_FRAME);
276 }
277 static __inline void
278 pmap_pte_set(pt_entry_t *pte, pt_entry_t npte)
279 {
280 	int s = splvm();
281 	xpq_queue_pte_update(xpmap_ptetomach(pte), npte);
282 	splx(s);
283 }
284 
285 static __inline pt_entry_t
286 pmap_pte_cas(volatile pt_entry_t *ptep, pt_entry_t o, pt_entry_t n)
287 {
288 	int s = splvm();
289 
290 	pt_entry_t opte = *ptep;
291 
292 	if (opte == o) {
293 		xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(ptep)), n);
294 		xpq_flush_queue();
295 	}
296 	splx(s);
297 	return opte;
298 }
299 
300 static __inline pt_entry_t
301 pmap_pte_testset(volatile pt_entry_t *pte, pt_entry_t npte)
302 {
303 	int s = splvm();
304 	pt_entry_t opte = *pte;
305 	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)), npte);
306 	xpq_flush_queue();
307 	splx(s);
308 	return opte;
309 }
310 
311 static __inline void
312 pmap_pte_setbits(volatile pt_entry_t *pte, pt_entry_t bits)
313 {
314 	int s = splvm();
315 	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)), (*pte) | bits);
316 	xpq_flush_queue();
317 	splx(s);
318 }
319 
320 static __inline void
321 pmap_pte_clearbits(volatile pt_entry_t *pte, pt_entry_t bits)
322 {
323 	int s = splvm();
324 	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)),
325 	    (*pte) & ~bits);
326 	xpq_flush_queue();
327 	splx(s);
328 }
329 
/*
 * pmap_pte_flush: call xpq_flush_queue() between splvm() and splx()
 * (Xen build).
 */
static __inline void
pmap_pte_flush(void)
{
	const int ipl = splvm();

	xpq_flush_queue();

	splx(ipl);
}
337 #endif
338 
339 void pmap_prealloc_lowmem_ptps(void);
340 void pmap_changeprot_local(vaddr_t, vm_prot_t);
341 
342 #include <x86/pmap_pv.h>
343 
344 #define	__HAVE_VM_PAGE_MD
/*
 * VM_MDPAGE_INIT: zero (pg)->mdpage and then run PMAP_PAGE_INIT() on its
 * mp_pp member.
 *
 * The two statements are wrapped in do { } while (0) so the macro expands
 * to a single statement.  Without the wrapper, an unbraced
 * `if (c) VM_MDPAGE_INIT(pg);' would guard only the memset() and always
 * execute PMAP_PAGE_INIT().  Invoke it with a trailing semicolon.
 */
#define	VM_MDPAGE_INIT(pg) \
do { \
	memset(&(pg)->mdpage, 0, sizeof((pg)->mdpage)); \
	PMAP_PAGE_INIT(&(pg)->mdpage.mp_pp); \
} while (/*CONSTCOND*/ 0)
348 
349 struct vm_page_md {
350 	struct pmap_page mp_pp;
351 };
352 
353 #else	/*	!__x86_64__	*/
354 
355 #include <i386/pmap.h>
356 
357 #endif	/*	__x86_64__	*/
358 
359 #endif	/* _AMD64_PMAP_H_ */
360