xref: /openbsd-src/sys/arch/amd64/include/pmap.h (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: pmap.h,v 1.62 2016/02/08 18:23:04 stefan Exp $	*/
2 /*	$NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2001 Wasabi Systems, Inc.
31  * All rights reserved.
32  *
33  * Written by Frank van der Linden for Wasabi Systems, Inc.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgement:
45  *      This product includes software developed for the NetBSD Project by
46  *      Wasabi Systems, Inc.
47  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
48  *    or promote products derived from this software without specific prior
49  *    written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
53  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
54  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
55  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
56  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
57  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
58  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
59  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
61  * POSSIBILITY OF SUCH DAMAGE.
62  */
63 
64 /*
65  * pmap.h: see pmap.c for the history of this pmap module.
66  */
67 
68 #ifndef	_MACHINE_PMAP_H_
69 #define	_MACHINE_PMAP_H_
70 
71 #ifndef _LOCORE
72 #ifdef _KERNEL
73 #include <machine/cpufunc.h>
74 #include <machine/segments.h>
75 #endif /* _KERNEL */
76 #include <sys/mutex.h>
77 #include <uvm/uvm_object.h>
78 #include <machine/pte.h>
79 #endif
80 
81 /*
82  * The x86_64 pmap module closely resembles the i386 one. It uses
83  * the same recursive entry scheme. See the i386 pmap.h for a
84  * description. The alternate area trick for accessing non-current
85  * pmaps has been removed, though, because it performs badly on SMP
86  * systems.
87  * The most obvious difference to i386 is that 2 extra levels of page
88  * table need to be dealt with. The level 1 page table pages are at:
89  *
90  * l1: 0x00007f8000000000 - 0x00007fffffffffff     (39 bits, needs PML4 entry)
91  *
92  * The other levels are kept as physical pages in 3 UVM objects and are
93  * temporarily mapped for virtual access when needed.
94  *
95  * The other obvious difference from i386 is that it has a direct map of all
96  * physical memory in the VA range:
97  *
98  *     0xffffff0000000000 - 0xffffff7fffffffff
99  *
100  * The direct map is used in some cases to access PTEs of non-current pmaps.
101  *
102  * Note that address space is signed, so the layout for 48 bits is:
103  *
104  *  +---------------------------------+ 0xffffffffffffffff
105  *  |         Kernel Image            |
106  *  +---------------------------------+ 0xffffff8000000000
107  *  |         Direct Map              |
108  *  +---------------------------------+ 0xffffff0000000000
109  *  ~                                 ~
110  *  |                                 |
111  *  |         Kernel Space            |
112  *  |                                 |
113  *  |                                 |
114  *  +---------------------------------+ 0xffff800000000000 = 0x0000800000000000
115  *  |    L1 table (PTE pages)         |
116  *  +---------------------------------+ 0x00007f8000000000
117  *  ~                                 ~
118  *  |                                 |
119  *  |         User Space              |
120  *  |                                 |
121  *  |                                 |
122  *  +---------------------------------+ 0x0000000000000000
123  *
124  * In other words, there is a 'VA hole' at 0x0000800000000000 -
125  * 0xffff800000000000 which will trap, just as on, for example,
126  * sparcv9.
127  *
128  * The unused space can be used if needed, but it adds a little more
129  * complexity to the calculations.
130  */
131 
132 /*
133  * Mask to get rid of the sign-extended part of addresses.
 *
 * VA_SIGN_MASK selects the upper 16 bits of a 64-bit address.
 * VA_SIGN_NEG(va) sets all of those bits (va | mask);
 * VA_SIGN_POS(va) clears all of them (va & ~mask).
 *
 * NOTE(review): these definitions sit outside the _LOCORE guards, so they
 * may also be seen by assembly sources -- confirm before adding integer
 * suffixes or casts to the constant.
134  */
135 #define VA_SIGN_MASK		0xffff000000000000
136 #define VA_SIGN_NEG(va)		((va) | VA_SIGN_MASK)
137 /*
138  * XXXfvdl this one's not right.
 * (Original author's caveat about VA_SIGN_POS; the header does not record
 * what exactly is wrong with it.)
139  */
140 #define VA_SIGN_POS(va)		((va) & ~VA_SIGN_MASK)
141 
/*
 * Level 4 slot numbers, matching the address space layout drawn in the
 * comment at the top of this file:
 *   255  L1 table / recursive PTE mapping (0x00007f8000000000)
 *   256  first slot of kernel space       (0xffff800000000000)
 *   510  direct map                       (0xffffff0000000000)
 *   511  kernel image                     (0xffffff8000000000)
 * The PDIR_SLOT_* names are plain aliases for the L4_SLOT_* values.
 */
142 #define L4_SLOT_PTE		255
143 #define L4_SLOT_KERN		256
144 #define L4_SLOT_KERNBASE	511
145 #define L4_SLOT_DIRECT		510
146 
147 #define PDIR_SLOT_KERN		L4_SLOT_KERN
148 #define PDIR_SLOT_PTE		L4_SLOT_PTE
149 #define PDIR_SLOT_DIRECT	L4_SLOT_DIRECT
150 
151 /*
152  * the following defines give the virtual addresses of various MMU
153  * data structures:
154  * PTE_BASE: the base VA of the linear PTE mappings
155  * PDP_PDE: the VA of the PDE that points back to the PDP
156  *
 * PMAP_DIRECT_BASE / PMAP_DIRECT_END: sign-extended first VA of level 4
 *   slot L4_SLOT_DIRECT and of the slot following it.
 * L1_BASE: alias for PTE_BASE.
 * L2_BASE, L3_BASE, L4_BASE: each is the previous base advanced by
 *   L4_SLOT_PTE times NBPD_L3, NBPD_L2 and NBPD_L1 bytes respectively.
 * PDP_BASE: alias for L4_BASE.
157  */
158 
159 #define PTE_BASE  ((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
160 #define PMAP_DIRECT_BASE	(VA_SIGN_NEG((L4_SLOT_DIRECT * NBPD_L4)))
161 #define PMAP_DIRECT_END		(VA_SIGN_NEG(((L4_SLOT_DIRECT + 1) * NBPD_L4)))
162 
163 #define L1_BASE		PTE_BASE
164 
/* byte arithmetic is done through (char *) before casting to pd_entry_t * */
165 #define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
166 #define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
167 #define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
168 
/* entry PDIR_SLOT_PTE of the level 4 table */
169 #define PDP_PDE		(L4_BASE + PDIR_SLOT_PTE)
170 
171 #define PDP_BASE	L4_BASE
172 
/*
 * Maximum entry counts per page table level (used by
 * NKPTPMAX_INITIALIZER).  Level 4 is limited to a single entry and each
 * lower level has 512 times as many entries as the level above it.
 *
 * Every replacement list is fully parenthesized so that the cast cannot
 * be split from its operand when the macro appears inside a larger
 * expression (e.g. as the operand of sizeof or of a postfix operator).
 */
#define NKL4_MAX_ENTRIES	((unsigned long)1)
#define NKL3_MAX_ENTRIES	((unsigned long)(NKL4_MAX_ENTRIES * 512))
#define NKL2_MAX_ENTRIES	((unsigned long)(NKL3_MAX_ENTRIES * 512))
#define NKL1_MAX_ENTRIES	((unsigned long)(NKL2_MAX_ENTRIES * 512))
177 
/*
 * Fixed entry counts per level.
 * NOTE(review): presumably NK*_KIMG_* size the page tables covering the
 * kernel image and NDM*_* those covering the direct map -- confirm
 * against the bootstrap code that consumes them.
 */
178 #define NKL4_KIMG_ENTRIES	1
179 #define NKL3_KIMG_ENTRIES	1
180 #define NKL2_KIMG_ENTRIES	16
181 
182 #define NDML4_ENTRIES		1
183 #define NDML3_ENTRIES		1
184 #define NDML2_ENTRIES		4	/* 4GB */
185 
186 /*
187  * Since kva space is below the kernel in its entirety, we start off
188  * with zero entries on each level.
 * These four values feed NKPTP_INITIALIZER.
189  */
190 #define NKL4_START_ENTRIES	0
191 #define NKL3_START_ENTRIES	0
192 #define NKL2_START_ENTRIES	0
193 #define NKL1_START_ENTRIES	0	/* XXX */
194 
/*
 * Both macros below evaluate to the number of pd_entry_t objects that
 * fit in one page (PAGE_SIZE / sizeof(pd_entry_t)).
 */
195 #define NTOPLEVEL_PDES		(PAGE_SIZE / (sizeof (pd_entry_t)))
196 
197 #define NPDPG			(PAGE_SIZE / sizeof (pd_entry_t))
198 
199 /*
200  * pl*_pi: index in the ptp page for a pde mapping a VA.
201  * (pl*_i below is the index in the virtual array of all pdes per level)
 *
 * Each macro strips the sign-extension bits with VA_SIGN_POS(), masks
 * with Ln_MASK and shifts right by Ln_SHIFT.
202  */
203 #define pl1_pi(VA)	(((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
204 #define pl2_pi(VA)	(((VA_SIGN_POS(VA)) & L2_MASK) >> L2_SHIFT)
205 #define pl3_pi(VA)	(((VA_SIGN_POS(VA)) & L3_MASK) >> L3_SHIFT)
206 #define pl4_pi(VA)	(((VA_SIGN_POS(VA)) & L4_MASK) >> L4_SHIFT)
207 
208 /*
209  * pl*_i: generate index into pde/pte arrays in virtual space
 *
 * Same shape as pl*_pi but masking with Ln_FRAME instead of Ln_MASK.
 * pl_i(va, lvl) is the table-driven form: lvl is 1-based and selects
 * element lvl-1 of ptp_masks[] / ptp_shifts[].
210  */
211 #define pl1_i(VA)	(((VA_SIGN_POS(VA)) & L1_FRAME) >> L1_SHIFT)
212 #define pl2_i(VA)	(((VA_SIGN_POS(VA)) & L2_FRAME) >> L2_SHIFT)
213 #define pl3_i(VA)	(((VA_SIGN_POS(VA)) & L3_FRAME) >> L3_SHIFT)
214 #define pl4_i(VA)	(((VA_SIGN_POS(VA)) & L4_FRAME) >> L4_SHIFT)
215 #define pl_i(va, lvl) \
216         (((VA_SIGN_POS(va)) & ptp_masks[(lvl)-1]) >> ptp_shifts[(lvl)-1])
217 
/*
 * Brace initializers for per-level tables.  Elements are ordered from
 * level 1 upwards, so element [lvl-1] belongs to level lvl (this is how
 * pl_i() indexes ptp_masks[] and ptp_shifts[]).
 * PDES_INITIALIZER is the exception: it has three elements and starts
 * at L2_BASE.
 */
218 #define PTP_MASK_INITIALIZER	{ L1_FRAME, L2_FRAME, L3_FRAME, L4_FRAME }
219 #define PTP_SHIFT_INITIALIZER	{ L1_SHIFT, L2_SHIFT, L3_SHIFT, L4_SHIFT }
220 #define NKPTP_INITIALIZER	{ NKL1_START_ENTRIES, NKL2_START_ENTRIES, \
221 				  NKL3_START_ENTRIES, NKL4_START_ENTRIES }
222 #define NKPTPMAX_INITIALIZER	{ NKL1_MAX_ENTRIES, NKL2_MAX_ENTRIES, \
223 				  NKL3_MAX_ENTRIES, NKL4_MAX_ENTRIES }
224 #define NBPD_INITIALIZER	{ NBPD_L1, NBPD_L2, NBPD_L3, NBPD_L4 }
225 #define PDES_INITIALIZER	{ L2_BASE, L3_BASE, L4_BASE }
226 
227 /*
228  * PTP macros:
229  *   a PTP's index is the PD index of the PDE that points to it
230  *   a PTP's offset is the byte-offset in the PTE space that this PTP is at
231  *   a PTP's VA is the first VA mapped by that PTP
232  *
233  * note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 512
 *           8-byte entries); a level 1 PTP therefore maps 512 pages
234  *           (2MB)
 *
 * ptp_va2o(va, lvl): pl_i() index of va at level lvl+1, multiplied by
 * PAGE_SIZE.
 * PTP_LEVELS: number of page table levels (4).
235  */
236 
237 #define ptp_va2o(va, lvl)	(pl_i(va, (lvl)+1) * PAGE_SIZE)
238 
239 #define PTP_LEVELS	4
240 
241 /*
242  * PG_AVAIL usage: we make use of the ignored bits of the PTE
 *
 * PG_W and PG_PVLIST are aliases for PG_AVAIL1 and PG_AVAIL2; the
 * PG_AVAIL* values themselves are not defined in this file.
243  */
244 
245 #define PG_W		PG_AVAIL1	/* "wired" mapping */
246 #define PG_PVLIST	PG_AVAIL2	/* mapping has entry on pvlist */
247 /* PG_AVAIL3 not used */
248 
249 /*
250  * Number of PTE's per cache line.  8 byte pte, 64-byte cache line
251  * Used to avoid false sharing of cache lines.
 * (64 / 8 == 8.)
252  */
253 #define NPTECL		8
254 
255 
256 #if defined(_KERNEL) && !defined(_LOCORE)
257 /*
258  * pmap data structures: see pmap.c for details of locking.
259  */
260 
/* forward declaration; pmap_t is a pointer to struct pmap */
261 struct pmap;
262 typedef struct pmap *pmap_t;
263 
264 /*
265  * we maintain a list of all non-kernel pmaps
 * (struct pmap links into it through its pm_list member)
266  */
267 
268 LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
269 
270 /*
271  * the pmap structure
272  *
273  * note that the pm_obj contains the reference count,
274  * page list, and number of PTPs within the pmap.
 *
 * pm_type holds one of the PMAP_TYPE_* values below; pmap_nested(pm) is
 * true for every type other than PMAP_TYPE_NORMAL.
 * NOTE(review): EPT and RVI presumably refer to the Intel and AMD nested
 * paging formats -- confirm against pmap.c.
275  */
276 
277 #define PMAP_TYPE_NORMAL	1
278 #define PMAP_TYPE_EPT		2
279 #define PMAP_TYPE_RVI		3
280 #define pmap_nested(pm) ((pm)->pm_type != PMAP_TYPE_NORMAL)
281 
282 struct pmap {
283 	struct mutex pm_mtx;
284 	struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1 */
285 	LIST_ENTRY(pmap) pm_list;	/* list (lck by pm_list lock) */
286 	pd_entry_t *pm_pdir;		/* VA of PD (lck by object lock) */
287 	paddr_t pm_pdirpa;		/* PA of PD (read-only after create) */
288 	struct vm_page *pm_ptphint[PTP_LEVELS-1];
289 					/* pointer to a PTP in our pmap */
290 	struct pmap_statistics pm_stats;  /* pmap stats (lck by object lock) */
291 
292 	u_int64_t pm_cpus;		/* mask of CPUs using pmap */
293 	int pm_type;			/* Type of pmap this is (PMAP_TYPE_x) */
294 };
295 
296 /*
297  * MD flags that we use for pmap_enter (in the pa):
 *
 * The flags occupy bits inside PAGE_MASK; PMAP_PA_MASK is the complement
 * of PAGE_MASK (as a paddr_t) and strips them again.
298  */
299 #define PMAP_PA_MASK	~((paddr_t)PAGE_MASK) /* to remove the flags */
300 #define	PMAP_NOCACHE	0x1 /* set the non-cacheable bit. */
301 #define	PMAP_WC		0x2 /* set page write combining. */
302 
303 /*
304  * We keep mod/ref flags in struct vm_page->pg_flags.
 * PG_PMAP_MOD, PG_PMAP_REF and PG_PMAP_WC alias PG_PMAP0..PG_PMAP2.
305  */
306 #define	PG_PMAP_MOD	PG_PMAP0
307 #define	PG_PMAP_REF	PG_PMAP1
308 #define	PG_PMAP_WC      PG_PMAP2
309 
310 /*
311  * for each managed physical page we maintain a list of <PMAP,VA>'s
312  * which it is mapped at.
 *
 * Entries are singly linked through pv_next; the list head is the
 * pv_list member of struct vm_page_md, declared at the end of this file.
313  */
314 struct pv_entry {			/* locked by its list's pvh_lock */
315 	struct pv_entry *pv_next;	/* next entry */
316 	struct pmap *pv_pmap;		/* the pmap */
317 	vaddr_t pv_va;			/* the virtual address */
318 	struct vm_page *pv_ptp;		/* the vm_page of the PTP */
319 };
320 
321 /*
322  * global kernel variables
 *
 * All of these are declarations only; the definitions live outside this
 * header.
323  */
324 
325 /* PTDpaddr: is the physical address of the kernel's PDP */
326 extern u_long PTDpaddr;
327 
328 extern struct pmap kernel_pmap_store;	/* kernel pmap */
329 
/*
 * Per-level tables.  pl_i() indexes ptp_masks[] and ptp_shifts[] with
 * (level - 1).
 * NOTE(review): nkptp[], nbpd[] and nkptpmax[] are presumably initialised
 * from NKPTP_INITIALIZER, NBPD_INITIALIZER and NKPTPMAX_INITIALIZER --
 * confirm in pmap.c.
 */
330 extern paddr_t ptp_masks[];
331 extern int ptp_shifts[];
332 extern long nkptp[], nbpd[], nkptpmax[];
333 
334 /*
335  * macros
 *
 * pmap_kernel() yields the address of kernel_pmap_store.
 * pmap_resident_count()/pmap_wired_count() read fields of pm_stats.
 * The modify/reference helpers are thin wrappers around
 * pmap_clear_attrs()/pmap_test_attrs() with PG_M or PG_U.
 * pmap_update, pmap_copy, pmap_move, pmap_proc_iflush and
 * pmap_unuse_final expand to nothing; pmap_remove_holes expands to an
 * empty do/while statement.
336  */
337 
338 #define	pmap_kernel()			(&kernel_pmap_store)
339 #define	pmap_resident_count(pmap)	((pmap)->pm_stats.resident_count)
340 #define	pmap_wired_count(pmap)		((pmap)->pm_stats.wired_count)
341 #define	pmap_update(pmap)		/* nothing (yet) */
342 
343 #define pmap_clear_modify(pg)		pmap_clear_attrs(pg, PG_M)
344 #define pmap_clear_reference(pg)	pmap_clear_attrs(pg, PG_U)
345 #define pmap_copy(DP,SP,D,L,S)
346 #define pmap_is_modified(pg)		pmap_test_attrs(pg, PG_M)
347 #define pmap_is_referenced(pg)		pmap_test_attrs(pg, PG_U)
348 #define pmap_move(DP,SP,D,L,S)
349 #define pmap_valid_entry(E) 		((E) & PG_V) /* is PDE or PTE valid? */
350 
351 #define pmap_proc_iflush(p,va,len)	/* nothing */
352 #define pmap_unuse_final(p)		/* nothing */
353 #define	pmap_remove_holes(vm)		do { /* nothing */ } while (0)
354 
355 
356 /*
357  * prototypes
 *
 * The functions declared static here (pmap_page_protect, pmap_protect,
 * pmap_update_pg, pmap_update_2pg) are defined as inline functions
 * further down in this header; the others are defined outside it.
358  */
359 
360 paddr_t		pmap_bootstrap(paddr_t, paddr_t);
361 boolean_t	pmap_clear_attrs(struct vm_page *, unsigned long);
362 static void	pmap_page_protect(struct vm_page *, vm_prot_t);
363 void		pmap_page_remove (struct vm_page *);
364 static void	pmap_protect(struct pmap *, vaddr_t,
365 				vaddr_t, vm_prot_t);
366 void		pmap_remove(struct pmap *, vaddr_t, vaddr_t);
/* NOTE(review): takes unsigned while pmap_clear_attrs takes unsigned long */
367 boolean_t	pmap_test_attrs(struct vm_page *, unsigned);
368 static void	pmap_update_pg(vaddr_t);
369 static void	pmap_update_2pg(vaddr_t,vaddr_t);
370 void		pmap_write_protect(struct pmap *, vaddr_t,
371 				vaddr_t, vm_prot_t);
372 void		pmap_fix_ept(struct pmap *, vaddr_t);
373 
374 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
375 
376 paddr_t	pmap_prealloc_lowmem_ptps(paddr_t);
377 
378 void	pagezero(vaddr_t);
379 
380 int	pmap_convert(struct pmap *, int);
381 
382 /*
383  * functions for flushing the cache for vaddrs and pages.
384  * these functions are not part of the MI pmap interface and thus
385  * should not be used as such.
 *
 * pmap_flush_page(paddr) asserts (KDASSERT) that paddr has a vm_page and
 * then calls pmap_flush_cache() on PAGE_SIZE bytes starting at the
 * direct-map VA of paddr.  Note that paddr is evaluated twice.
386  */
387 void	pmap_flush_cache(vaddr_t, vsize_t);
388 #define pmap_flush_page(paddr) do {					\
389 	KDASSERT(PHYS_TO_VM_PAGE(paddr) != NULL);			\
390 	pmap_flush_cache(PMAP_DIRECT_MAP(paddr), PAGE_SIZE);		\
391 } while (/* CONSTCOND */ 0)
392 
/* feature-selection macros; both are defined with an empty value */
393 #define	PMAP_STEAL_MEMORY	/* enable pmap_steal_memory() */
394 #define PMAP_GROWKERNEL		/* turn on pmap_growkernel interface */
395 
396 /*
397  * inline functions
398  */
399 
/*
 * pmap_remove_all: deliberately does nothing in this pmap module.
 *
 * => the pmap argument is accepted and ignored
 */
static __inline void
pmap_remove_all(struct pmap *pmap)
{
	(void)pmap;	/* no work to do */
}
405 
406 /*
407  * pmap_update_pg: flush one page from the TLB (or flush the whole thing
408  *	if hardware doesn't support one-page flushing)
409  */
410 
411 __inline static void
412 pmap_update_pg(vaddr_t va)
413 {
414 	invlpg(va);
415 }
416 
417 /*
418  * pmap_update_2pg: flush two pages from the TLB
419  */
420 
421 __inline static void
422 pmap_update_2pg(vaddr_t va, vaddr_t vb)
423 {
424 	invlpg(va);
425 	invlpg(vb);
426 }
427 
428 /*
429  * pmap_page_protect: change the protection of all recorded mappings
430  *	of a managed page
431  *
432  * => this function is a frontend for pmap_page_remove/pmap_clear_attrs
433  * => we only have to worry about making the page more protected.
434  *	unprotecting a page is done on-demand at fault time.
435  */
436 
437 __inline static void
438 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
439 {
440 	if ((prot & PROT_WRITE) == 0) {
441 		if (prot & (PROT_READ | PROT_EXEC)) {
442 			(void) pmap_clear_attrs(pg, PG_RW);
443 		} else {
444 			pmap_page_remove(pg);
445 		}
446 	}
447 }
448 
449 /*
450  * pmap_protect: change the protection of pages in a pmap
451  *
452  * => this function is a frontend for pmap_remove/pmap_write_protect
453  * => we only have to worry about making the page more protected.
454  *	unprotecting a page is done on-demand at fault time.
455  */
456 
457 __inline static void
458 pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
459 {
460 	if ((prot & PROT_WRITE) == 0) {
461 		if (prot & (PROT_READ| PROT_EXEC)) {
462 			pmap_write_protect(pmap, sva, eva, prot);
463 		} else {
464 			pmap_remove(pmap, sva, eva);
465 		}
466 	}
467 }
468 
469 /*
470  * various address inlines
471  *
472  *  vtopte: return a pointer to the PTE mapping a VA, works only for
473  *  user and PT addresses
474  *
475  *  kvtopte: return a pointer to the PTE mapping a kernel VA
476  */
477 
478 static __inline pt_entry_t *
479 vtopte(vaddr_t va)
480 {
481 	return (PTE_BASE + pl1_i(va));
482 }
483 
484 static __inline pt_entry_t *
485 kvtopte(vaddr_t va)
486 {
487 #ifdef LARGEPAGES
488 	{
489 		pd_entry_t *pde;
490 
491 		pde = L1_BASE + pl2_i(va);
492 		if (*pde & PG_PS)
493 			return ((pt_entry_t *)pde);
494 	}
495 #endif
496 
497 	return (PTE_BASE + pl1_i(va));
498 }
499 
/*
 * Direct map helpers.
 *   PMAP_DIRECT_MAP(pa):   pa + PMAP_DIRECT_BASE, as a vaddr_t
 *   PMAP_DIRECT_UNMAP(va): va - PMAP_DIRECT_BASE, as a paddr_t
 *   pmap_map_direct(pg):   direct-map VA of a vm_page's physical address
 *   pmap_unmap_direct(va): vm_page for a direct-map VA
 * No range checking is performed by any of them.
 */
500 #define PMAP_DIRECT_MAP(pa)	((vaddr_t)PMAP_DIRECT_BASE + (pa))
501 #define PMAP_DIRECT_UNMAP(va)	((paddr_t)(va) - PMAP_DIRECT_BASE)
502 #define pmap_map_direct(pg)	PMAP_DIRECT_MAP(VM_PAGE_TO_PHYS(pg))
503 #define pmap_unmap_direct(va)	PHYS_TO_VM_PAGE(PMAP_DIRECT_UNMAP(va))
504 
/* advertises that this pmap provides the direct map macros above */
505 #define __HAVE_PMAP_DIRECT
506 
507 #endif /* _KERNEL && !_LOCORE */
508 
509 #ifndef _LOCORE
/*
 * Machine-dependent part of a vm_page: the head of the page's pv_entry
 * list together with a mutex.
 * NOTE(review): pv_mtx presumably protects pv_list -- confirm in pmap.c.
 */
510 struct pv_entry;
511 struct vm_page_md {
512 	struct mutex pv_mtx;
513 	struct pv_entry *pv_list;
514 };
515 
/*
 * VM_MDPAGE_INIT(pg): initialise pg->mdpage by setting up pv_mtx at
 * IPL_VM and emptying pv_list.
 */
516 #define VM_MDPAGE_INIT(pg) do {		\
517 	mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \
518 	(pg)->mdpage.pv_list = NULL;	\
519 } while (0)
520 #endif	/* !_LOCORE */
521 
522 #endif	/* _MACHINE_PMAP_H_ */
523