xref: /openbsd-src/regress/sys/arch/amd64/dump_tables/dump_tables.c (revision e9dcde5677852948d4d87ff9cb8593d3d445da15)
1 /*	$OpenBSD: dump_tables.c,v 1.8 2023/05/11 22:28:38 guenther Exp $	*/
2 /*
3  * Copyright (c) 2019,2023 Philip Guenther <guenther@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 /*
19  * Dump amd64 page tables to text for analysis
20  * Requires "kern.allowkmem=1" sysctl
21  */
22 
23 #include <sys/param.h>	/* PAGE_SIZE and other things */
24 #include <sys/sysctl.h>
25 #include <sys/time.h>
26 #include <uvm/uvm_extern.h>
27 #include <machine/pmap.h>
28 #include <machine/pcb.h>
29 
30 /*
31  * Getting struct pmap from <machine/pmap.h> is too hard right now.
32  * Just extract it and go.
33  */
34 #include "struct_pmap.h"
35 
/* Mask selecting the physical address from a level-3 entry with PG_PS set */
#define PG_1GFRAME	0x000fffffc0000000UL	/* should be in pmap.h */

/* Right shift applied to (entry & PG_PKMASK) to get the index into pk_name[] */
#define PG_PK_SHIFT	59
39 
40 
#include <err.h>
#include <fcntl.h>
#include <kvm.h>
#include <nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
47 
/*
 * Print the option summary and a description of the output columns to
 * stdout, then exit with the given status.  Does not return.
 *
 * The synopsis lists every option letter that main() hands to getopt().
 */
void
usage(int status)
{
	printf(
	    "Usage: dump_tables [-1234dhlmpru]\n"
	    " -1234\tShow the specified levels in the page tables.\n"
	    " -d\tHide the entries in the direct-map.\n"
	    " -h\tShow this usage message.\n"
	    " -l\tShow the leaf entries, whether 4kB, 2MB, or 1GB.\n"
	    " -m\tShow the Meltdown U-K page tables instead.\n"
	    " -p\tHide the entries through the recursive PTE mapping.\n"
	    " -r\tSuppress the 'U'sed and 'M'odified attributes to increase\n"
	    "\treproducibility.\n"
	    " -u\tShow the page tables for PID 1 instead of those for kernel threads.\n"
	    "\n"
	    "Dump the page tables, including intermediate levels, showing for\n"
	    "each valid entry the virtual-address (VA) it applies to, the level\n"
	    "of page table, the index of the entry within its page, the physical\n"
	    "address (PA) it points to, the size of leaf page it points to, the\n"
	    "attributes on the entry, the effective attributes for those affected\n"
	    "by higher levels of page table, and the slot type for those which have\n"
	    "a particular name.\n\n"
	    "If none of the options -1234l are used, then all levels will be shown.\n"
	);
	exit(status);
}
74 
75 
76 kvm_t *k;
77 pd_entry_t *pt[5];
78 int meltdown, hide_direct, hide_pte, reproducible, show[5], show_leaves;
79 int user_proc;
80 
81 struct nlist proc0[] = { { "_proc0paddr" }, { NULL } };
82 
/*
 * Helpers for reading kernel memory through the global kvm handle `k'.
 * All of them exit via errx() if the read does not return the full size.
 * Arguments are parenthesized so that arbitrary expressions can be passed.
 */

/* Read the object `var' from kernel address `addr'; `var' names the error. */
#define KGET(addr, var)							\
	KGETRET((addr), &(var), sizeof(var), #var)

/* Read `s' bytes from kernel address `addr' into `p'; `msg' names the data. */
#define KGETRET(addr, p, s, msg)	do {				\
	if (kvm_read(k, (addr), (p), (s)) != (s))			\
		errx(1, "cannot read %s: %s", (msg), kvm_geterr(k));	\
} while (0)

/* Read the page-table page at physical address `addr' into pt[level]. */
#define KGETPT_PA(addr, level)						\
	KGETPT_VA(PMAP_DIRECT_MAP(addr), (level))

/* Read the page-table page at kernel virtual address `addr' into pt[level]. */
#define KGETPT_VA(addr, level)						\
	KGETRET((addr), pt[(level)], PAGE_SIZE, ptname[(level)])
93 
94 const int shift[] = {
95     [3] = L3_SHIFT,
96     [2] = L2_SHIFT,
97     [1] = L1_SHIFT,
98 };
/* Name of each table level, used in KGETPT_VA's read-failure message. */
const char * const ptname[] = {
    [1] = "pt1",
    [2] = "pt2",
    [3] = "pt3",
    [4] = "pml4",
};
105 
106 /* Not currently used */
107 const pd_entry_t ign_normal[] = {
108     [4] =		0x0000000000000f40UL,
109 #define	IGN_1GFRAME	0x0000000000000e00UL
110     [3] =		0x0000000000000f40UL,
111 #define	IGN_LGFRAME	0x0000000000000e00UL
112     [2] =		0x0000000000000f40UL,
113     [1] =		0x0000000000000e00UL,
114 };
115 
116 const pd_entry_t mbz_normal[] = {
117     [5] =		0x0000000000000fe7UL,
118     [4] =		0x0000000000000080UL,
119 #define	MBZ_1GFRAME	0x000000003fffe000UL
120     [3] =		0x0000000000000000UL,
121 #define	MBZ_LGFRAME	0x00000000001fe000UL
122     [2] =		0x0000000000000000UL,
123     [1] =		0x0000000000000000UL,
124 };
125 
126 static inline void
check_mbz(pd_entry_t e,pd_entry_t mbz)127 check_mbz(pd_entry_t e, pd_entry_t mbz)
128 {
129 	if ((e & mbz) != 0)
130 		errx(1, "non-zero mbz: %016llx in %016llx", e & mbz, e);
131 }
132 
133 enum l4_type { T_NORMAL = 0, T_DIRECT, T_PTE, T_KERNBASE, };
134 
135 static inline enum l4_type
l4type(int i)136 l4type(int i)
137 {
138 	if (i >= L4_SLOT_DIRECT && i < L4_SLOT_DIRECT + NUM_L4_SLOT_DIRECT)
139 		return T_DIRECT;
140 	if (i == L4_SLOT_PTE)
141 		return T_PTE;
142 	if (i == L4_SLOT_KERNBASE)
143 		return T_KERNBASE;
144 	return T_NORMAL;
145 }
146 
147 const char pk_name[16] = "R-23456789abcdef";
148 void
pflags(pd_entry_t e,pd_entry_t inherited)149 pflags(pd_entry_t e, pd_entry_t inherited)
150 {
151 	int pk = (e & PG_PKMASK) >> PG_PK_SHIFT;
152 	if (reproducible)
153 		e &= ~(PG_M|PG_U);
154 	inherited &= e;
155 	printf("[%c%c%c%c%c""%c%c%c%c][%c%c%c%c]",
156 	    e & PG_NX ? 'X' : '-',	/* reversed */
157 	    pk_name[pk],
158 	    e & PG_G  ? 'G' : '-',
159 	    e & PG_M  ? 'M' : '-',
160 	    e & PG_U  ? 'U' : '-',
161 	    e & PG_N  ? 'N' : '-',
162 	    e & PG_WT ? 'w' : '-',
163 	    e & PG_u  ? 'u' : '-',
164 	    e & PG_RW ? 'W' : '-',
165 	    inherited & PG_u  ? 'u' : '-',
166 	    pk_name[pk],
167 	    inherited & PG_RW ? 'W' : '-',
168 	    inherited & PG_NX ? 'X' : '-');	/* reversed */
169 }
170 
/*
 * Four-column level marker printed by pent(): the level's digit sits
 * one column further right for each step down from level 4.
 */
const char * const prefix[] = {
    [1] = "   1",
    [2] = "  2 ",
    [3] = " 3  ",
    [4] = "4   ",
};
177 
178 void
pent(int level,int idx,vaddr_t va,pd_entry_t e,pd_entry_t inherited,enum l4_type l4_type)179 pent(int level, int idx, vaddr_t va, pd_entry_t e, pd_entry_t inherited,
180     enum l4_type l4_type)
181 {
182 	if ((e & PG_V) == 0)
183 		return;
184 
185 	/* have an actual mapping */
186 	pd_entry_t pa, mbz;
187 	char type;
188 	if ((e & PG_PS) && level == 2) {
189 		pa = e & PG_LGFRAME;
190 		mbz = MBZ_LGFRAME;
191 		type = 'M';
192 	} else if ((e & PG_PS) && level == 3) {
193 		pa = e & PG_1GFRAME;
194 		mbz = MBZ_1GFRAME;
195 		type = 'G';
196 	} else {
197 		pa = e & PG_FRAME;
198 		mbz = mbz_normal[level];
199 		type = level == 1 ? 'k' : ' ';
200 	}
201 	check_mbz(e, mbz);
202 
203 	e ^= PG_NX;
204 	inherited &= e;
205 	if (show[level] || (show_leaves && type != ' ')) {
206 		printf("%016lx %s% 4d -> ", va, prefix[level], idx);
207 
208 		printf("%016llx %c ", pa, type);
209 		pflags(e, inherited);
210 		switch (l4_type) {
211 		case T_NORMAL:		putchar('\n'); break;
212 		case T_DIRECT:		puts(" direct"); break;
213 		case T_PTE:		puts(" pte"); break;
214 		case T_KERNBASE:	puts(" kernbase"); break;
215 		}
216 	}
217 
218 	if (type != ' ')
219 		return;
220 	level--;
221 	KGETPT_PA(pa, level);
222 	for (u_long i = 0; i < PAGE_SIZE / 8; i++) {
223 		pent(level, i, (i << shift[level]) + va, pt[level][i],
224 		    inherited, l4_type == T_PTE ? l4type(i) : T_NORMAL);
225 	}
226 }
227 
228 
229 int
main(int argc,char ** argv)230 main(int argc, char **argv)
231 {
232 	u_long paddr;
233 	struct pcb pcb;
234 	pd_entry_t cr3;
235 	u_long i;
236 	int ch;
237 
238 	while ((ch = getopt(argc, argv, "1234dhlmpru")) != -1) {
239 		switch (ch) {
240 		case '1': case '2': case '3': case '4':
241 			show[ch - '0'] = 1;
242 			break;
243 		case 'd':
244 			hide_direct = 1;
245 			break;
246 		case 'h':
247 			usage(0);
248 			break;
249 		case 'l':
250 			show_leaves = 1;
251 			break;
252 		case 'm':
253 			meltdown = 1;
254 			break;
255 		case 'p':
256 			hide_pte = 1;
257 			break;
258 		case 'r':
259 			reproducible = 1;
260 			break;
261 		case 'u':
262 			user_proc = 1;
263 			break;
264 		default:
265 			usage(1);
266 		}
267 	}
268 	argc -= optind;
269 	argv += optind;
270 	if (argc != 0)
271 		usage(1);
272 
273 	if (!show[1] && !show[2] && !show[3] && !show[4] && !show_leaves)
274 		show[1] = show[2] = show[3] = show[4] = 1;
275 
276 	if ((pt[4] = malloc(PAGE_SIZE)) == NULL ||
277 	    (pt[3] = malloc(PAGE_SIZE)) == NULL ||
278 	    (pt[2] = malloc(PAGE_SIZE)) == NULL ||
279 	    (pt[1] = malloc(PAGE_SIZE)) == NULL)
280 		err(1, "malloc");
281 
282 	k = kvm_open(NULL, NULL, NULL, O_RDONLY, "foo");
283 	if (k == NULL)
284 		return 1;
285 
286 	if (user_proc) {
287 		int cnt;
288 		struct kinfo_proc *kp = kvm_getprocs(k, KERN_PROC_PID, 1,
289 			sizeof *kp, &cnt);
290 		paddr = kp->p_addr;
291 	} else {
292 		if (kvm_nlist(k, proc0) != 0)
293 			err(1, "nlist");
294 		KGET(proc0[0].n_value, paddr);
295 	}
296 
297 	KGET(paddr, pcb);
298 
299 	cr3 = pcb.pcb_cr3 & ~0xfff;		/* mask off PCID */
300 	if (meltdown) {
301 		struct pmap pmap;
302 		KGET((u_long)pcb.pcb_pmap, pmap);
303 		if (cr3 != pmap.pm_pdirpa)
304 			errx(1, "cr3 != pm_pdir: %016llx != %016lx",
305 			    cr3, pmap.pm_pdirpa);
306 
307 		cr3 = (u_long)pmap.pm_pdir_intel;	/* VA */
308 		if (cr3 == 0)
309 			errx(1, "meltdown mitigation not enabled");
310 		KGETPT_VA(cr3, 4);
311 	} else {
312 		KGETPT_PA(cr3, 4);
313 		/*printf("PML4 @ %016llx\n", cr3);*/
314 		check_mbz(cr3, mbz_normal[5]);
315 	}
316 	printf("\
317 VA               lvl  idx    PA              sz entry-attr  eff   L4-slot\
318 \n");
319 	for (i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
320 		enum l4_type l4_type = l4type(i);
321 		if ((l4_type == T_DIRECT && hide_direct) ||
322 		    (l4_type == T_PTE && hide_pte))
323 			continue;
324 		u_long va = i << L4_SHIFT;
325 		if (i > 255)
326 			va |= VA_SIGN_MASK;
327 		pent(4, i, va, pt[4][i], ~0UL, l4_type);
328 	}
329 	return 0;
330 }
331