1 /* $OpenBSD: dump_tables.c,v 1.8 2023/05/11 22:28:38 guenther Exp $ */
2 /*
3 * Copyright (c) 2019,2023 Philip Guenther <guenther@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 /*
19 * Dump amd64 page tables to text for analysis
20 * Requires "kern.allowkmem=1" sysctl
21 */
22
23 #include <sys/param.h> /* PAGE_SIZE and other things */
24 #include <sys/sysctl.h>
25 #include <sys/time.h>
26 #include <uvm/uvm_extern.h>
27 #include <machine/pmap.h>
28 #include <machine/pcb.h>
29
30 /*
31 * Getting struct pmap from <machine/pmap.h> is too hard right now.
32 * Just extract it and go.
33 */
34 #include "struct_pmap.h"
35
36 #define PG_1GFRAME 0x000fffffc0000000UL /* should be in pmap.h */
37
38 #define PG_PK_SHIFT 59
39
40
#include <err.h>
#include <fcntl.h>
#include <kvm.h>
#include <nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
47
/*
 * Print the usage message and terminate the program.
 *
 * status is passed straight to exit().  The text is written to stdout
 * when status is 0 (an explicit -h request) and to stderr otherwise, so
 * that a usage error does not pollute captured output.
 */
void
usage(int status)
{
	fputs(
	    "Usage: dump_tables [-1234dhlmpru]\n"
	    "-1234\tShow the specified levels in the page tables.\n"
	    "-d\tHide the entries in the direct-map.\n"
	    "-h\tShow this usage message.\n"
	    "-l\tShow the leaf entries, whether 4kB, 2MB, or 1GB.\n"
	    "-m\tShow the Meltdown U-K page tables instead.\n"
	    "-p\tHide the entries through the recursive PTE mapping.\n"
	    "-r\tSuppress the 'U'sed and 'M'odified attributes to increase\n"
	    "\treproducibility.\n"
	    "-u\tShow the page tables for PID 1 instead of those for kernel threads.\n"
	    "\n"
	    "Dump the page tables, including intermediate levels, showing for\n"
	    "each valid entry the virtual-address (VA) it applies to, the level\n"
	    "of page table, the index of the entry within its page, the physical\n"
	    "address (PA) it points to, the size of leaf page it points to, the\n"
	    "attributes on the entry, the effective attributes for those affected\n"
	    "by higher levels of page table, and the slot type for those which have\n"
	    "a particular name.\n\n"
	    "If none of the options -1234l are used, then all levels will be shown.\n",
	    status == 0 ? stdout : stderr);
	exit(status);
}
74
75
76 kvm_t *k;
77 pd_entry_t *pt[5];
78 int meltdown, hide_direct, hide_pte, reproducible, show[5], show_leaves;
79 int user_proc;
80
81 struct nlist proc0[] = { { "_proc0paddr" }, { NULL } };
82
/*
 * Helpers for reading kernel memory through the global handle k.
 * Every one of them exits via errx() if the read comes back short.
 */

/* Read exactly s bytes from kernel address addr into p; msg names the data. */
#define KGETRET(addr, p, s, msg) do {					\
	if (kvm_read(k, (addr), (p), (s)) != (s))			\
		errx(1, "cannot read %s: %s", (msg), kvm_geterr(k));	\
} while (0)

/* Read a whole object into the variable var, using its name in errors. */
#define KGET(addr, var)							\
	KGETRET((addr), &(var), sizeof(var), #var)

/* Fill the buffer for the given level with the page at virtual address addr. */
#define KGETPT_VA(addr, level)						\
	KGETRET((addr), pt[level], PAGE_SIZE, ptname[level])

/* As KGETPT_VA, but addr is physical and is read through the direct map. */
#define KGETPT_PA(addr, level)						\
	KGETPT_VA(PMAP_DIRECT_MAP(addr), (level))
93
94 const int shift[] = {
95 [3] = L3_SHIFT,
96 [2] = L2_SHIFT,
97 [1] = L1_SHIFT,
98 };
99 const char * const ptname[] = {
100 [4] = "pml4",
101 [3] = "pt3",
102 [2] = "pt2",
103 [1] = "pt1",
104 };
105
106 /* Not currently used */
107 const pd_entry_t ign_normal[] = {
108 [4] = 0x0000000000000f40UL,
109 #define IGN_1GFRAME 0x0000000000000e00UL
110 [3] = 0x0000000000000f40UL,
111 #define IGN_LGFRAME 0x0000000000000e00UL
112 [2] = 0x0000000000000f40UL,
113 [1] = 0x0000000000000e00UL,
114 };
115
116 const pd_entry_t mbz_normal[] = {
117 [5] = 0x0000000000000fe7UL,
118 [4] = 0x0000000000000080UL,
119 #define MBZ_1GFRAME 0x000000003fffe000UL
120 [3] = 0x0000000000000000UL,
121 #define MBZ_LGFRAME 0x00000000001fe000UL
122 [2] = 0x0000000000000000UL,
123 [1] = 0x0000000000000000UL,
124 };
125
126 static inline void
check_mbz(pd_entry_t e,pd_entry_t mbz)127 check_mbz(pd_entry_t e, pd_entry_t mbz)
128 {
129 if ((e & mbz) != 0)
130 errx(1, "non-zero mbz: %016llx in %016llx", e & mbz, e);
131 }
132
133 enum l4_type { T_NORMAL = 0, T_DIRECT, T_PTE, T_KERNBASE, };
134
135 static inline enum l4_type
l4type(int i)136 l4type(int i)
137 {
138 if (i >= L4_SLOT_DIRECT && i < L4_SLOT_DIRECT + NUM_L4_SLOT_DIRECT)
139 return T_DIRECT;
140 if (i == L4_SLOT_PTE)
141 return T_PTE;
142 if (i == L4_SLOT_KERNBASE)
143 return T_KERNBASE;
144 return T_NORMAL;
145 }
146
147 const char pk_name[16] = "R-23456789abcdef";
148 void
pflags(pd_entry_t e,pd_entry_t inherited)149 pflags(pd_entry_t e, pd_entry_t inherited)
150 {
151 int pk = (e & PG_PKMASK) >> PG_PK_SHIFT;
152 if (reproducible)
153 e &= ~(PG_M|PG_U);
154 inherited &= e;
155 printf("[%c%c%c%c%c""%c%c%c%c][%c%c%c%c]",
156 e & PG_NX ? 'X' : '-', /* reversed */
157 pk_name[pk],
158 e & PG_G ? 'G' : '-',
159 e & PG_M ? 'M' : '-',
160 e & PG_U ? 'U' : '-',
161 e & PG_N ? 'N' : '-',
162 e & PG_WT ? 'w' : '-',
163 e & PG_u ? 'u' : '-',
164 e & PG_RW ? 'W' : '-',
165 inherited & PG_u ? 'u' : '-',
166 pk_name[pk],
167 inherited & PG_RW ? 'W' : '-',
168 inherited & PG_NX ? 'X' : '-'); /* reversed */
169 }
170
171 const char * const prefix[] = {
172 [4] = "4 ",
173 [3] = " 3 ",
174 [2] = " 2 ",
175 [1] = " 1",
176 };
177
178 void
pent(int level,int idx,vaddr_t va,pd_entry_t e,pd_entry_t inherited,enum l4_type l4_type)179 pent(int level, int idx, vaddr_t va, pd_entry_t e, pd_entry_t inherited,
180 enum l4_type l4_type)
181 {
182 if ((e & PG_V) == 0)
183 return;
184
185 /* have an actual mapping */
186 pd_entry_t pa, mbz;
187 char type;
188 if ((e & PG_PS) && level == 2) {
189 pa = e & PG_LGFRAME;
190 mbz = MBZ_LGFRAME;
191 type = 'M';
192 } else if ((e & PG_PS) && level == 3) {
193 pa = e & PG_1GFRAME;
194 mbz = MBZ_1GFRAME;
195 type = 'G';
196 } else {
197 pa = e & PG_FRAME;
198 mbz = mbz_normal[level];
199 type = level == 1 ? 'k' : ' ';
200 }
201 check_mbz(e, mbz);
202
203 e ^= PG_NX;
204 inherited &= e;
205 if (show[level] || (show_leaves && type != ' ')) {
206 printf("%016lx %s% 4d -> ", va, prefix[level], idx);
207
208 printf("%016llx %c ", pa, type);
209 pflags(e, inherited);
210 switch (l4_type) {
211 case T_NORMAL: putchar('\n'); break;
212 case T_DIRECT: puts(" direct"); break;
213 case T_PTE: puts(" pte"); break;
214 case T_KERNBASE: puts(" kernbase"); break;
215 }
216 }
217
218 if (type != ' ')
219 return;
220 level--;
221 KGETPT_PA(pa, level);
222 for (u_long i = 0; i < PAGE_SIZE / 8; i++) {
223 pent(level, i, (i << shift[level]) + va, pt[level][i],
224 inherited, l4_type == T_PTE ? l4type(i) : T_NORMAL);
225 }
226 }
227
228
229 int
main(int argc,char ** argv)230 main(int argc, char **argv)
231 {
232 u_long paddr;
233 struct pcb pcb;
234 pd_entry_t cr3;
235 u_long i;
236 int ch;
237
238 while ((ch = getopt(argc, argv, "1234dhlmpru")) != -1) {
239 switch (ch) {
240 case '1': case '2': case '3': case '4':
241 show[ch - '0'] = 1;
242 break;
243 case 'd':
244 hide_direct = 1;
245 break;
246 case 'h':
247 usage(0);
248 break;
249 case 'l':
250 show_leaves = 1;
251 break;
252 case 'm':
253 meltdown = 1;
254 break;
255 case 'p':
256 hide_pte = 1;
257 break;
258 case 'r':
259 reproducible = 1;
260 break;
261 case 'u':
262 user_proc = 1;
263 break;
264 default:
265 usage(1);
266 }
267 }
268 argc -= optind;
269 argv += optind;
270 if (argc != 0)
271 usage(1);
272
273 if (!show[1] && !show[2] && !show[3] && !show[4] && !show_leaves)
274 show[1] = show[2] = show[3] = show[4] = 1;
275
276 if ((pt[4] = malloc(PAGE_SIZE)) == NULL ||
277 (pt[3] = malloc(PAGE_SIZE)) == NULL ||
278 (pt[2] = malloc(PAGE_SIZE)) == NULL ||
279 (pt[1] = malloc(PAGE_SIZE)) == NULL)
280 err(1, "malloc");
281
282 k = kvm_open(NULL, NULL, NULL, O_RDONLY, "foo");
283 if (k == NULL)
284 return 1;
285
286 if (user_proc) {
287 int cnt;
288 struct kinfo_proc *kp = kvm_getprocs(k, KERN_PROC_PID, 1,
289 sizeof *kp, &cnt);
290 paddr = kp->p_addr;
291 } else {
292 if (kvm_nlist(k, proc0) != 0)
293 err(1, "nlist");
294 KGET(proc0[0].n_value, paddr);
295 }
296
297 KGET(paddr, pcb);
298
299 cr3 = pcb.pcb_cr3 & ~0xfff; /* mask off PCID */
300 if (meltdown) {
301 struct pmap pmap;
302 KGET((u_long)pcb.pcb_pmap, pmap);
303 if (cr3 != pmap.pm_pdirpa)
304 errx(1, "cr3 != pm_pdir: %016llx != %016lx",
305 cr3, pmap.pm_pdirpa);
306
307 cr3 = (u_long)pmap.pm_pdir_intel; /* VA */
308 if (cr3 == 0)
309 errx(1, "meltdown mitigation not enabled");
310 KGETPT_VA(cr3, 4);
311 } else {
312 KGETPT_PA(cr3, 4);
313 /*printf("PML4 @ %016llx\n", cr3);*/
314 check_mbz(cr3, mbz_normal[5]);
315 }
316 printf("\
317 VA lvl idx PA sz entry-attr eff L4-slot\
318 \n");
319 for (i = 0; i < PAGE_SIZE / sizeof(pd_entry_t); i++) {
320 enum l4_type l4_type = l4type(i);
321 if ((l4_type == T_DIRECT && hide_direct) ||
322 (l4_type == T_PTE && hide_pte))
323 continue;
324 u_long va = i << L4_SHIFT;
325 if (i > 255)
326 va |= VA_SIGN_MASK;
327 pent(4, i, va, pt[4][i], ~0UL, l4_type);
328 }
329 return 0;
330 }
331