/* $NetBSD: x86_pte_tester.c,v 1.3 2022/08/21 14:06:42 mlelstv Exp $ */

/*
 * Copyright (c) 2016 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#define __HAVE_DIRECT_MAP
#define __HAVE_PCPU_AREA
#define SVS

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <uvm/uvm.h>
#include <x86/pmap.h>

#if defined(__x86_64__)
# include <amd64/pmap.h>
# include <amd64/pmap_private.h>
# define NLEVEL 4
#else
# error "Unsupported configuration"
#endif

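/*
 * Global tester state.  "levels" holds one reserved VA window per paging
 * level, used to temporarily map page directory pages during a walk.
 * "coord" records the slot currently being visited at each level, and
 * "results" accumulates the counters exported to userland via sysctl.
 */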
static struct {
	struct sysctllog *ctx_sysctllog;
	vaddr_t levels[NLEVEL];
	struct {
		size_t l4;
		size_t l3;
		size_t l2;
		size_t l1;
	} coord;
	struct {
		size_t n_rwx;
		size_t n_shstk;
		bool kernel_map_with_low_ptes;
		bool pte_is_user_accessible;
		size_t n_user_space_is_kernel;
		size_t n_kernel_space_is_user;
		size_t n_svs_g_bit_set;
	} results;
} tester_ctx;

typedef enum {
	WALK_NEXT, /* go to the next level */
	WALK_SKIP, /* skip the next level, but keep iterating on the current one */
	WALK_STOP  /* stop the iteration on the current level */
} walk_type;

/* -------------------------------------------------------------------------- */

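/*
 * Helpers to inspect an entry: test a flag, test the Present (P) bit, and
 * extract the physical frame address.
 */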
#define is_flag(__ent, __flag)	(((__ent) & (__flag)) != 0)
#define is_valid(__ent)		is_flag(__ent, PTE_P)
#define get_pa(__pde)		((__pde) & PTE_FRAME)

#define L4_MAX_NENTRIES	(PAGE_SIZE / sizeof(pd_entry_t))
#define L3_MAX_NENTRIES	(PAGE_SIZE / sizeof(pd_entry_t))
#define L2_MAX_NENTRIES	(PAGE_SIZE / sizeof(pd_entry_t))
#define L1_MAX_NENTRIES	(PAGE_SIZE / sizeof(pd_entry_t))

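/*
 * Generic page tree walk.  Each scan_lN() temporarily maps the physical
 * page of a directory into its reserved VA window with pmap_kenter_pa(),
 * iterates over the valid entries, invokes the callback, and recurses one
 * level down unless the callback says otherwise or the entry maps a large
 * page (PTE_PS).
 */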
static void
scan_l1(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[0];
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[0], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L1_MAX_NENTRIES; i++) {
		tester_ctx.coord.l1 = i;
		if (is_valid(pd[i])) {
			fn(pd[i], i, 1);
		}
	}

	pmap_kremove(tester_ctx.levels[0], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l2(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[1];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[1], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L2_MAX_NENTRIES; i++) {
		tester_ctx.coord.l2 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 2);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l1(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[1], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l3(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[2];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[2], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L3_MAX_NENTRIES; i++) {
		tester_ctx.coord.l3 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 3);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l2(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[2], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_l4(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[3];
	walk_type ret;
	size_t i;

	pmap_kenter_pa(tester_ctx.levels[3], pa, VM_PROT_READ, 0);
	pmap_update(pmap_kernel());

	for (i = 0; i < L4_MAX_NENTRIES; i++) {
		tester_ctx.coord.l4 = i;
		if (!is_valid(pd[i]))
			continue;
		ret = fn(pd[i], i, 4);
		if (ret == WALK_STOP)
			break;
		if (is_flag(pd[i], PTE_PS))
			continue;
		if (ret == WALK_NEXT)
			scan_l3(get_pa(pd[i]), fn);
	}

	pmap_kremove(tester_ctx.levels[3], PAGE_SIZE);
	pmap_update(pmap_kernel());
}

static void
scan_tree(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
{
	scan_l4(pa, fn);
}

/* -------------------------------------------------------------------------- */

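/*
 * The callbacks below each encode one invariant the x86 pmap is expected
 * to uphold.  L4 slots 0-255 cover the lower (user) half of the VA space,
 * slots 256-511 the upper (kernel) half; several rules rely on this split.
 */
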
/*
 * Rule: the number of kernel RWX pages should be zero.  A page counts as
 * RWX when it is both writable (PTE_W set) and executable (PTE_NX clear).
 */
static walk_type
count_krwx(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_NX) || !is_flag(pde, PTE_W)) {
		return WALK_SKIP;
	}
	if (lvl != 1 && !is_flag(pde, PTE_PS)) {
		return WALK_NEXT;
	}

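	/*
	 * Leaf reached: account for every 4KB page spanned by the mapping
	 * at this level.
	 */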
	if (lvl == 4) {
		tester_ctx.results.n_rwx += (NBPD_L4 / PAGE_SIZE);
	} else if (lvl == 3) {
		tester_ctx.results.n_rwx += (NBPD_L3 / PAGE_SIZE);
	} else if (lvl == 2) {
		tester_ctx.results.n_rwx += (NBPD_L2 / PAGE_SIZE);
	} else if (lvl == 1) {
		tester_ctx.results.n_rwx += (NBPD_L1 / PAGE_SIZE);
	}

	return WALK_NEXT;
}

/*
 * Rule: the number of kernel SHSTK (shadow stack) pages should be zero.
 */
static walk_type
count_kshstk(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}

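	/*
	 * A shadow stack page is encoded as read-only but Dirty
	 * (PTE_W clear, PTE_D set), following the Intel CET semantics.
	 */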
	if (is_flag(pde, PTE_PS) || lvl == 1) {
		if (!is_flag(pde, PTE_W) && is_flag(pde, PTE_D)) {
			if (lvl == 4) {
				tester_ctx.results.n_shstk += (NBPD_L4 / PAGE_SIZE);
			} else if (lvl == 3) {
				tester_ctx.results.n_shstk += (NBPD_L3 / PAGE_SIZE);
			} else if (lvl == 2) {
				tester_ctx.results.n_shstk += (NBPD_L2 / PAGE_SIZE);
			} else if (lvl == 1) {
				tester_ctx.results.n_shstk += (NBPD_L1 / PAGE_SIZE);
			}
		}
		return WALK_SKIP;
	}

	if (!is_flag(pde, PTE_W)) {
		return WALK_SKIP;
	}

	return WALK_NEXT;
}

/*
 * Rule: the lower half of the kernel map must be zero.
 */
static walk_type
check_kernel_map(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl != NLEVEL) {
		return WALK_STOP;
	}
	if (slot >= 256) {
		return WALK_SKIP;
	}
	if (pde != 0) {
		tester_ctx.results.kernel_map_with_low_ptes |= true;
	}
	return WALK_SKIP;
}

/*
 * Rule: the PTE space must not have user permissions.
 */
static walk_type
check_pte_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl != NLEVEL) {
		return WALK_STOP;
	}
	if (slot != PDIR_SLOT_PTE) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_U)) {
		tester_ctx.results.pte_is_user_accessible |= true;
	}
	return WALK_SKIP;
}

/*
 * Rule: each page in the lower half must have user permissions.
 */
static walk_type
check_user_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot >= 256) {
		return WALK_SKIP;
	}
	if (!is_flag(pde, PTE_U)) {
		tester_ctx.results.n_user_space_is_kernel += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/*
 * Rule: each page in the higher half must have kernel permissions.
 */
static walk_type
check_kernel_space(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}
	if (lvl == NLEVEL && slot == PDIR_SLOT_PTE) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_U)) {
		tester_ctx.results.n_kernel_space_is_user += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/*
 * Rule: the SVS map is allowed to use the G bit only on the PCPU area.
 */
static walk_type
check_svs_g_bit(pd_entry_t pde, size_t slot, int lvl)
{
	if (lvl == NLEVEL && slot == PDIR_SLOT_PCPU) {
		return WALK_SKIP;
	}
	if (is_flag(pde, PTE_G)) {
		tester_ctx.results.n_svs_g_bit_set += 1;
		return WALK_SKIP;
	}
	return WALK_NEXT;
}

/* -------------------------------------------------------------------------- */

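/*
 * Scan the current CPU's SVS (Separate Virtual Space) page tree.  SVS
 * gives userland its own page tables, so the user/kernel split and the
 * G-bit restriction are checked against the per-CPU user page directory.
 * When SVS is disabled, n_svs_g_bit_set is set to -1 as an "untested"
 * marker.
 */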
static void
scan_svs(void)
{
	extern bool svs_enabled;
	paddr_t pa0;

	if (!svs_enabled) {
		tester_ctx.results.n_svs_g_bit_set = -1;
		return;
	}

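	/*
	 * Disable preemption so the thread cannot migrate between reading
	 * curcpu() and walking its SVS page directory.
	 */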
	kpreempt_disable();
	pa0 = curcpu()->ci_svs_updirpa;
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_svs_g_bit);
	kpreempt_enable();
}

static void
scan_proc(struct proc *p)
{
	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
	paddr_t pa0;

	mutex_enter(&pmap->pm_lock);

	kpreempt_disable();
	pa0 = (paddr_t)pmap->pm_pdirpa[0];
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_pte_space);
	kpreempt_enable();

	mutex_exit(&pmap->pm_lock);
}

static void
x86_pte_run_scans(void)
{
	struct pmap *kpm = pmap_kernel();
	paddr_t pa0;

	memset(&tester_ctx.results, 0, sizeof(tester_ctx.results));

	/* Scan the current user process. */
	scan_proc(curproc);

	/* Scan the SVS mapping. */
	scan_svs();

	/* Scan the kernel map. */
	pa0 = (paddr_t)kpm->pm_pdirpa[0];
	scan_tree(pa0, &count_krwx);
	scan_tree(pa0, &count_kshstk);
	scan_tree(pa0, &check_kernel_map);
}

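/*
 * Reserve one VA-only window per paging level.  UVM_KMF_VAONLY allocates
 * address space without backing pages; the scan functions enter and
 * remove mappings on the fly.
 */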
static void
x86_pte_levels_init(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		tester_ctx.levels[i] = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		    UVM_KMF_VAONLY);
	}
}

static void
x86_pte_levels_destroy(void)
{
	size_t i;
	for (i = 0; i < NLEVEL; i++) {
		uvm_km_free(kernel_map, tester_ctx.levels[i], PAGE_SIZE,
		    UVM_KMF_VAONLY);
	}
}

/* -------------------------------------------------------------------------- */

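/*
 * Sysctl handler: run all scans, then copy the results structure out
 * using the usual two-step protocol (size query with oldp == NULL, then
 * the actual read).  A hypothetical userland sketch, with the node path
 * taken from the registration below:
 *
 *	size_t len;
 *	sysctlbyname("kern.x86_pte_test.test", NULL, &len, NULL, 0);
 *	sysctlbyname("kern.x86_pte_test.test", &results, &len, NULL, 0);
 */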
static int
x86_pte_sysctl_run(SYSCTLFN_ARGS)
{
	if (oldlenp == NULL)
		return EINVAL;

	x86_pte_run_scans();

	if (oldp == NULL) {
		*oldlenp = sizeof(tester_ctx.results);
		return 0;
	}

	if (*oldlenp < sizeof(tester_ctx.results))
		return ENOMEM;

	return copyout(&tester_ctx.results, oldp, sizeof(tester_ctx.results));
}

static int
x86_pte_sysctl_init(void)
{
	struct sysctllog **log = &tester_ctx.ctx_sysctllog;
	const struct sysctlnode *rnode, *cnode;
	int error;

	error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "x86_pte_test",
	    SYSCTL_DESCR("x86_pte testing interface"),
	    NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
	if (error)
		goto out;

	error = sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "test",
	    SYSCTL_DESCR("execute an x86_pte test"),
	    x86_pte_sysctl_run, 0, NULL, 0, CTL_CREATE, CTL_EOL);

out:
	if (error)
		sysctl_teardown(log);
	return error;
}

static void
x86_pte_sysctl_destroy(void)
{
	sysctl_teardown(&tester_ctx.ctx_sysctllog);
}

/* -------------------------------------------------------------------------- */

MODULE(MODULE_CLASS_MISC, x86_pte_tester, NULL);

static int
x86_pte_tester_modcmd(modcmd_t cmd, void *arg __unused)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
		x86_pte_levels_init();
		error = x86_pte_sysctl_init();
		break;
	case MODULE_CMD_FINI:
		x86_pte_sysctl_destroy();
		x86_pte_levels_destroy();
		break;
	default:
		error = ENOTTY;
		break;
	}

	return error;
}