xref: /netbsd-src/tests/modules/x86_pte_tester/x86_pte_tester.c (revision c0019d52610b83e77b163935aad3f5d483c9a46a)
1 /*	$NetBSD: x86_pte_tester.c,v 1.3 2022/08/21 14:06:42 mlelstv Exp $	*/
2 
3 /*
4  * Copyright (c) 2016 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #define __HAVE_DIRECT_MAP
30 #define __HAVE_PCPU_AREA
31 #define SVS
32 
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/module.h>
36 #include <sys/proc.h>
37 #include <sys/sysctl.h>
38 #include <uvm/uvm.h>
39 #include <x86/pmap.h>
40 
41 #if defined(__x86_64__)
42 # include <amd64/pmap.h>
43 # include <amd64/pmap_private.h>
44 # define NLEVEL 4
45 #else
46 # error "Unsupported configuration"
47 #endif
48 
/*
 * Global tester state.  A single instance suffices: results are only
 * produced by the sysctl handler and copied out from there.
 */
static struct {
	struct sysctllog *ctx_sysctllog;	/* sysctl tree handle, for teardown */
	vaddr_t levels[NLEVEL];		/* scratch KVA, one page per paging level */
	struct {
		/* Current slot at each level while a walk is in progress. */
		size_t l4;
		size_t l3;
		size_t l2;
		size_t l1;
	} coord;
	struct {
		/* Scan results, exported to userland via sysctl. */
		size_t n_rwx;			/* kernel pages writable+executable (count_krwx) */
		size_t n_shstk;			/* kernel pages with W=0,D=1 (count_kshstk) */
		bool kernel_map_with_low_ptes;	/* kernel map has non-zero lower-half slots */
		bool pte_is_user_accessible;	/* PTE slot has PTE_U set */
		size_t n_user_space_is_kernel;	/* lower-half entries lacking PTE_U */
		size_t n_kernel_space_is_user;	/* higher-half entries with PTE_U */
		size_t n_svs_g_bit_set;		/* G bit outside PCPU; (size_t)-1 = SVS off */
	} results;
} tester_ctx;
68 
/*
 * Verdict returned by a walk callback; controls how the tree scan
 * proceeds after visiting an entry.
 */
typedef enum {
	WALK_NEXT, /* go to the next level */
	WALK_SKIP, /* skip the next level, but keep iterating on the current one */
	WALK_STOP  /* stop the iteration on the current level */
} walk_type;
74 
75 /* -------------------------------------------------------------------------- */
76 
/*
 * PTE inspection helpers.  Every macro argument is parenthesized in the
 * expansion so that compound arguments (e.g. "PTE_A | PTE_D") cannot be
 * re-associated by operator precedence ('&' binds tighter than '|').
 */
#define is_flag(__ent, __flag)	(((__ent) & (__flag)) != 0)
#define is_valid(__ent)		is_flag(__ent, PTE_P)
#define get_pa(__pde)		((__pde) & PTE_FRAME)

/* Each level's directory page holds one page worth of pd_entry_t slots. */
#define L4_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L3_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L2_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
#define L1_MAX_NENTRIES (PAGE_SIZE / sizeof(pd_entry_t))
85 
86 static void
scan_l1(paddr_t pa,walk_type (fn)(pd_entry_t pde,size_t slot,int lvl))87 scan_l1(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
88 {
89 	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[0];
90 	size_t i;
91 
92 	pmap_kenter_pa(tester_ctx.levels[0], pa, VM_PROT_READ, 0);
93 	pmap_update(pmap_kernel());
94 
95 	for (i = 0; i < L1_MAX_NENTRIES; i++) {
96 		tester_ctx.coord.l1 = i;
97 		if (is_valid(pd[i])) {
98 			fn(pd[i], i, 1);
99 		}
100 	}
101 
102 	pmap_kremove(tester_ctx.levels[0], PAGE_SIZE);
103 	pmap_update(pmap_kernel());
104 }
105 
106 static void
scan_l2(paddr_t pa,walk_type (fn)(pd_entry_t pde,size_t slot,int lvl))107 scan_l2(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
108 {
109 	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[1];
110 	walk_type ret;
111 	size_t i;
112 
113 	pmap_kenter_pa(tester_ctx.levels[1], pa, VM_PROT_READ, 0);
114 	pmap_update(pmap_kernel());
115 
116 	for (i = 0; i < L2_MAX_NENTRIES; i++) {
117 		tester_ctx.coord.l2 = i;
118 		if (!is_valid(pd[i]))
119 			continue;
120 		ret = fn(pd[i], i, 2);
121 		if (ret == WALK_STOP)
122 			break;
123 		if (is_flag(pd[i], PTE_PS))
124 			continue;
125 		if (ret == WALK_NEXT)
126 			scan_l1(get_pa(pd[i]), fn);
127 	}
128 
129 	pmap_kremove(tester_ctx.levels[1], PAGE_SIZE);
130 	pmap_update(pmap_kernel());
131 }
132 
133 static void
scan_l3(paddr_t pa,walk_type (fn)(pd_entry_t pde,size_t slot,int lvl))134 scan_l3(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
135 {
136 	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[2];
137 	walk_type ret;
138 	size_t i;
139 
140 	pmap_kenter_pa(tester_ctx.levels[2], pa, VM_PROT_READ, 0);
141 	pmap_update(pmap_kernel());
142 
143 	for (i = 0; i < L3_MAX_NENTRIES; i++) {
144 		tester_ctx.coord.l3 = i;
145 		if (!is_valid(pd[i]))
146 			continue;
147 		ret = fn(pd[i], i, 3);
148 		if (ret == WALK_STOP)
149 			break;
150 		if (is_flag(pd[i], PTE_PS))
151 			continue;
152 		if (ret == WALK_NEXT)
153 			scan_l2(get_pa(pd[i]), fn);
154 	}
155 
156 	pmap_kremove(tester_ctx.levels[2], PAGE_SIZE);
157 	pmap_update(pmap_kernel());
158 }
159 
160 static void
scan_l4(paddr_t pa,walk_type (fn)(pd_entry_t pde,size_t slot,int lvl))161 scan_l4(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
162 {
163 	pd_entry_t *pd = (pd_entry_t *)tester_ctx.levels[3];
164 	walk_type ret;
165 	size_t i;
166 
167 	pmap_kenter_pa(tester_ctx.levels[3], pa, VM_PROT_READ, 0);
168 	pmap_update(pmap_kernel());
169 
170 	for (i = 0; i < L4_MAX_NENTRIES; i++) {
171 		tester_ctx.coord.l4 = i;
172 		if (!is_valid(pd[i]))
173 			continue;
174 		ret = fn(pd[i], i, 4);
175 		if (ret == WALK_STOP)
176 			break;
177 		if (is_flag(pd[i], PTE_PS))
178 			continue;
179 		if (ret == WALK_NEXT)
180 			scan_l3(get_pa(pd[i]), fn);
181 	}
182 
183 	pmap_kremove(tester_ctx.levels[3], PAGE_SIZE);
184 	pmap_update(pmap_kernel());
185 }
186 
187 static void
scan_tree(paddr_t pa,walk_type (fn)(pd_entry_t pde,size_t slot,int lvl))188 scan_tree(paddr_t pa, walk_type (fn)(pd_entry_t pde, size_t slot, int lvl))
189 {
190 	scan_l4(pa, fn);
191 }
192 
193 /* -------------------------------------------------------------------------- */
194 
195 /*
196  * Rule: the number of kernel RWX pages should be zero.
197  */
198 static walk_type
count_krwx(pd_entry_t pde,size_t slot,int lvl)199 count_krwx(pd_entry_t pde, size_t slot, int lvl)
200 {
201 	if (lvl == NLEVEL && slot < 256) {
202 		return WALK_SKIP;
203 	}
204 	if (is_flag(pde, PTE_NX) || !is_flag(pde, PTE_W)) {
205 		return WALK_SKIP;
206 	}
207 	if (lvl != 1 && !is_flag(pde, PTE_PS)) {
208 		return WALK_NEXT;
209 	}
210 
211 	if (lvl == 4) {
212 		tester_ctx.results.n_rwx += (NBPD_L4 / PAGE_SIZE);
213 	} else if (lvl == 3) {
214 		tester_ctx.results.n_rwx += (NBPD_L3 / PAGE_SIZE);
215 	} else if (lvl == 2) {
216 		tester_ctx.results.n_rwx += (NBPD_L2 / PAGE_SIZE);
217 	} else if (lvl == 1) {
218 		tester_ctx.results.n_rwx += (NBPD_L1 / PAGE_SIZE);
219 	}
220 
221 	return WALK_NEXT;
222 }
223 
/*
 * Rule: the number of kernel SHSTK pages should be zero.
 *
 * A shadow-stack page is one that is read-only (PTE_W clear) yet dirty
 * (PTE_D set); with CET enabled the CPU gives such pages shadow-stack
 * semantics.
 */
static walk_type
count_kshstk(pd_entry_t pde, size_t slot, int lvl)
{
	/* Skip the user half of the top-level directory. */
	if (lvl == NLEVEL && slot < 256) {
		return WALK_SKIP;
	}

	/* Leaf mapping: either a large page, or a level-1 entry. */
	if (is_flag(pde, PTE_PS) || lvl == 1) {
		if (!is_flag(pde, PTE_W) && is_flag(pde, PTE_D)) {
			/* Account in units of PAGE_SIZE pages. */
			if (lvl == 4) {
				tester_ctx.results.n_shstk += (NBPD_L4 / PAGE_SIZE);
			} else if (lvl == 3) {
				tester_ctx.results.n_shstk += (NBPD_L3 / PAGE_SIZE);
			} else if (lvl == 2) {
				tester_ctx.results.n_shstk += (NBPD_L2 / PAGE_SIZE);
			} else if (lvl == 1) {
				tester_ctx.results.n_shstk += (NBPD_L1 / PAGE_SIZE);
			}
		}
		return WALK_SKIP;
	}

	/*
	 * Read-only intermediate entry: skip the subtree — presumably
	 * because shadow-stack accesses require writable non-leaf
	 * entries (TODO: confirm against the Intel SDM CET rules).
	 */
	if (!is_flag(pde, PTE_W)) {
		return WALK_SKIP;
	}

	return WALK_NEXT;
}
255 
256 /*
257  * Rule: the lower half of the kernel map must be zero.
258  */
259 static walk_type
check_kernel_map(pd_entry_t pde,size_t slot,int lvl)260 check_kernel_map(pd_entry_t pde, size_t slot, int lvl)
261 {
262 	if (lvl != NLEVEL) {
263 		return WALK_STOP;
264 	}
265 	if (slot >= 256) {
266 		return WALK_SKIP;
267 	}
268 	if (pde != 0) {
269 		tester_ctx.results.kernel_map_with_low_ptes |= true;
270 	}
271 	return WALK_SKIP;
272 }
273 
274 /*
275  * Rule: the PTE space must not have user permissions.
276  */
277 static walk_type
check_pte_space(pd_entry_t pde,size_t slot,int lvl)278 check_pte_space(pd_entry_t pde, size_t slot, int lvl)
279 {
280 	if (lvl != NLEVEL) {
281 		return WALK_STOP;
282 	}
283 	if (slot != PDIR_SLOT_PTE) {
284 		return WALK_SKIP;
285 	}
286 	if (is_flag(pde, PTE_U)) {
287 		tester_ctx.results.pte_is_user_accessible |= true;
288 	}
289 	return WALK_SKIP;
290 }
291 
292 /*
293  * Rule: each page in the lower half must have user permissions.
294  */
295 static walk_type
check_user_space(pd_entry_t pde,size_t slot,int lvl)296 check_user_space(pd_entry_t pde, size_t slot, int lvl)
297 {
298 	if (lvl == NLEVEL && slot >= 256) {
299 		return WALK_SKIP;
300 	}
301 	if (!is_flag(pde, PTE_U)) {
302 		tester_ctx.results.n_user_space_is_kernel += 1;
303 		return WALK_SKIP;
304 	}
305 	return WALK_NEXT;
306 }
307 
308 /*
309  * Rule: each page in the higher half must have kernel permissions.
310  */
311 static walk_type
check_kernel_space(pd_entry_t pde,size_t slot,int lvl)312 check_kernel_space(pd_entry_t pde, size_t slot, int lvl)
313 {
314 	if (lvl == NLEVEL && slot < 256) {
315 		return WALK_SKIP;
316 	}
317 	if (lvl == NLEVEL && slot == PDIR_SLOT_PTE) {
318 		return WALK_SKIP;
319 	}
320 	if (is_flag(pde, PTE_U)) {
321 		tester_ctx.results.n_kernel_space_is_user += 1;
322 		return WALK_SKIP;
323 	}
324 	return WALK_NEXT;
325 }
326 
327 /*
328  * Rule: the SVS map is allowed to use the G bit only on the PCPU area.
329  */
330 static walk_type
check_svs_g_bit(pd_entry_t pde,size_t slot,int lvl)331 check_svs_g_bit(pd_entry_t pde, size_t slot, int lvl)
332 {
333 	if (lvl == NLEVEL && slot == PDIR_SLOT_PCPU) {
334 		return WALK_SKIP;
335 	}
336 	if (is_flag(pde, PTE_G)) {
337 		tester_ctx.results.n_svs_g_bit_set += 1;
338 		return WALK_SKIP;
339 	}
340 	return WALK_NEXT;
341 }
342 
343 /* -------------------------------------------------------------------------- */
344 
/*
 * Scan the SVS user page tree of the current CPU.  When SVS is disabled
 * there is nothing to check; n_svs_g_bit_set is set to (size_t)-1 as an
 * "untested" marker for userland.
 */
static void
scan_svs(void)
{
	extern bool svs_enabled;
	paddr_t pa0;

	if (!svs_enabled) {
		tester_ctx.results.n_svs_g_bit_set = -1;
		return;
	}

	/* Stay on this CPU: ci_svs_updirpa is per-CPU state. */
	kpreempt_disable();
	pa0 = curcpu()->ci_svs_updirpa;
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_svs_g_bit);
	kpreempt_enable();
}
363 
/*
 * Scan the page tree of the given process: verify the user/kernel
 * permission split and the recursive PTE slot.
 */
static void
scan_proc(struct proc *p)
{
	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
	paddr_t pa0;

	/* Keep the tree stable while we walk it. */
	mutex_enter(&pmap->pm_lock);

	kpreempt_disable();
	pa0 = (paddr_t)pmap->pm_pdirpa[0];
	scan_tree(pa0, &check_user_space);
	scan_tree(pa0, &check_kernel_space);
	scan_tree(pa0, &check_pte_space);
	kpreempt_enable();

	mutex_exit(&pmap->pm_lock);
}
381 
/*
 * Run all the scans and leave their outcome in tester_ctx.results,
 * overwriting the previous run.
 */
static void
x86_pte_run_scans(void)
{
	struct pmap *kpm = pmap_kernel();
	paddr_t pa0;

	/* The counters are cumulative: start from a clean slate. */
	memset(&tester_ctx.results, 0, sizeof(tester_ctx.results));

	/* Scan the current user process. */
	scan_proc(curproc);

	/* Scan the SVS mapping. */
	scan_svs();

	/* Scan the kernel map. */
	pa0 = (paddr_t)kpm->pm_pdirpa[0];
	scan_tree(pa0, &count_krwx);
	scan_tree(pa0, &count_kshstk);
	scan_tree(pa0, &check_kernel_map);
}
402 
403 static void
x86_pte_levels_init(void)404 x86_pte_levels_init(void)
405 {
406 	size_t i;
407 	for (i = 0; i < NLEVEL; i++) {
408 		tester_ctx.levels[i] = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
409 		    UVM_KMF_VAONLY);
410 	}
411 }
412 
413 static void
x86_pte_levels_destroy(void)414 x86_pte_levels_destroy(void)
415 {
416 	size_t i;
417 	for (i = 0; i < NLEVEL; i++) {
418 		uvm_km_free(kernel_map, tester_ctx.levels[i], PAGE_SIZE,
419 		    UVM_KMF_VAONLY);
420 	}
421 }
422 
423 /* -------------------------------------------------------------------------- */
424 
425 static int
x86_pte_sysctl_run(SYSCTLFN_ARGS)426 x86_pte_sysctl_run(SYSCTLFN_ARGS)
427 {
428 	if (oldlenp == NULL)
429 		return EINVAL;
430 
431 	x86_pte_run_scans();
432 
433 	if (oldp == NULL) {
434 		*oldlenp = sizeof(tester_ctx.results);
435 		return 0;
436 	}
437 
438 	if (*oldlenp < sizeof(tester_ctx.results))
439 		return ENOMEM;
440 
441 	return copyout(&tester_ctx.results, oldp, sizeof(tester_ctx.results));
442 }
443 
444 static int
x86_pte_sysctl_init(void)445 x86_pte_sysctl_init(void)
446 {
447 	struct sysctllog **log = &tester_ctx.ctx_sysctllog;
448 	const struct sysctlnode *rnode, *cnode;
449 	int error;
450 
451 	error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT,
452 	    CTLTYPE_NODE, "x86_pte_test",
453 	    SYSCTL_DESCR("x86_pte testing interface"),
454 	    NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
455 	if (error)
456 		goto out;
457 
458 	error = sysctl_createv(log, 0, &rnode, &cnode, CTLFLAG_PERMANENT,
459 	    CTLTYPE_STRUCT, "test",
460 	    SYSCTL_DESCR("execute a x86_pte test"),
461 	    x86_pte_sysctl_run, 0, NULL, 0, CTL_CREATE, CTL_EOL);
462 
463 out:
464  	if (error)
465 		sysctl_teardown(log);
466 	return error;
467 }
468 
/*
 * Remove the sysctl nodes created by x86_pte_sysctl_init().
 */
static void
x86_pte_sysctl_destroy(void)
{
	sysctl_teardown(&tester_ctx.ctx_sysctllog);
}
474 
475 /* -------------------------------------------------------------------------- */
476 
477 MODULE(MODULE_CLASS_MISC, x86_pte_tester, NULL);
478 
479 static int
x86_pte_tester_modcmd(modcmd_t cmd,void * arg __unused)480 x86_pte_tester_modcmd(modcmd_t cmd, void *arg __unused)
481 {
482 	int error = 0;
483 
484 	switch (cmd) {
485 	case MODULE_CMD_INIT:
486 		x86_pte_levels_init();
487 		error = x86_pte_sysctl_init();
488 		break;
489 	case MODULE_CMD_FINI:
490 		x86_pte_sysctl_destroy();
491 		x86_pte_levels_destroy();
492 		break;
493 	default:
494 		error = ENOTTY;
495 		break;
496 	}
497 
498 	return error;
499 }
500