/*	$NetBSD: mm.c,v 1.20 2017/11/26 14:29:48 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = PAD_TEXT,
	[BTSEG_RODATA] = PAD_RODATA,
	[BTSEG_DATA] = PAD_DATA
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

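/*
 * mm_init: record the first free physical address. Physical pages are then
 * handed out linearly from this address by mm_palloc().
 */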
void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

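/*
 * mm_enter_pa: enter a translation from va to pa in the L1 page table, with
 * the protection encoded by 'prot'. The slot must not be populated already.
 */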
static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

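/*
 * mm_reenter_pa: same as mm_enter_pa, but overwrite whatever translation is
 * already present. The caller is responsible for flushing the TLB entry.
 */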
static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

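/*
 * mm_flush_va: invalidate the TLB entry for va on the local CPU.
 */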
static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

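/*
 * mm_palloc: allocate 'npages' contiguous physical pages from pa_avail, and
 * zero them out through the temporary VA before returning their base address.
 */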
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

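/*
 * mm_mprotect: change the protection of an already-mapped, page-aligned VA
 * range, by re-entering each page with the new protection bits.
 */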
static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

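/*
 * mm_nentries_range: number of page-table entries of size 'pgsz' needed to
 * cover the range from startva to endva.
 */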
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

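/*
 * mm_map_tree: build the L4/L3/L2 levels of the page tree needed to map the
 * given VA range. The L1 entries are entered later, page by page.
 */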
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

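/*
 * mm_randva_kregion: pick a random, pagesz-aligned VA of 'size' bytes inside
 * the KASLR window, retrying until it does not overlap any segment already
 * registered in bootspace, and build the page tree for it.
 */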
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

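/*
 * bootspace_getend: return the highest physical address covered by the
 * segments registered so far in bootspace.
 */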
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

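/*
 * bootspace_addseg: record a segment in the first free slot of bootspace,
 * or panic if no slot is left.
 */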
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

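/*
 * mm_shift_segment: shift the segment loaded at va forward by a random,
 * elfalign-aligned offset within its page-rounded allocation, and return
 * that offset. This randomizes the sub-page location of the segment.
 */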
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we take a look at the read-only
	 * mapping of the kernel we created in locore. We're identity-mapped,
	 * so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

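/*
 * mm_map_segment: map an ELF segment of 'elfsz' bytes located at pa, at a
 * random VA (2MB-aligned if the segment is larger than one page). The
 * segment is shifted by a random sub-page offset, the unused space around
 * it is filled with the padding byte of the segment type, and the segment
 * is registered in bootspace. Returns the VA where the segment contents
 * begin.
 */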
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There is a variable number of independent regions: one head, several kernel
 * segments, one boot. They are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}
451