/* $OpenBSD: exec_i386.c,v 1.38 2023/07/22 10:11:19 jsg Exp $ */

/*
 * Copyright (c) 1997-1998 Michael Shalayeff
 * Copyright (c) 1997 Tobias Weingartner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/disklabel.h>
#include <dev/cons.h>
#include <lib/libsa/loadfile.h>
#include <machine/biosvar.h>
#include <machine/pte.h>
#include <machine/specialreg.h>
#include <stand/boot/bootarg.h>

#include "cmd.h"
#include "disk.h"
#include "libsa.h"

#ifdef SOFTRAID
#include <dev/softraidvar.h>
#include <lib/libsa/softraid.h>
#include "softraid_amd64.h"
#endif

#define BOOT_DEBUG

#ifdef BOOT_DEBUG
#define DPRINTF(x...)   do { printf(x); } while(0)
#else
#define DPRINTF(x...)
#endif /* BOOT_DEBUG */

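/*
 * Fixed entry point of pre-6.5 kernels linked at a static VA; any other
 * entry value indicates a randomly linked kernel that is started in
 * 64 bit (long) mode (see run_loadfile() below).
 */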
#define LEGACY_KERNEL_ENTRY_POINT 0xffffffff81001000ULL

typedef void (*startfuncp)(int, int, int, int, int, int, int, int)
    __attribute__ ((noreturn));

extern void launch_amd64_kernel_long(caddr_t, caddr_t, caddr_t, uint64_t, int,
    int, int, uint64_t, int, int, int, uint64_t);

caddr_t boot_alloc(void);
caddr_t make_kernel_page_tables(uint64_t);

void ucode_load(void);
extern struct cmd_state cmd;

char *bootmac = NULL;
extern char end[], _start[];

caddr_t pt_base_addr;

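/*
 * boot_alloc() below hands out single pages from two regions: the first
 * starts at the fixed low-memory page 0x1000, the second at the first
 * page-aligned address past the end of the boot program itself.
 */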
#define LONG_KERN_PML4_ADDR1 0x1000
#define LONG_KERN_PML4_ADDR2 (((uint64_t)(end) + PAGE_MASK) & ~PAGE_MASK)

/*
 * N.B. - The following must stay in sync with pmap.h (including that here
 * causes compile errors related to RBT_HEAD).
 */
#define NKL2_KIMG_ENTRIES 64
#define NPDPG 512
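/*
 * NKL2_KIMG_ENTRIES L2 slots of NBPD_L2 (2MB) each give 128MB of
 * bootstrap kernel image mappings; NPDPG is the number of 8-byte
 * entries in one 4KB page table page (4096 / 8 = 512).
 */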

void
run_loadfile(uint64_t *marks, int howto)
{
        uint64_t entry;
        dev_t bootdev = bootdev_dip->bootdev;
        size_t ac = BOOTARG_LEN;
        caddr_t av = (caddr_t)BOOTARG_OFF;
        bios_consdev_t cd;
        extern int com_speed; /* from bioscons.c */
        extern int com_addr;
        bios_ddb_t ddb;
        extern int db_console;
        bios_bootduid_t bootduid;
        caddr_t pml4, stack, new_av;
#ifdef SOFTRAID
        bios_bootsr_t bootsr;
        struct sr_boot_volume *bv;
#endif /* SOFTRAID */

        if (sa_cleanup != NULL)
                (*sa_cleanup)();

        memset(&cd, 0, sizeof(cd));
        cd.consdev = cn_tab->cn_dev;
        cd.conspeed = com_speed;
        cd.consaddr = com_addr;
        addbootarg(BOOTARG_CONSDEV, sizeof(cd), &cd);

        if (bootmac != NULL)
                addbootarg(BOOTARG_BOOTMAC, sizeof(bios_bootmac_t), bootmac);

        if (db_console != -1) {
                ddb.db_console = db_console;
                addbootarg(BOOTARG_DDB, sizeof(ddb), &ddb);
        }

        bcopy(bootdev_dip->disklabel.d_uid, &bootduid.duid, sizeof(bootduid));
        addbootarg(BOOTARG_BOOTDUID, sizeof(bootduid), &bootduid);

        ucode_load();

#ifdef SOFTRAID
        if (bootdev_dip->sr_vol != NULL) {
                bv = bootdev_dip->sr_vol;
                bzero(&bootsr, sizeof(bootsr));
                bcopy(&bv->sbv_uuid, &bootsr.uuid, sizeof(bootsr.uuid));
                if (bv->sbv_maskkey != NULL)
                        bcopy(bv->sbv_maskkey, &bootsr.maskkey,
                            sizeof(bootsr.maskkey));
                addbootarg(BOOTARG_BOOTSR, sizeof(bios_bootsr_t), &bootsr);
                explicit_bzero(&bootsr, sizeof(bootsr));
        }

        sr_clear_keys();
#endif /* SOFTRAID */

        entry = marks[MARK_ENTRY];

        printf("entry point at 0x%llx\n", entry);

        pt_base_addr = (caddr_t)LONG_KERN_PML4_ADDR1;

        /* Pass memory map to the kernel */
        mem_pass();

        makebootargs(av, &ac);

        /*
         * Legacy kernels have entry set to 0xffffffff81001000.
         * Other entry values indicate kernels that have random
         * base VA and launch in 64 bit (long) mode.
         */
        if (entry == LEGACY_KERNEL_ENTRY_POINT) {
                /*
                 * Legacy boot code expects entry 0x1001000, so mask
                 * off the high bits.
                 */
                entry &= 0xFFFFFFF;

                /*
                 * Launch a legacy kernel
                 */
                (*(startfuncp)entry)(howto, bootdev, BOOTARG_APIVER,
                    marks[MARK_END] & 0xfffffff, extmem, cnvmem, ac, (int)av);
                /* not reached */
        }

        /*
         * Launch a long mode/randomly linked (post-6.5) kernel?
         */
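        /*
         * The bootarg list was built on the heap, which is about to be
         * reused for the long mode bootstrap page tables (see
         * make_kernel_page_tables() below), so copy it to a freshly
         * allocated page first.
         */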
        new_av = boot_alloc(); /* Replaces old heap */
        memcpy((void *)new_av, av, ac);

        /* Stack grows down, so grab two pages. We'll waste the 2nd */
        stack = boot_alloc();
        stack = boot_alloc();

        pml4 = make_kernel_page_tables(entry);
        launch_amd64_kernel_long((void *)launch_amd64_kernel_long,
            pml4, stack, entry, howto, bootdev, BOOTARG_APIVER,
            marks[MARK_END], extmem, cnvmem, ac, (uint64_t)new_av);
        /* not reached */
}

void
ucode_load(void)
{
        uint32_t model, family, stepping;
        uint32_t dummy, signature;
        uint32_t vendor[4];
        bios_ucode_t uc;
        struct stat sb;
        char path[128];
        size_t buflen;
        char *buf;
        int fd;

        CPUID(0, dummy, vendor[0], vendor[2], vendor[1]);
        vendor[3] = 0; /* NULL-terminate */
        if (strcmp((char *)vendor, "GenuineIntel") != 0 &&
            strcmp((char *)vendor, "AuthenticAMD") != 0)
                return;

        CPUID(1, signature, dummy, dummy, dummy);
        family = (signature >> 8) & 0x0f;
        model = (signature >> 4) & 0x0f;
        if (family == 0x6 || family == 0xf) {
                family += (signature >> 20) & 0xff;
                model += ((signature >> 16) & 0x0f) << 4;
        }
        stepping = (signature >> 0) & 0x0f;
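        /*
         * e.g. an Intel signature of 0x906ea decodes to family 0x06,
         * model 0x9e, stepping 0x0a, selecting the update file
         * intel/06-9e-0a below.
         */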

        if (strcmp((char *)vendor, "GenuineIntel") == 0) {
                snprintf(path, sizeof(path),
                    "%s:/etc/firmware/intel/%02x-%02x-%02x",
                    cmd.bootdev, family, model, stepping);
        } else if (strcmp((char *)vendor, "AuthenticAMD") == 0) {
                if (family < 0x10)
                        return;
                else if (family <= 0x14)
                        snprintf(path, sizeof(path),
                            "%s:/etc/firmware/amd/microcode_amd.bin",
                            cmd.bootdev);
                else
                        snprintf(path, sizeof(path),
                            "%s:/etc/firmware/amd/microcode_amd_fam%02xh.bin",
                            cmd.bootdev, family);
        }

        fd = open(path, O_RDONLY);
        if (fd == -1)
                return;

        if (fstat(fd, &sb) == -1) {
                close(fd);
                return;
        }

        buflen = sb.st_size;
        if (buflen > 256*1024) {
                printf("ucode too large\n");
                close(fd);
                return;
        }

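        /*
         * Read the update into a fixed scratch buffer at the 1MB
         * physical boundary, above the 640KB low-memory range used by
         * the boot program and below the 16MB kernel load address; the
         * kernel picks it up later via the BOOTARG_UCODE entry.
         */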
        buf = (char *)(1*1024*1024);

        if (read(fd, buf, buflen) != buflen) {
                close(fd);
                return;
        }

        uc.uc_addr = (uint64_t)buf;
        uc.uc_size = (uint64_t)buflen;
        addbootarg(BOOTARG_UCODE, sizeof(uc), &uc);

        close(fd);
}

/*
 * boot_alloc
 *
 * Special allocator for page table pages and kernel stack
 *
 * Allocates 1 page (PAGE_SIZE) of data.
 *
 * We have 2 regions available to us:
 *  0x1000 ... 0xF000 : range 1 (stack is at 0xF000)
 *  end ... 0xA0000 (640KB) : range 2
 *
 * We allocate from range 1 until it is complete, then skip to range 2. If
 * range 2 is exhausted, we panic.
 *
 * Return value:
 *  VA of requested allocation
 */
caddr_t
boot_alloc(void)
{
        caddr_t ret;
        static caddr_t cur = 0;
        static int skipped = 0;

        /* First time? */
        if (cur == 0)
                cur = (caddr_t)pt_base_addr;

        ret = cur;

        if (((uint64_t)cur + PAGE_SIZE >= 0xF000) && !skipped) {
                cur = (caddr_t)LONG_KERN_PML4_ADDR2;
                skipped = 1;
        } else
                cur += PAGE_SIZE;

        if ((uint64_t)cur >= 640 * 1024)
                panic("out of memory");

        return ret;
}

/*
 * make_kernel_page_tables
 *
 * Sets up a minimal set of page tables for early use in the kernel. In
 * pre_init_x86_64, the kernel will rebuild its page tables, so the
 * table constructed here only needs the minimal mapping.
 *
 * [entry ... end] => PA 0x1000000 (16MB, the current phys loadaddr)
 *
 * In BIOS boot mode, this function overwrites the heap with the long
 * mode kernel bootstrap page tables and thus must be called immediately
 * before switching to long mode and starting the kernel.
 *
 * Parameters:
 *  entry: the kernel entry point VA
 *
 * Return value:
 *  PML4 PA of the new table
 */
caddr_t
make_kernel_page_tables(uint64_t entry)
{
        uint64_t *pml4, *pml3, *pml2, *pml1;
        int i, j, k, kern_pml4, kern_pml3, kern_pml2, kern_pml1;

        kern_pml4 = (entry & L4_MASK) >> L4_SHIFT;
        kern_pml3 = (entry & L3_MASK) >> L3_SHIFT;
        kern_pml2 = (entry & L2_MASK) >> L2_SHIFT;
        kern_pml1 = (entry & L1_MASK) >> L1_SHIFT;
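        /*
         * e.g. for the legacy entry VA 0xffffffff81001000 these work
         * out to L4 index 511, L3 index 510, L2 index 8 and L1 index 1.
         */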

        pml4 = (uint64_t *)boot_alloc();

        /* Map kernel */
        pml3 = (uint64_t *)boot_alloc();
        pml4[kern_pml4] = (uint64_t)pml3 | PG_V | PG_RW;

        pml2 = (uint64_t *)boot_alloc();
        pml3[kern_pml3] = (uint64_t)pml2 | PG_V | PG_RW;

        for (i = 0; i < NKL2_KIMG_ENTRIES; i++) {
                pml1 = (uint64_t *)boot_alloc();
                pml2[i + kern_pml2] = (uint64_t)pml1 | PG_V | PG_RW;

                /* The first page of PTEs may start at a different offset */
                if (i == kern_pml2)
                        k = kern_pml1;
                else
                        k = 0;

                /*
                 * Map [k...511] PTEs.
                 */
                for (j = k; j < NPDPG; j++)
                        pml1[j] = (uint64_t)(((8 + i) * NBPD_L2) +
                            (j - kern_pml1) * PAGE_SIZE) | PG_V | PG_RW;
        }

        /* Map first 1GB phys for kernel page table, stack, and bootstrap */
        pml3 = (uint64_t *)boot_alloc();
        pml4[0] = (uint64_t)pml3 | PG_V | PG_RW; /* Covers 0-512GB */

        pml2 = (uint64_t *)boot_alloc();
        pml3[0] = (uint64_t)pml2 | PG_V | PG_RW; /* Covers 0-1GB */

        for (i = 0; i < NPDPG; i++)
                pml2[i] = (i << L2_SHIFT) | PG_V | PG_RW | PG_PS;
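        /*
         * Each PG_PS entry maps a 2MB superpage, so the 512 entries
         * identity-map the first 1GB of physical memory, keeping this
         * bootstrap code, its stack and the tables themselves
         * addressable once long mode paging is enabled.
         */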

        return (caddr_t)pml4;
}