/*	$OpenBSD: exec_i386.c,v 1.38 2023/07/22 10:11:19 jsg Exp $	*/

/*
 * Copyright (c) 1997-1998 Michael Shalayeff
 * Copyright (c) 1997 Tobias Weingartner
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/disklabel.h>
#include <dev/cons.h>
#include <lib/libsa/loadfile.h>
#include <machine/biosvar.h>
#include <machine/pte.h>
#include <machine/specialreg.h>
#include <stand/boot/bootarg.h>

#include "cmd.h"
#include "disk.h"
#include "libsa.h"

#ifdef SOFTRAID
#include <dev/softraidvar.h>
#include <lib/libsa/softraid.h>
#include "softraid_amd64.h"
#endif

50fcd2aafaSmlarkin #define BOOT_DEBUG
51fcd2aafaSmlarkin
52fcd2aafaSmlarkin #ifdef BOOT_DEBUG
53fcd2aafaSmlarkin #define DPRINTF(x...) do { printf(x); } while(0)
54fcd2aafaSmlarkin #else
55fcd2aafaSmlarkin #define DPRINTF(x...)
56fcd2aafaSmlarkin #endif /* BOOT_DEBUG */
57fcd2aafaSmlarkin
58fcd2aafaSmlarkin #define LEGACY_KERNEL_ENTRY_POINT 0xffffffff81001000ULL
59fcd2aafaSmlarkin
606483bf47Sderaadt typedef void (*startfuncp)(int, int, int, int, int, int, int, int)
61a47f7207Smickey __attribute__ ((noreturn));
62a47f7207Smickey
63fcd2aafaSmlarkin extern void launch_amd64_kernel_long(caddr_t, caddr_t, caddr_t, uint64_t, int,
64fcd2aafaSmlarkin int, int, uint64_t, int, int, int, uint64_t);
65fcd2aafaSmlarkin
66fcd2aafaSmlarkin caddr_t boot_alloc(void);
67fcd2aafaSmlarkin caddr_t make_kernel_page_tables(uint64_t);
68fcd2aafaSmlarkin
69b4ccf14aSpatrick void ucode_load(void);
70b4ccf14aSpatrick extern struct cmd_state cmd;
71b4ccf14aSpatrick
72c7471d74Stom char *bootmac = NULL;
73fcd2aafaSmlarkin extern char end[], _start[];
74fcd2aafaSmlarkin
75fcd2aafaSmlarkin caddr_t pt_base_addr;
76fcd2aafaSmlarkin
77fcd2aafaSmlarkin #define LONG_KERN_PML4_ADDR1 0x1000
78fcd2aafaSmlarkin #define LONG_KERN_PML4_ADDR2 (((uint64_t)(end) + PAGE_MASK) & ~PAGE_MASK)
79fcd2aafaSmlarkin
80fcd2aafaSmlarkin /*
81fcd2aafaSmlarkin * N.B. - The following must stay in sync with pmap.h (including that here
82fcd2aafaSmlarkin * causes compile errors related to RBT_HEAD.
83fcd2aafaSmlarkin */
84fcd2aafaSmlarkin #define NKL2_KIMG_ENTRIES 64
85731ae6f3Smlarkin #define NPDPG 512
86c7471d74Stom
87a47f7207Smickey void
run_loadfile(uint64_t * marks,int howto)882340cfa5Sderaadt run_loadfile(uint64_t *marks, int howto)
89a47f7207Smickey {
90fcd2aafaSmlarkin uint64_t entry;
91a47f7207Smickey dev_t bootdev = bootdev_dip->bootdev;
92a47f7207Smickey size_t ac = BOOTARG_LEN;
93a47f7207Smickey caddr_t av = (caddr_t)BOOTARG_OFF;
9466d55b09Skettenis bios_consdev_t cd;
95a47f7207Smickey extern int com_speed; /* from bioscons.c */
96a1e303c3Skettenis extern int com_addr;
97c350d4dcSdlg bios_ddb_t ddb;
98c350d4dcSdlg extern int db_console;
9958db950fSjsing bios_bootduid_t bootduid;
100fcd2aafaSmlarkin caddr_t pml4, stack, new_av;
10192befcddSjsing #ifdef SOFTRAID
10270197c51Sjsing bios_bootsr_t bootsr;
10370197c51Sjsing struct sr_boot_volume *bv;
104fcd2aafaSmlarkin #endif /* SOFTRAID */
1058641b11fStom if (sa_cleanup != NULL)
1068641b11fStom (*sa_cleanup)();
1078641b11fStom
10866d55b09Skettenis memset(&cd, 0, sizeof(cd));
109a47f7207Smickey cd.consdev = cn_tab->cn_dev;
110a47f7207Smickey cd.conspeed = com_speed;
111a1e303c3Skettenis cd.consaddr = com_addr;
112a47f7207Smickey addbootarg(BOOTARG_CONSDEV, sizeof(cd), &cd);
113a47f7207Smickey
114c7471d74Stom if (bootmac != NULL)
115c7471d74Stom addbootarg(BOOTARG_BOOTMAC, sizeof(bios_bootmac_t), bootmac);
116c7471d74Stom
117c350d4dcSdlg if (db_console != -1) {
118c350d4dcSdlg ddb.db_console = db_console;
119c350d4dcSdlg addbootarg(BOOTARG_DDB, sizeof(ddb), &ddb);
120c350d4dcSdlg }
121c350d4dcSdlg
12258db950fSjsing bcopy(bootdev_dip->disklabel.d_uid, &bootduid.duid, sizeof(bootduid));
12358db950fSjsing addbootarg(BOOTARG_BOOTDUID, sizeof(bootduid), &bootduid);
124331b9dcdSjsing
125b4ccf14aSpatrick ucode_load();
126b4ccf14aSpatrick
12792befcddSjsing #ifdef SOFTRAID
12870197c51Sjsing if (bootdev_dip->sr_vol != NULL) {
12970197c51Sjsing bv = bootdev_dip->sr_vol;
13070197c51Sjsing bzero(&bootsr, sizeof(bootsr));
13170197c51Sjsing bcopy(&bv->sbv_uuid, &bootsr.uuid, sizeof(bootsr.uuid));
13270197c51Sjsing if (bv->sbv_maskkey != NULL)
13370197c51Sjsing bcopy(bv->sbv_maskkey, &bootsr.maskkey,
13470197c51Sjsing sizeof(bootsr.maskkey));
13570197c51Sjsing addbootarg(BOOTARG_BOOTSR, sizeof(bios_bootsr_t), &bootsr);
13670197c51Sjsing explicit_bzero(&bootsr, sizeof(bootsr));
13770197c51Sjsing }
13870197c51Sjsing
13970197c51Sjsing sr_clear_keys();
140fcd2aafaSmlarkin #endif /* SOFTRAID */
14170197c51Sjsing
142fcd2aafaSmlarkin entry = marks[MARK_ENTRY];
1433da24ca2Syasuoka
144fcd2aafaSmlarkin printf("entry point at 0x%llx\n", entry);
145fcd2aafaSmlarkin
146fcd2aafaSmlarkin pt_base_addr = (caddr_t)LONG_KERN_PML4_ADDR1;
1473da24ca2Syasuoka
148a47f7207Smickey /* Pass memory map to the kernel */
149a47f7207Smickey mem_pass();
150a47f7207Smickey
151fce71ba5Syasuoka makebootargs(av, &ac);
152a47f7207Smickey
153fcd2aafaSmlarkin /*
154fcd2aafaSmlarkin * Legacy kernels have entry set to 0xffffffff81001000.
155fcd2aafaSmlarkin * Other entry values indicate kernels that have random
156fcd2aafaSmlarkin * base VA and launch in 64 bit (long) mode.
157fcd2aafaSmlarkin */
158731ae6f3Smlarkin if (entry == LEGACY_KERNEL_ENTRY_POINT) {
159fcd2aafaSmlarkin /*
160fcd2aafaSmlarkin * Legacy boot code expects entry 0x1001000, so mask
161fcd2aafaSmlarkin * off the high bits.
162fcd2aafaSmlarkin */
163fcd2aafaSmlarkin entry &= 0xFFFFFFF;
164731ae6f3Smlarkin
165731ae6f3Smlarkin /*
166731ae6f3Smlarkin * Launch a legacy kernel
167731ae6f3Smlarkin */
168731ae6f3Smlarkin (*(startfuncp)entry)(howto, bootdev, BOOTARG_APIVER,
169731ae6f3Smlarkin marks[MARK_END] & 0xfffffff, extmem, cnvmem, ac, (int)av);
170731ae6f3Smlarkin /* not reached */
171731ae6f3Smlarkin }
17243d589dfSmlarkin
173fcd2aafaSmlarkin /*
174fcd2aafaSmlarkin * Launch a long mode/randomly linked (post-6.5) kernel?
175fcd2aafaSmlarkin */
176fcd2aafaSmlarkin new_av = boot_alloc(); /* Replaces old heap */
177fcd2aafaSmlarkin memcpy((void *)new_av, av, ac);
178fcd2aafaSmlarkin
179fcd2aafaSmlarkin /* Stack grows down, so grab two pages. We'll waste the 2nd */
180fcd2aafaSmlarkin stack = boot_alloc();
181fcd2aafaSmlarkin stack = boot_alloc();
182fcd2aafaSmlarkin
183731ae6f3Smlarkin pml4 = make_kernel_page_tables(entry);
184fcd2aafaSmlarkin launch_amd64_kernel_long((void *)launch_amd64_kernel_long,
185731ae6f3Smlarkin pml4, stack, entry, howto, bootdev, BOOTARG_APIVER,
186731ae6f3Smlarkin marks[MARK_END], extmem, cnvmem, ac, (uint64_t)new_av);
1870d1fcfa7Syasuoka /* not reached */
188a47f7207Smickey }
189b4ccf14aSpatrick
190b4ccf14aSpatrick void
ucode_load(void)191b4ccf14aSpatrick ucode_load(void)
192b4ccf14aSpatrick {
193b4ccf14aSpatrick uint32_t model, family, stepping;
194b4ccf14aSpatrick uint32_t dummy, signature;
195b4ccf14aSpatrick uint32_t vendor[4];
196b4ccf14aSpatrick bios_ucode_t uc;
197b4ccf14aSpatrick struct stat sb;
198b4ccf14aSpatrick char path[128];
199b4ccf14aSpatrick size_t buflen;
200b4ccf14aSpatrick char *buf;
201b4ccf14aSpatrick int fd;
202b4ccf14aSpatrick
203b4ccf14aSpatrick CPUID(0, dummy, vendor[0], vendor[2], vendor[1]);
204b4ccf14aSpatrick vendor[3] = 0; /* NULL-terminate */
205*126dac3cSjsg if (strcmp((char *)vendor, "GenuineIntel") != 0 &&
206*126dac3cSjsg strcmp((char *)vendor, "AuthenticAMD") != 0)
207b4ccf14aSpatrick return;
208b4ccf14aSpatrick
209b4ccf14aSpatrick CPUID(1, signature, dummy, dummy, dummy);
210b4ccf14aSpatrick family = (signature >> 8) & 0x0f;
211b4ccf14aSpatrick model = (signature >> 4) & 0x0f;
212b4ccf14aSpatrick if (family == 0x6 || family == 0xf) {
213b4ccf14aSpatrick family += (signature >> 20) & 0xff;
214b4ccf14aSpatrick model += ((signature >> 16) & 0x0f) << 4;
215b4ccf14aSpatrick }
216b4ccf14aSpatrick stepping = (signature >> 0) & 0x0f;
217b4ccf14aSpatrick
218*126dac3cSjsg if (strcmp((char *)vendor, "GenuineIntel") == 0) {
219*126dac3cSjsg snprintf(path, sizeof(path),
220*126dac3cSjsg "%s:/etc/firmware/intel/%02x-%02x-%02x",
221b4ccf14aSpatrick cmd.bootdev, family, model, stepping);
222*126dac3cSjsg } else if (strcmp((char *)vendor, "AuthenticAMD") == 0) {
223*126dac3cSjsg if (family < 0x10)
224*126dac3cSjsg return;
225*126dac3cSjsg else if (family <= 0x14)
226*126dac3cSjsg snprintf(path, sizeof(path),
227*126dac3cSjsg "%s:/etc/firmware/amd/microcode_amd.bin",
228*126dac3cSjsg cmd.bootdev);
229*126dac3cSjsg else
230*126dac3cSjsg snprintf(path, sizeof(path),
231*126dac3cSjsg "%s:/etc/firmware/amd/microcode_amd_fam%02xh.bin",
232*126dac3cSjsg cmd.bootdev, family);
233*126dac3cSjsg }
234b4ccf14aSpatrick
235d6da11dcSderaadt fd = open(path, O_RDONLY);
236b4ccf14aSpatrick if (fd == -1)
237b4ccf14aSpatrick return;
238b4ccf14aSpatrick
239b4ccf14aSpatrick if (fstat(fd, &sb) == -1)
240b4ccf14aSpatrick return;
241b4ccf14aSpatrick
242b4ccf14aSpatrick buflen = sb.st_size;
243173af1c8Sjsg if (buflen > 256*1024) {
2445f80233cSderaadt printf("ucode too large\n");
245b4ccf14aSpatrick return;
2465f80233cSderaadt }
2475f80233cSderaadt
2485f80233cSderaadt buf = (char *)(1*1024*1024);
249b4ccf14aSpatrick
250b4ccf14aSpatrick if (read(fd, buf, buflen) != buflen) {
251d60df8a9Sjsing close(fd);
252b4ccf14aSpatrick return;
253b4ccf14aSpatrick }
254b4ccf14aSpatrick
255b4ccf14aSpatrick uc.uc_addr = (uint64_t)buf;
256b4ccf14aSpatrick uc.uc_size = (uint64_t)buflen;
257b4ccf14aSpatrick addbootarg(BOOTARG_UCODE, sizeof(uc), &uc);
258d60df8a9Sjsing
259d60df8a9Sjsing close(fd);
260b4ccf14aSpatrick }
261fcd2aafaSmlarkin
262fcd2aafaSmlarkin /*
263fcd2aafaSmlarkin * boot_alloc
264fcd2aafaSmlarkin *
265fcd2aafaSmlarkin * Special allocator for page table pages and kernel stack
266fcd2aafaSmlarkin *
267fcd2aafaSmlarkin * Allocates 1 page (PAGE_SIZE) of data.
268fcd2aafaSmlarkin *
269fcd2aafaSmlarkin * We have 2 regions available to us:
270fcd2aafaSmlarkin * 0x1000 ... 0xF000 : range 1 (stack is at 0xF000)
271fcd2aafaSmlarkin * end ... 0xA0000 (640KB) : range 2
272fcd2aafaSmlarkin *
273fcd2aafaSmlarkin * We allocate from range 1 until it is complete, then skip to range 2. If
274fcd2aafaSmlarkin * range 2 is exhausted, we panic.
275fcd2aafaSmlarkin *
276fcd2aafaSmlarkin * Return value:
277fcd2aafaSmlarkin * VA of requested allocation
278fcd2aafaSmlarkin */
279fcd2aafaSmlarkin caddr_t
boot_alloc(void)280fcd2aafaSmlarkin boot_alloc(void)
281fcd2aafaSmlarkin {
282fcd2aafaSmlarkin caddr_t ret;
283fcd2aafaSmlarkin static caddr_t cur = 0;
284fcd2aafaSmlarkin static int skipped = 0;
285fcd2aafaSmlarkin
286fcd2aafaSmlarkin /* First time? */
287fcd2aafaSmlarkin if (cur == 0)
288fcd2aafaSmlarkin cur = (caddr_t)pt_base_addr;
289fcd2aafaSmlarkin
290fcd2aafaSmlarkin ret = cur;
291fcd2aafaSmlarkin
292fcd2aafaSmlarkin if (((uint64_t)cur + PAGE_SIZE >= 0xF000) && !skipped) {
293fcd2aafaSmlarkin cur = (caddr_t)LONG_KERN_PML4_ADDR2;
294fcd2aafaSmlarkin skipped = 1;
295fcd2aafaSmlarkin } else
296fcd2aafaSmlarkin cur += PAGE_SIZE;
297fcd2aafaSmlarkin
298fcd2aafaSmlarkin if ((uint64_t)cur >= 640 * 1024)
299fcd2aafaSmlarkin panic("out of memory");
300fcd2aafaSmlarkin
301fcd2aafaSmlarkin return ret;
302fcd2aafaSmlarkin }
303fcd2aafaSmlarkin
304fcd2aafaSmlarkin /*
305fcd2aafaSmlarkin * make_kernel_page_tables
306fcd2aafaSmlarkin *
307fcd2aafaSmlarkin * Sets up a minimal set of page tables for early use in the kernel. In
308fcd2aafaSmlarkin * pre_init_x86_64, the kernel will rebuild its page tables, so the
309fcd2aafaSmlarkin * table constructed here only needs the minimal mapping.
310fcd2aafaSmlarkin *
311fcd2aafaSmlarkin * [entry ... end] => PA 0x1000000 (16MB, the current phys loadaddr)
312fcd2aafaSmlarkin *
313fcd2aafaSmlarkin * In BIOS boot mode, this function overwrites the heap with the long
31436fd90dcSjsg * mode kernel bootstrap page tables and thus must be called immediately
315fcd2aafaSmlarkin * before switching to long mode and starting the kernel.
316fcd2aafaSmlarkin *
317fcd2aafaSmlarkin * Parameters:
318fcd2aafaSmlarkin * entry_lo: the low byte (masked) of the kernel entry point
319fcd2aafaSmlarkin *
320fcd2aafaSmlarkin * Return value:
321fcd2aafaSmlarkin * PML4 PA of the new table
322fcd2aafaSmlarkin */
323fcd2aafaSmlarkin caddr_t
make_kernel_page_tables(uint64_t entry)324fcd2aafaSmlarkin make_kernel_page_tables(uint64_t entry)
325fcd2aafaSmlarkin {
326fcd2aafaSmlarkin uint64_t *pml4, *pml3, *pml2, *pml1;
327fcd2aafaSmlarkin int i, j, k, kern_pml4, kern_pml3, kern_pml2, kern_pml1;
328fcd2aafaSmlarkin
329fcd2aafaSmlarkin kern_pml4 = (entry & L4_MASK) >> L4_SHIFT;
330fcd2aafaSmlarkin kern_pml3 = (entry & L3_MASK) >> L3_SHIFT;
331fcd2aafaSmlarkin kern_pml2 = (entry & L2_MASK) >> L2_SHIFT;
332fcd2aafaSmlarkin kern_pml1 = (entry & L1_MASK) >> L1_SHIFT;
333fcd2aafaSmlarkin
334fcd2aafaSmlarkin pml4 = (uint64_t *)boot_alloc();
335fcd2aafaSmlarkin
336fcd2aafaSmlarkin /* Map kernel */
337fcd2aafaSmlarkin pml3 = (uint64_t *)boot_alloc();
338fcd2aafaSmlarkin pml4[kern_pml4] = (uint64_t)pml3 | PG_V | PG_RW;
339fcd2aafaSmlarkin
340fcd2aafaSmlarkin pml2 = (uint64_t *)boot_alloc();
341fcd2aafaSmlarkin pml3[kern_pml3] = (uint64_t)pml2 | PG_V | PG_RW;
342fcd2aafaSmlarkin
343fcd2aafaSmlarkin for (i = 0; i < NKL2_KIMG_ENTRIES; i++) {
344fcd2aafaSmlarkin pml1 = (uint64_t *)boot_alloc();
345fcd2aafaSmlarkin pml2[i + kern_pml2] = (uint64_t)pml1 | PG_V | PG_RW;
346fcd2aafaSmlarkin
347fcd2aafaSmlarkin /* The first page of PTEs may start at a different offset */
348fcd2aafaSmlarkin if (i == kern_pml2)
349fcd2aafaSmlarkin k = kern_pml1;
350fcd2aafaSmlarkin else
351fcd2aafaSmlarkin k = 0;
352fcd2aafaSmlarkin
353fcd2aafaSmlarkin /*
354fcd2aafaSmlarkin * Map [k...511] PTEs.
355fcd2aafaSmlarkin */
356731ae6f3Smlarkin for (j = k; j < NPDPG; j++)
357fcd2aafaSmlarkin pml1[j] = (uint64_t)(((8 + i) * NBPD_L2) +
358fcd2aafaSmlarkin (j - kern_pml1) * PAGE_SIZE) | PG_V | PG_RW;
359fcd2aafaSmlarkin }
360fcd2aafaSmlarkin
361fcd2aafaSmlarkin /* Map first 4GB phys for kernel page table, stack, and bootstrap */
362fcd2aafaSmlarkin pml3 = (uint64_t *)boot_alloc();
363fcd2aafaSmlarkin pml4[0] = (uint64_t)pml3 | PG_V | PG_RW; /* Covers 0-512GB */
364fcd2aafaSmlarkin
365fcd2aafaSmlarkin pml2 = (uint64_t *)boot_alloc();
366fcd2aafaSmlarkin pml3[0] = (uint64_t)pml2 | PG_V | PG_RW; /* Covers 0-1GB */
367fcd2aafaSmlarkin
368731ae6f3Smlarkin for (i = 0; i < NPDPG; i++)
369731ae6f3Smlarkin pml2[i] = (i << L2_SHIFT) | PG_V | PG_RW | PG_PS;
370fcd2aafaSmlarkin
371fcd2aafaSmlarkin return (caddr_t)pml4;
372fcd2aafaSmlarkin }
373