1f3c0184aSmlarkin /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2*65bbee46Sjsg /* $OpenBSD: loadfile_elf.c,v 1.50 2024/09/26 01:45:13 jsg Exp $ */ 3f3c0184aSmlarkin 4f3c0184aSmlarkin /*- 5f3c0184aSmlarkin * Copyright (c) 1997 The NetBSD Foundation, Inc. 6f3c0184aSmlarkin * All rights reserved. 7f3c0184aSmlarkin * 8f3c0184aSmlarkin * This code is derived from software contributed to The NetBSD Foundation 9f3c0184aSmlarkin * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10f3c0184aSmlarkin * NASA Ames Research Center and by Christos Zoulas. 11f3c0184aSmlarkin * 12f3c0184aSmlarkin * Redistribution and use in source and binary forms, with or without 13f3c0184aSmlarkin * modification, are permitted provided that the following conditions 14f3c0184aSmlarkin * are met: 15f3c0184aSmlarkin * 1. Redistributions of source code must retain the above copyright 16f3c0184aSmlarkin * notice, this list of conditions and the following disclaimer. 17f3c0184aSmlarkin * 2. Redistributions in binary form must reproduce the above copyright 18f3c0184aSmlarkin * notice, this list of conditions and the following disclaimer in the 19f3c0184aSmlarkin * documentation and/or other materials provided with the distribution. 20f3c0184aSmlarkin * 21f3c0184aSmlarkin * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22f3c0184aSmlarkin * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23f3c0184aSmlarkin * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24f3c0184aSmlarkin * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25f3c0184aSmlarkin * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26f3c0184aSmlarkin * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27f3c0184aSmlarkin * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28f3c0184aSmlarkin * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29f3c0184aSmlarkin * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30f3c0184aSmlarkin * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31f3c0184aSmlarkin * POSSIBILITY OF SUCH DAMAGE. 32f3c0184aSmlarkin */ 33f3c0184aSmlarkin 34f3c0184aSmlarkin /* 35f3c0184aSmlarkin * Copyright (c) 1992, 1993 36f3c0184aSmlarkin * The Regents of the University of California. All rights reserved. 37f3c0184aSmlarkin * 38f3c0184aSmlarkin * This code is derived from software contributed to Berkeley by 39f3c0184aSmlarkin * Ralph Campbell. 40f3c0184aSmlarkin * 41f3c0184aSmlarkin * Redistribution and use in source and binary forms, with or without 42f3c0184aSmlarkin * modification, are permitted provided that the following conditions 43f3c0184aSmlarkin * are met: 44f3c0184aSmlarkin * 1. Redistributions of source code must retain the above copyright 45f3c0184aSmlarkin * notice, this list of conditions and the following disclaimer. 46f3c0184aSmlarkin * 2. Redistributions in binary form must reproduce the above copyright 47f3c0184aSmlarkin * notice, this list of conditions and the following disclaimer in the 48f3c0184aSmlarkin * documentation and/or other materials provided with the distribution. 49f3c0184aSmlarkin * 3. Neither the name of the University nor the names of its contributors 50f3c0184aSmlarkin * may be used to endorse or promote products derived from this software 51f3c0184aSmlarkin * without specific prior written permission. 52f3c0184aSmlarkin * 53f3c0184aSmlarkin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54f3c0184aSmlarkin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55f3c0184aSmlarkin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56f3c0184aSmlarkin * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57f3c0184aSmlarkin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58f3c0184aSmlarkin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59f3c0184aSmlarkin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60f3c0184aSmlarkin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61f3c0184aSmlarkin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62f3c0184aSmlarkin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63f3c0184aSmlarkin * SUCH DAMAGE. 64f3c0184aSmlarkin * 65f3c0184aSmlarkin * @(#)boot.c 8.1 (Berkeley) 6/10/93 66f3c0184aSmlarkin */ 67f3c0184aSmlarkin 68f3c0184aSmlarkin /* 69f3c0184aSmlarkin * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70f3c0184aSmlarkin * 71f3c0184aSmlarkin * Permission to use, copy, modify, and distribute this software for any 72f3c0184aSmlarkin * purpose with or without fee is hereby granted, provided that the above 73f3c0184aSmlarkin * copyright notice and this permission notice appear in all copies. 74f3c0184aSmlarkin * 75f3c0184aSmlarkin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 76f3c0184aSmlarkin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 77f3c0184aSmlarkin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 78f3c0184aSmlarkin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 79f3c0184aSmlarkin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 80f3c0184aSmlarkin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 81f3c0184aSmlarkin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 82f3c0184aSmlarkin */ 83f3c0184aSmlarkin 8487302766Sderaadt #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */ 8587302766Sderaadt #include <sys/reboot.h> 8687302766Sderaadt #include <sys/exec.h> 8787302766Sderaadt 889f79a698Smpi #include <elf.h> 89f3c0184aSmlarkin #include <string.h> 90f3c0184aSmlarkin #include <errno.h> 91f3c0184aSmlarkin #include <stdlib.h> 92f3c0184aSmlarkin #include <unistd.h> 93f3c0184aSmlarkin #include <err.h> 94f3c0184aSmlarkin 95ba66f564Sdv #include <dev/vmm/vmm.h> 96ba66f564Sdv 97f3c0184aSmlarkin #include <machine/biosvar.h> 98f3c0184aSmlarkin #include <machine/segments.h> 9957544062Smlarkin #include <machine/specialreg.h> 100fc059887Smlarkin #include <machine/pte.h> 101f3c0184aSmlarkin 1023475ba91Smlarkin #include "loadfile.h" 1033475ba91Smlarkin #include "vmd.h" 1043475ba91Smlarkin 10565d68140Sclaudio #define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) 10665d68140Sclaudio 107f3c0184aSmlarkin union { 108f3c0184aSmlarkin Elf32_Ehdr elf32; 109f3c0184aSmlarkin Elf64_Ehdr elf64; 110f3c0184aSmlarkin } hdr; 111f3c0184aSmlarkin 112f3c0184aSmlarkin static void setsegment(struct mem_segment_descriptor *, uint32_t, 113f3c0184aSmlarkin size_t, int, int, int, int); 11445bdc46fSdv static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); 11545bdc46fSdv static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); 11640a3b6a0Sstefan static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); 1173227aa04Sclaudio static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); 118a13de4d1Skn static size_t push_stack(uint32_t, uint32_t); 119f3c0184aSmlarkin static void push_gdt(void); 12057544062Smlarkin static void push_pt_32(void); 12157544062Smlarkin static void push_pt_64(void); 12240a3b6a0Sstefan static void marc4random_buf(paddr_t, int); 12340a3b6a0Sstefan static void mbzero(paddr_t, int); 1247cc0a091Sstefan static void mbcopy(void *, paddr_t, int); 125f3c0184aSmlarkin 126f3c0184aSmlarkin extern char *__progname; 127f3c0184aSmlarkin extern int vm_id; 128f3c0184aSmlarkin 129f4b47ae8Sbluhm uint64_t pg_crypt = 0; 130f4b47ae8Sbluhm 131f3c0184aSmlarkin /* 132f3c0184aSmlarkin * setsegment 133f3c0184aSmlarkin * 134f3c0184aSmlarkin * Initializes a segment selector entry with the provided descriptor. 135f3c0184aSmlarkin * For the purposes of the bootloader mimiced by vmd(8), we only need 136f3c0184aSmlarkin * memory-type segment descriptor support. 137f3c0184aSmlarkin * 138f3c0184aSmlarkin * This function was copied from machdep.c 139f3c0184aSmlarkin * 140f3c0184aSmlarkin * Parameters: 141f3c0184aSmlarkin * sd: Address of the entry to initialize 142f3c0184aSmlarkin * base: base of the segment 143f3c0184aSmlarkin * limit: limit of the segment 144f3c0184aSmlarkin * type: type of the segment 145f3c0184aSmlarkin * dpl: privilege level of the egment 146f3c0184aSmlarkin * def32: default 16/32 bit size of the segment 147f3c0184aSmlarkin * gran: granularity of the segment (byte/page) 148f3c0184aSmlarkin */ 149f3c0184aSmlarkin static void 150f3c0184aSmlarkin setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, 151f3c0184aSmlarkin int type, int dpl, int def32, int gran) 152f3c0184aSmlarkin { 153f3c0184aSmlarkin sd->sd_lolimit = (int)limit; 154f3c0184aSmlarkin sd->sd_lobase = (int)base; 155f3c0184aSmlarkin sd->sd_type = type; 156f3c0184aSmlarkin sd->sd_dpl = dpl; 157f3c0184aSmlarkin sd->sd_p = 1; 158f3c0184aSmlarkin sd->sd_hilimit = (int)limit >> 16; 159f3c0184aSmlarkin sd->sd_avl = 0; 160f3c0184aSmlarkin sd->sd_long = 0; 161f3c0184aSmlarkin sd->sd_def32 = def32; 162f3c0184aSmlarkin sd->sd_gran = gran; 163f3c0184aSmlarkin sd->sd_hibase = (int)base >> 24; 164f3c0184aSmlarkin } 165f3c0184aSmlarkin 166f3c0184aSmlarkin /* 167f3c0184aSmlarkin * push_gdt 168f3c0184aSmlarkin * 169f3c0184aSmlarkin * Allocates and populates a page in the guest phys memory space to hold 170f3c0184aSmlarkin * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to 171f3c0184aSmlarkin * create the same GDT that a real bootloader would have created. 172f3c0184aSmlarkin * This is loaded into the guest phys RAM space at address GDT_PAGE. 173f3c0184aSmlarkin */ 174f3c0184aSmlarkin static void 175f3c0184aSmlarkin push_gdt(void) 176f3c0184aSmlarkin { 177f3c0184aSmlarkin uint8_t gdtpage[PAGE_SIZE]; 178f3c0184aSmlarkin struct mem_segment_descriptor *sd; 179f3c0184aSmlarkin 18082ff3aeaSmlarkin memset(&gdtpage, 0, sizeof(gdtpage)); 1819dae6e67Smlarkin 182f3c0184aSmlarkin sd = (struct mem_segment_descriptor *)&gdtpage; 183f3c0184aSmlarkin 184f3c0184aSmlarkin /* 185f3c0184aSmlarkin * Create three segment descriptors: 186f3c0184aSmlarkin * 1873a50f0a9Sjmc * GDT[0] : null descriptor. "Created" via memset above. 188f3c0184aSmlarkin * GDT[1] (selector @ 0x8): Executable segment, for CS 189f3c0184aSmlarkin * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS 190f3c0184aSmlarkin */ 191f3c0184aSmlarkin setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); 192f3c0184aSmlarkin setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); 193f3c0184aSmlarkin 1945d69b7aaSstefan write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); 195f4b47ae8Sbluhm sev_register_encryption(GDT_PAGE, PAGE_SIZE); 196f3c0184aSmlarkin } 197f3c0184aSmlarkin 198f3c0184aSmlarkin /* 19957544062Smlarkin * push_pt_32 200fc059887Smlarkin * 201fc059887Smlarkin * Create an identity-mapped page directory hierarchy mapping the first 20257544062Smlarkin * 4GB of physical memory. This is used during bootstrapping i386 VMs on 203fc059887Smlarkin * CPUs without unrestricted guest capability. 204fc059887Smlarkin */ 205fc059887Smlarkin static void 20657544062Smlarkin push_pt_32(void) 207fc059887Smlarkin { 20857544062Smlarkin uint32_t ptes[1024], i; 20957544062Smlarkin 21057544062Smlarkin memset(ptes, 0, sizeof(ptes)); 21157544062Smlarkin for (i = 0 ; i < 1024; i++) { 21257544062Smlarkin ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); 21357544062Smlarkin } 21457544062Smlarkin write_mem(PML3_PAGE, ptes, PAGE_SIZE); 21557544062Smlarkin } 21657544062Smlarkin 21757544062Smlarkin /* 21857544062Smlarkin * push_pt_64 21957544062Smlarkin * 22057544062Smlarkin * Create an identity-mapped page directory hierarchy mapping the first 22157544062Smlarkin * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on 22257544062Smlarkin * CPUs without unrestricted guest capability. 22357544062Smlarkin */ 22457544062Smlarkin static void 22557544062Smlarkin push_pt_64(void) 22657544062Smlarkin { 22757544062Smlarkin uint64_t ptes[512], i; 228fc059887Smlarkin 229c96d1163Smlarkin /* PDPDE0 - first 1GB */ 230fc059887Smlarkin memset(ptes, 0, sizeof(ptes)); 231f4b47ae8Sbluhm ptes[0] = pg_crypt | PG_V | PML3_PAGE; 232fc059887Smlarkin write_mem(PML4_PAGE, ptes, PAGE_SIZE); 233f4b47ae8Sbluhm sev_register_encryption(PML4_PAGE, PAGE_SIZE); 234fc059887Smlarkin 235c96d1163Smlarkin /* PDE0 - first 1GB */ 236fc059887Smlarkin memset(ptes, 0, sizeof(ptes)); 237f4b47ae8Sbluhm ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE; 238fc059887Smlarkin write_mem(PML3_PAGE, ptes, PAGE_SIZE); 239f4b47ae8Sbluhm sev_register_encryption(PML3_PAGE, PAGE_SIZE); 240fc059887Smlarkin 241c96d1163Smlarkin /* First 1GB (in 2MB pages) */ 242fc059887Smlarkin memset(ptes, 0, sizeof(ptes)); 24357544062Smlarkin for (i = 0 ; i < 512; i++) { 244f4b47ae8Sbluhm ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS | 245f4b47ae8Sbluhm ((2048 * 1024) * i); 246fc059887Smlarkin } 247fc059887Smlarkin write_mem(PML2_PAGE, ptes, PAGE_SIZE); 248f4b47ae8Sbluhm sev_register_encryption(PML2_PAGE, PAGE_SIZE); 249fc059887Smlarkin } 250fc059887Smlarkin 251fc059887Smlarkin /* 25238d0e0c3Sreyk * loadfile_elf 253f3c0184aSmlarkin * 2546668a736Sguenther * Loads an ELF kernel to its defined load address in the guest VM. 25540a3b6a0Sstefan * The kernel is loaded to its defined start point as set in the ELF header. 256f3c0184aSmlarkin * 257f3c0184aSmlarkin * Parameters: 25838d0e0c3Sreyk * fp: file of a kernel file to load 25940a3b6a0Sstefan * vcp: the VM create parameters, holding the exact memory map 26011fac253Sstefan * (out) vrs: register state to set on init for this kernel 26107052635Sreyk * bootdev: the optional non-default boot device 262cea90f51Smlarkin * howto: optional boot flags for the kernel 263f3c0184aSmlarkin * 264f3c0184aSmlarkin * Return values: 265f3c0184aSmlarkin * 0 if successful 26645bdc46fSdv * various error codes returned from gzread(3) or loadelf functions 267f3c0184aSmlarkin */ 268f3c0184aSmlarkin int 26973a98491Sdv loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, 27073a98491Sdv unsigned int bootdevice) 271f3c0184aSmlarkin { 27257544062Smlarkin int r, is_i386 = 0; 27340a3b6a0Sstefan uint32_t bootargsz; 27440a3b6a0Sstefan size_t n, stacksize; 275f3c0184aSmlarkin u_long marks[MARK_MAX]; 27640a3b6a0Sstefan bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1]; 2773227aa04Sclaudio bios_bootmac_t bm, *bootmac = NULL; 27873a98491Sdv struct vm_create_params *vcp = &vm->vm_params.vmc_params; 279f3c0184aSmlarkin 28045bdc46fSdv if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr)) 281f3c0184aSmlarkin return 1; 282f3c0184aSmlarkin 28382ff3aeaSmlarkin memset(&marks, 0, sizeof(marks)); 284f3c0184aSmlarkin if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 && 285f3c0184aSmlarkin hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { 28682fb2f0aSreyk r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL); 28757544062Smlarkin is_i386 = 1; 288f3c0184aSmlarkin } else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 && 289f3c0184aSmlarkin hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) { 29082fb2f0aSreyk r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL); 29138d0e0c3Sreyk } else 29238d0e0c3Sreyk errno = ENOEXEC; 293f3c0184aSmlarkin 294222311c5Smlarkin if (r) 295222311c5Smlarkin return (r); 296222311c5Smlarkin 297f3c0184aSmlarkin push_gdt(); 29857544062Smlarkin 29957544062Smlarkin if (is_i386) { 30057544062Smlarkin push_pt_32(); 30157544062Smlarkin /* Reconfigure the default flat-64 register set for 32 bit */ 30257544062Smlarkin vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE; 30357544062Smlarkin vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; 30457544062Smlarkin vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; 30557544062Smlarkin } 306f4b47ae8Sbluhm else { 307f4b47ae8Sbluhm if (vcp->vcp_sev) { 308f4b47ae8Sbluhm if (vcp->vcp_poscbit == 0) { 309f4b47ae8Sbluhm log_warnx("SEV enabled but no C-bit reported"); 310f4b47ae8Sbluhm return 1; 311f4b47ae8Sbluhm } 312f4b47ae8Sbluhm pg_crypt = (1ULL << vcp->vcp_poscbit); 313f4b47ae8Sbluhm log_debug("%s: poscbit %d pg_crypt 0x%016llx", 314f4b47ae8Sbluhm __func__, vcp->vcp_poscbit, pg_crypt); 315f4b47ae8Sbluhm } 31657544062Smlarkin push_pt_64(); 317f4b47ae8Sbluhm } 31857544062Smlarkin 3193227aa04Sclaudio if (bootdevice == VMBOOTDEV_NET) { 3203227aa04Sclaudio bootmac = &bm; 32173a98491Sdv memcpy(bootmac, vm->vm_params.vmc_macs[0], ETHER_ADDR_LEN); 3223227aa04Sclaudio } 32340a3b6a0Sstefan n = create_bios_memmap(vcp, memmap); 3243227aa04Sclaudio bootargsz = push_bootargs(memmap, n, bootmac); 325a13de4d1Skn stacksize = push_stack(bootargsz, marks[MARK_END]); 3263475ba91Smlarkin 32711fac253Sstefan vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY]; 32811fac253Sstefan vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize; 32911fac253Sstefan vrs->vrs_gdtr.vsi_base = GDT_PAGE; 330f3c0184aSmlarkin 33138d0e0c3Sreyk log_debug("%s: loaded ELF kernel", __func__); 33238d0e0c3Sreyk 333222311c5Smlarkin return (0); 334f3c0184aSmlarkin } 335f3c0184aSmlarkin 336f3c0184aSmlarkin /* 33740a3b6a0Sstefan * create_bios_memmap 33840a3b6a0Sstefan * 33940a3b6a0Sstefan * Construct a memory map as returned by the BIOS INT 0x15, e820 routine. 34040a3b6a0Sstefan * 34140a3b6a0Sstefan * Parameters: 34240a3b6a0Sstefan * vcp: the VM create parameters, containing the memory map passed to vmm(4) 34340a3b6a0Sstefan * memmap (out): the BIOS memory map 34440a3b6a0Sstefan * 34540a3b6a0Sstefan * Return values: 34640a3b6a0Sstefan * Number of bios_memmap_t entries, including the terminating nul-entry. 34740a3b6a0Sstefan */ 34840a3b6a0Sstefan static size_t 34940a3b6a0Sstefan create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) 35040a3b6a0Sstefan { 351cf08ffabSdv size_t i, n = 0; 35240a3b6a0Sstefan struct vm_mem_range *vmr; 35340a3b6a0Sstefan 354cf08ffabSdv for (i = 0; i < vcp->vcp_nmemranges; i++, n++) { 35540a3b6a0Sstefan vmr = &vcp->vcp_memranges[i]; 356cf08ffabSdv memmap[n].addr = vmr->vmr_gpa; 357cf08ffabSdv memmap[n].size = vmr->vmr_size; 358cf08ffabSdv if (vmr->vmr_type == VM_MEM_RAM) 359cf08ffabSdv memmap[n].type = BIOS_MAP_FREE; 36040a3b6a0Sstefan else 361cf08ffabSdv memmap[n].type = BIOS_MAP_RES; 36240a3b6a0Sstefan } 36340a3b6a0Sstefan 36440a3b6a0Sstefan /* Null mem map entry to denote the end of the ranges */ 36540a3b6a0Sstefan memmap[n].addr = 0x0; 36640a3b6a0Sstefan memmap[n].size = 0x0; 367cf08ffabSdv memmap[n].type = BIOS_MAP_END; 36840a3b6a0Sstefan n++; 36940a3b6a0Sstefan 37040a3b6a0Sstefan return (n); 37140a3b6a0Sstefan } 37240a3b6a0Sstefan 37340a3b6a0Sstefan /* 374f3c0184aSmlarkin * push_bootargs 375f3c0184aSmlarkin * 376f3c0184aSmlarkin * Creates the boot arguments page in the guest address space. 377f3c0184aSmlarkin * Since vmd(8) is acting as the bootloader, we need to create the same boot 378f3c0184aSmlarkin * arguments page that a real bootloader would have created. This is loaded 379f3c0184aSmlarkin * into the guest phys RAM space at address BOOTARGS_PAGE. 380f3c0184aSmlarkin * 381f3c0184aSmlarkin * Parameters: 38240a3b6a0Sstefan * memmap: the BIOS memory map 38340a3b6a0Sstefan * n: number of entries in memmap 3848652dcf0Sdv * bootmac: optional PXE boot MAC address 385f3c0184aSmlarkin * 386f3c0184aSmlarkin * Return values: 3878652dcf0Sdv * The size of the bootargs in bytes 388f3c0184aSmlarkin */ 38940a3b6a0Sstefan static uint32_t 3903227aa04Sclaudio push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) 391f3c0184aSmlarkin { 3923227aa04Sclaudio uint32_t memmap_sz, consdev_sz, bootmac_sz, i; 393f3c0184aSmlarkin bios_consdev_t consdev; 394f3c0184aSmlarkin uint32_t ba[1024]; 395f3c0184aSmlarkin 3968652dcf0Sdv memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t); 3978652dcf0Sdv ba[0] = BOOTARG_MEMMAP; 39840a3b6a0Sstefan ba[1] = memmap_sz; 3998652dcf0Sdv ba[2] = memmap_sz; 40040a3b6a0Sstefan memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t)); 4018652dcf0Sdv i = memmap_sz / sizeof(uint32_t); 402f3c0184aSmlarkin 403f3c0184aSmlarkin /* Serial console device, COM1 @ 0x3f8 */ 4048652dcf0Sdv memset(&consdev, 0, sizeof(consdev)); 4058652dcf0Sdv consdev.consdev = makedev(8, 0); 406fa2284ecSclaudio consdev.conspeed = 115200; 407f3c0184aSmlarkin consdev.consaddr = 0x3f8; 408f3c0184aSmlarkin 4098652dcf0Sdv consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t); 4108652dcf0Sdv ba[i] = BOOTARG_CONSDEV; 41140a3b6a0Sstefan ba[i + 1] = consdev_sz; 41240a3b6a0Sstefan ba[i + 2] = consdev_sz; 41340a3b6a0Sstefan memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t)); 4148652dcf0Sdv i += consdev_sz / sizeof(uint32_t); 41516a063dcSmlarkin 4163227aa04Sclaudio if (bootmac) { 4178652dcf0Sdv bootmac_sz = 3 * sizeof(uint32_t) + 4188652dcf0Sdv (sizeof(bios_bootmac_t) + 3) & ~3; 4198652dcf0Sdv ba[i] = BOOTARG_BOOTMAC; 4203227aa04Sclaudio ba[i + 1] = bootmac_sz; 4213227aa04Sclaudio ba[i + 2] = bootmac_sz; 4223227aa04Sclaudio memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t)); 4238652dcf0Sdv i += bootmac_sz / sizeof(uint32_t); 4243227aa04Sclaudio } 4253227aa04Sclaudio 426cc104512Sclaudio ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ 427f3c0184aSmlarkin 4285d69b7aaSstefan write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); 429f4b47ae8Sbluhm sev_register_encryption(BOOTARGS_PAGE, PAGE_SIZE); 43040a3b6a0Sstefan 4318652dcf0Sdv return (i * sizeof(uint32_t)); 432f3c0184aSmlarkin } 433f3c0184aSmlarkin 434f3c0184aSmlarkin /* 435f3c0184aSmlarkin * push_stack 436f3c0184aSmlarkin * 437f3c0184aSmlarkin * Creates the boot stack page in the guest address space. When using a real 438f3c0184aSmlarkin * bootloader, the stack will be prepared using the following format before 439f3c0184aSmlarkin * transitioning to kernel start, so vmd(8) needs to mimic the same stack 440f3c0184aSmlarkin * layout. The stack content is pushed to the guest phys RAM at address 441f3c0184aSmlarkin * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 442f3c0184aSmlarkin * 4 bytes. 443f3c0184aSmlarkin * 444f3c0184aSmlarkin * Stack Layout: (TOS == Top Of Stack) 445f3c0184aSmlarkin * TOS location of boot arguments page 446f3c0184aSmlarkin * TOS - 0x4 size of the content in the boot arguments page 44740a3b6a0Sstefan * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 44840a3b6a0Sstefan * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 449f3c0184aSmlarkin * TOS - 0x10 kernel 'end' symbol value 450f3c0184aSmlarkin * TOS - 0x14 version of bootarg API 451f3c0184aSmlarkin * 452f3c0184aSmlarkin * Parameters: 45340a3b6a0Sstefan * bootargsz: size of boot arguments 454f3c0184aSmlarkin * end: kernel 'end' symbol value 45507052635Sreyk * bootdev: the optional non-default boot device 456cea90f51Smlarkin * howto: optional boot flags for the kernel 457f3c0184aSmlarkin * 458f3c0184aSmlarkin * Return values: 4593475ba91Smlarkin * size of the stack 460f3c0184aSmlarkin */ 4613475ba91Smlarkin static size_t 462a13de4d1Skn push_stack(uint32_t bootargsz, uint32_t end) 463f3c0184aSmlarkin { 464f3c0184aSmlarkin uint32_t stack[1024]; 465f3c0184aSmlarkin uint16_t loc; 466f3c0184aSmlarkin 46782ff3aeaSmlarkin memset(&stack, 0, sizeof(stack)); 468f3c0184aSmlarkin loc = 1024; 469f3c0184aSmlarkin 470f3c0184aSmlarkin stack[--loc] = BOOTARGS_PAGE; 47140a3b6a0Sstefan stack[--loc] = bootargsz; 47240a3b6a0Sstefan stack[--loc] = 0; /* biosbasemem */ 47340a3b6a0Sstefan stack[--loc] = 0; /* biosextmem */ 474f3c0184aSmlarkin stack[--loc] = end; 475f3c0184aSmlarkin stack[--loc] = 0x0e; 476a13de4d1Skn stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 477a13de4d1Skn stack[--loc] = 0; 478f3c0184aSmlarkin 4795d69b7aaSstefan write_mem(STACK_PAGE, &stack, PAGE_SIZE); 480f4b47ae8Sbluhm sev_register_encryption(STACK_PAGE, PAGE_SIZE); 4813475ba91Smlarkin 4823475ba91Smlarkin return (1024 - (loc - 1)) * sizeof(uint32_t); 483f3c0184aSmlarkin } 484f3c0184aSmlarkin 485f3c0184aSmlarkin /* 486f3c0184aSmlarkin * mread 487f3c0184aSmlarkin * 488f3c0184aSmlarkin * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 48940a3b6a0Sstefan * into the guest address space at paddr 'addr'. 490f3c0184aSmlarkin * 491f3c0184aSmlarkin * Parameters: 49245bdc46fSdv * fp: kernel image file to read from. 493f3c0184aSmlarkin * addr: guest paddr_t to load to 494f3c0184aSmlarkin * sz: number of bytes to load 495f3c0184aSmlarkin * 496f3c0184aSmlarkin * Return values: 497f3c0184aSmlarkin * returns 'sz' if successful, or 0 otherwise. 498f3c0184aSmlarkin */ 49938d0e0c3Sreyk size_t 50045bdc46fSdv mread(gzFile fp, paddr_t addr, size_t sz) 501f3c0184aSmlarkin { 5024dcbda62Sdv const char *errstr = NULL; 5034dcbda62Sdv int errnum = 0; 50482fb2f0aSreyk size_t ct; 50598142e86Sclaudio size_t i, osz; 506f3c0184aSmlarkin char buf[PAGE_SIZE]; 507f3c0184aSmlarkin 508f4b47ae8Sbluhm sev_register_encryption(addr, sz); 509f4b47ae8Sbluhm 510f3c0184aSmlarkin /* 511f3c0184aSmlarkin * break up the 'sz' bytes into PAGE_SIZE chunks for use with 5124835dca6Sstefan * write_mem 513f3c0184aSmlarkin */ 514f3c0184aSmlarkin ct = 0; 515f3c0184aSmlarkin osz = sz; 516f3c0184aSmlarkin if ((addr & PAGE_MASK) != 0) { 51782ff3aeaSmlarkin memset(buf, 0, sizeof(buf)); 518f3c0184aSmlarkin if (sz > PAGE_SIZE) 519f3c0184aSmlarkin ct = PAGE_SIZE - (addr & PAGE_MASK); 520f3c0184aSmlarkin else 521f3c0184aSmlarkin ct = sz; 522f3c0184aSmlarkin 52345bdc46fSdv if ((size_t)gzread(fp, buf, ct) != ct) { 5244dcbda62Sdv errstr = gzerror(fp, &errnum); 5254dcbda62Sdv if (errnum == Z_ERRNO) 5264dcbda62Sdv errnum = errno; 5274dcbda62Sdv log_warnx("%s: error %d in mread, %s", __progname, 5284dcbda62Sdv errnum, errstr); 529f3c0184aSmlarkin return (0); 530f3c0184aSmlarkin } 531f3c0184aSmlarkin 5325d69b7aaSstefan if (write_mem(addr, buf, ct)) 533f3c0184aSmlarkin return (0); 534f3c0184aSmlarkin 535f3c0184aSmlarkin addr += ct; 536f3c0184aSmlarkin } 537f3c0184aSmlarkin 538f3c0184aSmlarkin sz = sz - ct; 539f3c0184aSmlarkin 540f3c0184aSmlarkin if (sz == 0) 541f3c0184aSmlarkin return (osz); 542f3c0184aSmlarkin 543f3c0184aSmlarkin for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) { 54482ff3aeaSmlarkin memset(buf, 0, sizeof(buf)); 545f3c0184aSmlarkin if (i + PAGE_SIZE > sz) 546f3c0184aSmlarkin ct = sz - i; 547f3c0184aSmlarkin else 548f3c0184aSmlarkin ct = PAGE_SIZE; 549f3c0184aSmlarkin 55045bdc46fSdv if ((size_t)gzread(fp, buf, ct) != ct) { 5514dcbda62Sdv errstr = gzerror(fp, &errnum); 5524dcbda62Sdv if (errnum == Z_ERRNO) 5534dcbda62Sdv errnum = errno; 5544dcbda62Sdv log_warnx("%s: error %d in mread, %s", __progname, 5554dcbda62Sdv errnum, errstr); 556f3c0184aSmlarkin return (0); 557f3c0184aSmlarkin } 558f3c0184aSmlarkin 5595d69b7aaSstefan if (write_mem(addr, buf, ct)) 560f3c0184aSmlarkin return (0); 561f3c0184aSmlarkin } 562f3c0184aSmlarkin 563f3c0184aSmlarkin return (osz); 564f3c0184aSmlarkin } 565f3c0184aSmlarkin 566f3c0184aSmlarkin /* 567f3c0184aSmlarkin * marc4random_buf 568f3c0184aSmlarkin * 569f3c0184aSmlarkin * load 'sz' bytes of random data into the guest address space at paddr 57040a3b6a0Sstefan * 'addr'. 571f3c0184aSmlarkin * 572f3c0184aSmlarkin * Parameters: 573f3c0184aSmlarkin * addr: guest paddr_t to load random bytes into 574f3c0184aSmlarkin * sz: number of random bytes to load 575f3c0184aSmlarkin * 576f3c0184aSmlarkin * Return values: 577f3c0184aSmlarkin * nothing 578f3c0184aSmlarkin */ 579f3c0184aSmlarkin static void 58040a3b6a0Sstefan marc4random_buf(paddr_t addr, int sz) 581f3c0184aSmlarkin { 582f3c0184aSmlarkin int i, ct; 583f3c0184aSmlarkin char buf[PAGE_SIZE]; 584f3c0184aSmlarkin 585f4b47ae8Sbluhm sev_register_encryption(addr, sz); 586f4b47ae8Sbluhm 587f3c0184aSmlarkin /* 588f3c0184aSmlarkin * break up the 'sz' bytes into PAGE_SIZE chunks for use with 5894835dca6Sstefan * write_mem 590f3c0184aSmlarkin */ 591f3c0184aSmlarkin ct = 0; 592f3c0184aSmlarkin if (addr % PAGE_SIZE != 0) { 59382ff3aeaSmlarkin memset(buf, 0, sizeof(buf)); 594f3c0184aSmlarkin ct = PAGE_SIZE - (addr % PAGE_SIZE); 595f3c0184aSmlarkin 596f3c0184aSmlarkin arc4random_buf(buf, ct); 597f3c0184aSmlarkin 5985d69b7aaSstefan if (write_mem(addr, buf, ct)) 599f3c0184aSmlarkin return; 600f3c0184aSmlarkin 601f3c0184aSmlarkin addr += ct; 602f3c0184aSmlarkin } 603f3c0184aSmlarkin 604f3c0184aSmlarkin for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 60582ff3aeaSmlarkin memset(buf, 0, sizeof(buf)); 606f3c0184aSmlarkin if (i + PAGE_SIZE > sz) 607f3c0184aSmlarkin ct = sz - i; 608f3c0184aSmlarkin else 609f3c0184aSmlarkin ct = PAGE_SIZE; 610f3c0184aSmlarkin 611f3c0184aSmlarkin arc4random_buf(buf, ct); 612f3c0184aSmlarkin 6135d69b7aaSstefan if (write_mem(addr, buf, ct)) 614f3c0184aSmlarkin return; 615f3c0184aSmlarkin } 616f3c0184aSmlarkin } 617f3c0184aSmlarkin 618f3c0184aSmlarkin /* 619f3c0184aSmlarkin * mbzero 620f3c0184aSmlarkin * 621f3c0184aSmlarkin * load 'sz' bytes of zeros into the guest address space at paddr 62240a3b6a0Sstefan * 'addr'. 623f3c0184aSmlarkin * 624f3c0184aSmlarkin * Parameters: 625f3c0184aSmlarkin * addr: guest paddr_t to zero 626f3c0184aSmlarkin * sz: number of zero bytes to store 627f3c0184aSmlarkin * 628f3c0184aSmlarkin * Return values: 629f3c0184aSmlarkin * nothing 630f3c0184aSmlarkin */ 631f3c0184aSmlarkin static void 63240a3b6a0Sstefan mbzero(paddr_t addr, int sz) 633f3c0184aSmlarkin { 634c4bec800Sclaudio if (write_mem(addr, NULL, sz)) 635f3c0184aSmlarkin return; 636f4b47ae8Sbluhm sev_register_encryption(addr, sz); 637f3c0184aSmlarkin } 638f3c0184aSmlarkin 639f3c0184aSmlarkin /* 640f3c0184aSmlarkin * mbcopy 641f3c0184aSmlarkin * 6427cc0a091Sstefan * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 643f3c0184aSmlarkin * 644f3c0184aSmlarkin * Parameters: 6457cc0a091Sstefan * src: source buffer to copy from 646f3c0184aSmlarkin * dst: destination guest paddr_t to copy to 647f3c0184aSmlarkin * sz: number of bytes to copy 648f3c0184aSmlarkin * 649f3c0184aSmlarkin * Return values: 650f3c0184aSmlarkin * nothing 651f3c0184aSmlarkin */ 652f3c0184aSmlarkin static void 6537cc0a091Sstefan mbcopy(void *src, paddr_t dst, int sz) 654f3c0184aSmlarkin { 6557cc0a091Sstefan write_mem(dst, src, sz); 656f4b47ae8Sbluhm sev_register_encryption(dst, sz); 657f3c0184aSmlarkin } 658f3c0184aSmlarkin 659f3c0184aSmlarkin /* 660f3c0184aSmlarkin * elf64_exec 661f3c0184aSmlarkin * 66245bdc46fSdv * Load the kernel indicated by 'fp' into the guest physical memory 663f3c0184aSmlarkin * space, at the addresses defined in the ELF header. 664f3c0184aSmlarkin * 665f3c0184aSmlarkin * This function is used for 64 bit kernels. 666f3c0184aSmlarkin * 667f3c0184aSmlarkin * Parameters: 66845bdc46fSdv * fp: kernel image file to load 669f3c0184aSmlarkin * elf: ELF header of the kernel 670f3c0184aSmlarkin * marks: array to store the offsets of various kernel structures 671f3c0184aSmlarkin * (start, bss, etc) 672f3c0184aSmlarkin * flags: flag value to indicate which section(s) to load (usually 673f3c0184aSmlarkin * LOAD_ALL) 674f3c0184aSmlarkin * 675f3c0184aSmlarkin * Return values: 676f3c0184aSmlarkin * 0 if successful 677f3c0184aSmlarkin * 1 if unsuccessful 678f3c0184aSmlarkin */ 679b275a5ddSmlarkin static int 68045bdc46fSdv elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags) 681f3c0184aSmlarkin { 682f3c0184aSmlarkin Elf64_Shdr *shp; 683f3c0184aSmlarkin Elf64_Phdr *phdr; 684f3c0184aSmlarkin Elf64_Off off; 685f3c0184aSmlarkin int i; 68682fb2f0aSreyk size_t sz; 687174697beSvisa int havesyms; 688f3c0184aSmlarkin paddr_t minp = ~0, maxp = 0, pos = 0; 689f3c0184aSmlarkin paddr_t offset = marks[MARK_START], shpp, elfp; 690f3c0184aSmlarkin 691f3c0184aSmlarkin sz = elf->e_phnum * sizeof(Elf64_Phdr); 692f3c0184aSmlarkin phdr = malloc(sz); 693f3c0184aSmlarkin 69445bdc46fSdv if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 695f3c0184aSmlarkin free(phdr); 696f3c0184aSmlarkin return 1; 697f3c0184aSmlarkin } 698f3c0184aSmlarkin 69945bdc46fSdv if ((size_t)gzread(fp, phdr, sz) != sz) { 700f3c0184aSmlarkin free(phdr); 701f3c0184aSmlarkin return 1; 702f3c0184aSmlarkin } 703f3c0184aSmlarkin 70498142e86Sclaudio for (i = 0; i < elf->e_phnum; i++) { 705f3c0184aSmlarkin if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 706f3c0184aSmlarkin int m; 707f3c0184aSmlarkin 708f3c0184aSmlarkin /* Fill segment if asked for. */ 709f3c0184aSmlarkin if (flags & LOAD_RANDOM) { 710f3c0184aSmlarkin for (pos = 0; pos < phdr[i].p_filesz; 711f3c0184aSmlarkin pos += m) { 712f3c0184aSmlarkin m = phdr[i].p_filesz - pos; 713762ebf84Sreyk marc4random_buf(phdr[i].p_paddr + pos, 714762ebf84Sreyk m); 715f3c0184aSmlarkin } 716f3c0184aSmlarkin } 717f3c0184aSmlarkin if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 718f3c0184aSmlarkin marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 719f3c0184aSmlarkin marks[MARK_ERANDOM] = 720f3c0184aSmlarkin marks[MARK_RANDOM] + phdr[i].p_filesz; 721f3c0184aSmlarkin } 722f3c0184aSmlarkin continue; 723f3c0184aSmlarkin } 724f3c0184aSmlarkin 725f3c0184aSmlarkin if (phdr[i].p_type != PT_LOAD || 726f3c0184aSmlarkin (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 727f3c0184aSmlarkin continue; 728f3c0184aSmlarkin 729f3c0184aSmlarkin #define IS_TEXT(p) (p.p_flags & PF_X) 730f3c0184aSmlarkin #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 731f3c0184aSmlarkin #define IS_BSS(p) (p.p_filesz < p.p_memsz) 732f3c0184aSmlarkin /* 733f3c0184aSmlarkin * XXX: Assume first address is lowest 734f3c0184aSmlarkin */ 735f3c0184aSmlarkin if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 736f3c0184aSmlarkin (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 737f3c0184aSmlarkin 738f3c0184aSmlarkin /* Read in segment. */ 73945bdc46fSdv if (gzseek(fp, (off_t)phdr[i].p_offset, 740762ebf84Sreyk SEEK_SET) == -1) { 741f3c0184aSmlarkin free(phdr); 742f3c0184aSmlarkin return 1; 743f3c0184aSmlarkin } 74482fb2f0aSreyk if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 745f3c0184aSmlarkin phdr[i].p_filesz) { 746f3c0184aSmlarkin free(phdr); 747f3c0184aSmlarkin return 1; 748f3c0184aSmlarkin } 749f3c0184aSmlarkin } 750f3c0184aSmlarkin 751f3c0184aSmlarkin if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 752f3c0184aSmlarkin (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 753f3c0184aSmlarkin pos = phdr[i].p_paddr; 754f3c0184aSmlarkin if (minp > pos) 755f3c0184aSmlarkin minp = pos; 756f3c0184aSmlarkin pos += phdr[i].p_filesz; 757f3c0184aSmlarkin if (maxp < pos) 758f3c0184aSmlarkin maxp = pos; 759f3c0184aSmlarkin } 760f3c0184aSmlarkin 761f3c0184aSmlarkin /* Zero out BSS. */ 762f3c0184aSmlarkin if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 7635d69b7aaSstefan mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 764f3c0184aSmlarkin phdr[i].p_memsz - phdr[i].p_filesz); 765f3c0184aSmlarkin } 766f3c0184aSmlarkin if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 767f3c0184aSmlarkin pos += phdr[i].p_memsz - phdr[i].p_filesz; 768f3c0184aSmlarkin if (maxp < pos) 769f3c0184aSmlarkin maxp = pos; 770f3c0184aSmlarkin } 771f3c0184aSmlarkin } 772f3c0184aSmlarkin free(phdr); 773f3c0184aSmlarkin 774f3c0184aSmlarkin /* 775f3c0184aSmlarkin * Copy the ELF and section headers. 776f3c0184aSmlarkin */ 777f3c0184aSmlarkin elfp = maxp = roundup(maxp, sizeof(Elf64_Addr)); 778f3c0184aSmlarkin if (flags & (LOAD_HDR | COUNT_HDR)) 779f3c0184aSmlarkin maxp += sizeof(Elf64_Ehdr); 780f3c0184aSmlarkin 781f3c0184aSmlarkin if (flags & (LOAD_SYM | COUNT_SYM)) { 78245bdc46fSdv if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 78345bdc46fSdv warn("gzseek section headers"); 784f3c0184aSmlarkin return 1; 785f3c0184aSmlarkin } 786f3c0184aSmlarkin sz = elf->e_shnum * sizeof(Elf64_Shdr); 787f3c0184aSmlarkin shp = malloc(sz); 788f3c0184aSmlarkin 78945bdc46fSdv if ((size_t)gzread(fp, shp, sz) != sz) { 790f3c0184aSmlarkin free(shp); 791f3c0184aSmlarkin return 1; 792f3c0184aSmlarkin } 793f3c0184aSmlarkin 794f3c0184aSmlarkin shpp = maxp; 795f3c0184aSmlarkin maxp += roundup(sz, sizeof(Elf64_Addr)); 796f3c0184aSmlarkin 79782fb2f0aSreyk size_t shstrsz = shp[elf->e_shstrndx].sh_size; 798f3c0184aSmlarkin char *shstr = malloc(shstrsz); 79945bdc46fSdv if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 800f3c0184aSmlarkin SEEK_SET) == -1) { 801f3c0184aSmlarkin free(shstr); 802f3c0184aSmlarkin free(shp); 803f3c0184aSmlarkin return 1; 804f3c0184aSmlarkin } 80545bdc46fSdv if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 806f3c0184aSmlarkin free(shstr); 807f3c0184aSmlarkin free(shp); 808f3c0184aSmlarkin return 1; 809f3c0184aSmlarkin } 810f3c0184aSmlarkin 811f3c0184aSmlarkin /* 812f3c0184aSmlarkin * Now load the symbol sections themselves. Make sure the 813f3c0184aSmlarkin * sections are aligned. Don't bother with string tables if 814f3c0184aSmlarkin * there are no symbol sections. 815f3c0184aSmlarkin */ 816f3c0184aSmlarkin off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr)); 817f3c0184aSmlarkin 818174697beSvisa for (havesyms = i = 0; i < elf->e_shnum; i++) 819f3c0184aSmlarkin if (shp[i].sh_type == SHT_SYMTAB) 820f3c0184aSmlarkin havesyms = 1; 821f3c0184aSmlarkin 82298142e86Sclaudio for (i = 0; i < elf->e_shnum; i++) { 823f3c0184aSmlarkin if (shp[i].sh_type == SHT_SYMTAB || 824f3c0184aSmlarkin shp[i].sh_type == SHT_STRTAB || 82559027026Sjasper !strcmp(shstr + shp[i].sh_name, ".debug_line") || 82672123b7dSjasper !strcmp(shstr + shp[i].sh_name, ELF_CTF)) { 827f3c0184aSmlarkin if (havesyms && (flags & LOAD_SYM)) { 82845bdc46fSdv if (gzseek(fp, (off_t)shp[i].sh_offset, 829f3c0184aSmlarkin SEEK_SET) == -1) { 830f3c0184aSmlarkin free(shstr); 831f3c0184aSmlarkin free(shp); 832f3c0184aSmlarkin return 1; 833f3c0184aSmlarkin } 83482fb2f0aSreyk if (mread(fp, maxp, 835f3c0184aSmlarkin shp[i].sh_size) != shp[i].sh_size) { 836f3c0184aSmlarkin free(shstr); 837f3c0184aSmlarkin free(shp); 838f3c0184aSmlarkin return 1; 839f3c0184aSmlarkin } 840f3c0184aSmlarkin } 841f3c0184aSmlarkin maxp += roundup(shp[i].sh_size, 842f3c0184aSmlarkin sizeof(Elf64_Addr)); 843f3c0184aSmlarkin shp[i].sh_offset = off; 844f3c0184aSmlarkin shp[i].sh_flags |= SHF_ALLOC; 845f3c0184aSmlarkin off += roundup(shp[i].sh_size, 846f3c0184aSmlarkin sizeof(Elf64_Addr)); 847f3c0184aSmlarkin } 848f3c0184aSmlarkin } 849f3c0184aSmlarkin if (flags & LOAD_SYM) { 8507cc0a091Sstefan mbcopy(shp, shpp, sz); 851f3c0184aSmlarkin } 852f3c0184aSmlarkin free(shstr); 853f3c0184aSmlarkin free(shp); 854f3c0184aSmlarkin } 855f3c0184aSmlarkin 856f3c0184aSmlarkin /* 857f3c0184aSmlarkin * Frob the copied ELF header to give information relative 858f3c0184aSmlarkin * to elfp. 859f3c0184aSmlarkin */ 860f3c0184aSmlarkin if (flags & LOAD_HDR) { 861f3c0184aSmlarkin elf->e_phoff = 0; 862f3c0184aSmlarkin elf->e_shoff = sizeof(Elf64_Ehdr); 863f3c0184aSmlarkin elf->e_phentsize = 0; 864f3c0184aSmlarkin elf->e_phnum = 0; 8657cc0a091Sstefan mbcopy(elf, elfp, sizeof(*elf)); 866f3c0184aSmlarkin } 867f3c0184aSmlarkin 868f3c0184aSmlarkin marks[MARK_START] = LOADADDR(minp); 869f3c0184aSmlarkin marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 870f3c0184aSmlarkin marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 871f3c0184aSmlarkin marks[MARK_SYM] = LOADADDR(elfp); 872f3c0184aSmlarkin marks[MARK_END] = LOADADDR(maxp); 873f3c0184aSmlarkin 874f3c0184aSmlarkin return 0; 875f3c0184aSmlarkin } 876f3c0184aSmlarkin 877f3c0184aSmlarkin /* 878f3c0184aSmlarkin * elf32_exec 879f3c0184aSmlarkin * 88045bdc46fSdv * Load the kernel indicated by 'fp' into the guest physical memory 881f3c0184aSmlarkin * space, at the addresses defined in the ELF header. 882f3c0184aSmlarkin * 883f3c0184aSmlarkin * This function is used for 32 bit kernels. 884f3c0184aSmlarkin * 885f3c0184aSmlarkin * Parameters: 88645bdc46fSdv * fp: kernel image file to load 887f3c0184aSmlarkin * elf: ELF header of the kernel 888f3c0184aSmlarkin * marks: array to store the offsets of various kernel structures 889f3c0184aSmlarkin * (start, bss, etc) 890f3c0184aSmlarkin * flags: flag value to indicate which section(s) to load (usually 891f3c0184aSmlarkin * LOAD_ALL) 892f3c0184aSmlarkin * 893f3c0184aSmlarkin * Return values: 894f3c0184aSmlarkin * 0 if successful 895f3c0184aSmlarkin * 1 if unsuccessful 896f3c0184aSmlarkin */ 897b275a5ddSmlarkin static int 89845bdc46fSdv elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags) 899f3c0184aSmlarkin { 900f3c0184aSmlarkin Elf32_Shdr *shp; 901f3c0184aSmlarkin Elf32_Phdr *phdr; 902f3c0184aSmlarkin Elf32_Off off; 903f3c0184aSmlarkin int i; 90482fb2f0aSreyk size_t sz; 905174697beSvisa int havesyms; 906f3c0184aSmlarkin paddr_t minp = ~0, maxp = 0, pos = 0; 907f3c0184aSmlarkin paddr_t offset = marks[MARK_START], shpp, elfp; 908f3c0184aSmlarkin 909f3c0184aSmlarkin sz = elf->e_phnum * sizeof(Elf32_Phdr); 910f3c0184aSmlarkin phdr = malloc(sz); 911f3c0184aSmlarkin 91245bdc46fSdv if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 913f3c0184aSmlarkin free(phdr); 914f3c0184aSmlarkin return 1; 915f3c0184aSmlarkin } 916f3c0184aSmlarkin 91745bdc46fSdv if ((size_t)gzread(fp, phdr, sz) != sz) { 918f3c0184aSmlarkin free(phdr); 919f3c0184aSmlarkin return 1; 920f3c0184aSmlarkin } 921f3c0184aSmlarkin 92298142e86Sclaudio for (i = 0; i < elf->e_phnum; i++) { 923f3c0184aSmlarkin if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 924f3c0184aSmlarkin int m; 925f3c0184aSmlarkin 926f3c0184aSmlarkin /* Fill segment if asked for. */ 927f3c0184aSmlarkin if (flags & LOAD_RANDOM) { 928f3c0184aSmlarkin for (pos = 0; pos < phdr[i].p_filesz; 929f3c0184aSmlarkin pos += m) { 930f3c0184aSmlarkin m = phdr[i].p_filesz - pos; 931762ebf84Sreyk marc4random_buf(phdr[i].p_paddr + pos, 932762ebf84Sreyk m); 933f3c0184aSmlarkin } 934f3c0184aSmlarkin } 935f3c0184aSmlarkin if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 936f3c0184aSmlarkin marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 937f3c0184aSmlarkin marks[MARK_ERANDOM] = 938f3c0184aSmlarkin marks[MARK_RANDOM] + phdr[i].p_filesz; 939f3c0184aSmlarkin } 940f3c0184aSmlarkin continue; 941f3c0184aSmlarkin } 942f3c0184aSmlarkin 943f3c0184aSmlarkin if (phdr[i].p_type != PT_LOAD || 944f3c0184aSmlarkin (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 945f3c0184aSmlarkin continue; 946f3c0184aSmlarkin 947f3c0184aSmlarkin #define IS_TEXT(p) (p.p_flags & PF_X) 948f3c0184aSmlarkin #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 949f3c0184aSmlarkin #define IS_BSS(p) (p.p_filesz < p.p_memsz) 950f3c0184aSmlarkin /* 951f3c0184aSmlarkin * XXX: Assume first address is lowest 952f3c0184aSmlarkin */ 953f3c0184aSmlarkin if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 954f3c0184aSmlarkin (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 955f3c0184aSmlarkin 956f3c0184aSmlarkin /* Read in segment. */ 95745bdc46fSdv if (gzseek(fp, (off_t)phdr[i].p_offset, 958762ebf84Sreyk SEEK_SET) == -1) { 959f3c0184aSmlarkin free(phdr); 960f3c0184aSmlarkin return 1; 961f3c0184aSmlarkin } 96282fb2f0aSreyk if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 963f3c0184aSmlarkin phdr[i].p_filesz) { 964f3c0184aSmlarkin free(phdr); 965f3c0184aSmlarkin return 1; 966f3c0184aSmlarkin } 967f3c0184aSmlarkin } 968f3c0184aSmlarkin 969f3c0184aSmlarkin if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 970f3c0184aSmlarkin (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 971f3c0184aSmlarkin pos = phdr[i].p_paddr; 972f3c0184aSmlarkin if (minp > pos) 973f3c0184aSmlarkin minp = pos; 974f3c0184aSmlarkin pos += phdr[i].p_filesz; 975f3c0184aSmlarkin if (maxp < pos) 976f3c0184aSmlarkin maxp = pos; 977f3c0184aSmlarkin } 978f3c0184aSmlarkin 979f3c0184aSmlarkin /* Zero out BSS. */ 980f3c0184aSmlarkin if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 981f3c0184aSmlarkin mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 982f3c0184aSmlarkin phdr[i].p_memsz - phdr[i].p_filesz); 983f3c0184aSmlarkin } 984f3c0184aSmlarkin if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 985f3c0184aSmlarkin pos += phdr[i].p_memsz - phdr[i].p_filesz; 986f3c0184aSmlarkin if (maxp < pos) 987f3c0184aSmlarkin maxp = pos; 988f3c0184aSmlarkin } 989f3c0184aSmlarkin } 990f3c0184aSmlarkin free(phdr); 991f3c0184aSmlarkin 992f3c0184aSmlarkin /* 993f3c0184aSmlarkin * Copy the ELF and section headers. 994f3c0184aSmlarkin */ 995f3c0184aSmlarkin elfp = maxp = roundup(maxp, sizeof(Elf32_Addr)); 996f3c0184aSmlarkin if (flags & (LOAD_HDR | COUNT_HDR)) 997f3c0184aSmlarkin maxp += sizeof(Elf32_Ehdr); 998f3c0184aSmlarkin 999f3c0184aSmlarkin if (flags & (LOAD_SYM | COUNT_SYM)) { 100045bdc46fSdv if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 100165d68140Sclaudio warn("lseek section headers"); 1002f3c0184aSmlarkin return 1; 1003f3c0184aSmlarkin } 1004f3c0184aSmlarkin sz = elf->e_shnum * sizeof(Elf32_Shdr); 1005f3c0184aSmlarkin shp = malloc(sz); 1006f3c0184aSmlarkin 100745bdc46fSdv if ((size_t)gzread(fp, shp, sz) != sz) { 1008f3c0184aSmlarkin free(shp); 1009f3c0184aSmlarkin return 1; 1010f3c0184aSmlarkin } 1011f3c0184aSmlarkin 1012f3c0184aSmlarkin shpp = maxp; 1013f3c0184aSmlarkin maxp += roundup(sz, sizeof(Elf32_Addr)); 1014f3c0184aSmlarkin 101582fb2f0aSreyk size_t shstrsz = shp[elf->e_shstrndx].sh_size; 1016f3c0184aSmlarkin char *shstr = malloc(shstrsz); 101745bdc46fSdv if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 1018f3c0184aSmlarkin SEEK_SET) == -1) { 1019f3c0184aSmlarkin free(shstr); 1020f3c0184aSmlarkin free(shp); 1021f3c0184aSmlarkin return 1; 1022f3c0184aSmlarkin } 102345bdc46fSdv if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 1024f3c0184aSmlarkin free(shstr); 1025f3c0184aSmlarkin free(shp); 1026f3c0184aSmlarkin return 1; 1027f3c0184aSmlarkin } 1028f3c0184aSmlarkin 1029f3c0184aSmlarkin /* 1030f3c0184aSmlarkin * Now load the symbol sections themselves. Make sure the 1031f3c0184aSmlarkin * sections are aligned. Don't bother with string tables if 1032f3c0184aSmlarkin * there are no symbol sections. 1033f3c0184aSmlarkin */ 1034f3c0184aSmlarkin off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr)); 1035f3c0184aSmlarkin 1036174697beSvisa for (havesyms = i = 0; i < elf->e_shnum; i++) 1037f3c0184aSmlarkin if (shp[i].sh_type == SHT_SYMTAB) 1038f3c0184aSmlarkin havesyms = 1; 1039f3c0184aSmlarkin 104098142e86Sclaudio for (i = 0; i < elf->e_shnum; i++) { 1041f3c0184aSmlarkin if (shp[i].sh_type == SHT_SYMTAB || 1042f3c0184aSmlarkin shp[i].sh_type == SHT_STRTAB || 1043f3c0184aSmlarkin !strcmp(shstr + shp[i].sh_name, ".debug_line")) { 1044f3c0184aSmlarkin if (havesyms && (flags & LOAD_SYM)) { 104545bdc46fSdv if (gzseek(fp, (off_t)shp[i].sh_offset, 1046f3c0184aSmlarkin SEEK_SET) == -1) { 1047f3c0184aSmlarkin free(shstr); 1048f3c0184aSmlarkin free(shp); 1049f3c0184aSmlarkin return 1; 1050f3c0184aSmlarkin } 105182fb2f0aSreyk if (mread(fp, maxp, 1052f3c0184aSmlarkin shp[i].sh_size) != shp[i].sh_size) { 1053f3c0184aSmlarkin free(shstr); 1054f3c0184aSmlarkin free(shp); 1055f3c0184aSmlarkin return 1; 1056f3c0184aSmlarkin } 1057f3c0184aSmlarkin } 1058f3c0184aSmlarkin maxp += roundup(shp[i].sh_size, 1059f3c0184aSmlarkin sizeof(Elf32_Addr)); 1060f3c0184aSmlarkin shp[i].sh_offset = off; 1061f3c0184aSmlarkin shp[i].sh_flags |= SHF_ALLOC; 1062f3c0184aSmlarkin off += roundup(shp[i].sh_size, 1063f3c0184aSmlarkin sizeof(Elf32_Addr)); 1064f3c0184aSmlarkin } 1065f3c0184aSmlarkin } 1066f3c0184aSmlarkin if (flags & LOAD_SYM) { 10677cc0a091Sstefan mbcopy(shp, shpp, sz); 1068f3c0184aSmlarkin } 1069f3c0184aSmlarkin free(shstr); 1070f3c0184aSmlarkin free(shp); 1071f3c0184aSmlarkin } 1072f3c0184aSmlarkin 1073f3c0184aSmlarkin /* 1074f3c0184aSmlarkin * Frob the copied ELF header to give information relative 1075f3c0184aSmlarkin * to elfp. 1076f3c0184aSmlarkin */ 1077f3c0184aSmlarkin if (flags & LOAD_HDR) { 1078f3c0184aSmlarkin elf->e_phoff = 0; 1079f3c0184aSmlarkin elf->e_shoff = sizeof(Elf32_Ehdr); 1080f3c0184aSmlarkin elf->e_phentsize = 0; 1081f3c0184aSmlarkin elf->e_phnum = 0; 10827cc0a091Sstefan mbcopy(elf, elfp, sizeof(*elf)); 1083f3c0184aSmlarkin } 1084f3c0184aSmlarkin 1085f3c0184aSmlarkin marks[MARK_START] = LOADADDR(minp); 1086f3c0184aSmlarkin marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 1087f3c0184aSmlarkin marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 1088f3c0184aSmlarkin marks[MARK_SYM] = LOADADDR(elfp); 1089f3c0184aSmlarkin marks[MARK_END] = LOADADDR(maxp); 1090f3c0184aSmlarkin 1091f3c0184aSmlarkin return 0; 1092f3c0184aSmlarkin } 1093