1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.50 2024/09/26 01:45:13 jsg Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
#include <sys/reboot.h>
#include <sys/exec.h>

#include <elf.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>

#include <dev/vmm/vmm.h>

#include <machine/biosvar.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/pte.h>

/* NOTE(review): gzFile comes in via one of these headers — presumably
 * loadfile.h pulls in zlib.h; confirm if includes are reordered. */
#include "loadfile.h"
#include "vmd.h"

/*
 * Convert a physical load address to a kernel mark value: add the load
 * offset and truncate to the low 28 bits (the mask caps marks at 256MB).
 * Relies on a local variable named 'offset' being in scope at expansion.
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* Scratch ELF executable header, shared by the 32/64 bit load paths. */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;

/*
 * SEV C-bit mask OR'd into guest page table entries; stays 0 unless
 * loadfile_elf() detects an SEV-enabled VM and a reported C-bit position.
 */
uint64_t pg_crypt = 0;

/*
 * setsegment
 *
 * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
 * memory-type segment descriptor support.
 *
 * This function was copied from machdep.c
 *
 * Parameters:
 *  sd: Address of the entry to initialize
 *  base: base of the segment
 *  limit: limit of the segment
 *  type: type of the segment
 *  dpl: privilege level of the segment
 *  def32: default 16/32 bit size of the segment
 *  gran: granularity of the segment (byte/page)
 */
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
{
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
	sd->sd_avl = 0;
	sd->sd_long = 0;		/* never a 64-bit code segment here */
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}

/*
 * push_gdt
 *
 * Allocates and populates a page in the guest phys memory space to hold
 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
 * create the same GDT that a real bootloader would have created.
 * This is loaded into the guest phys RAM space at address GDT_PAGE.
 */
static void
push_gdt(void)
{
	uint8_t gdtpage[PAGE_SIZE];
	struct mem_segment_descriptor *sd;

	memset(&gdtpage, 0, sizeof(gdtpage));

	sd = (struct mem_segment_descriptor *)&gdtpage;

	/*
	 * Create three segment descriptors:
	 *
	 * GDT[0] : null descriptor. "Created" via memset above.
	 * GDT[1] (selector @ 0x8): Executable segment, for CS
	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
	 */
	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);

	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
	sev_register_encryption(GDT_PAGE, PAGE_SIZE);
}

/*
 * push_pt_32
 *
 * Create an identity-mapped page directory hierarchy mapping the first
 * 4GB of physical memory. This is used during bootstrapping i386 VMs on
 * CPUs without unrestricted guest capability.
 */
static void
push_pt_32(void)
{
	uint32_t ptes[1024], i;

	/* 1024 4MB PSE superpage entries: identity map of the low 4GB. */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < 1024; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
	}
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
}

/*
 * push_pt_64
 *
 * Create an identity-mapped page directory hierarchy mapping the first
 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
 * CPUs without unrestricted guest capability.
 */
static void
push_pt_64(void)
{
	uint64_t ptes[512], i;

	/* PDPDE0 - first 1GB */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = pg_crypt | PG_V | PML3_PAGE;
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
	sev_register_encryption(PML4_PAGE, PAGE_SIZE);

	/* PDE0 - first 1GB */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE;
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
	sev_register_encryption(PML3_PAGE, PAGE_SIZE);

	/* First 1GB (in 2MB pages) */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < 512; i++) {
		ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS |
		    ((2048 * 1024) * i);
	}
	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
	sev_register_encryption(PML2_PAGE, PAGE_SIZE);
}

/*
 * loadfile_elf
 *
 * Loads an ELF kernel to its defined load address in the guest VM.
 * The kernel is loaded to its defined start point as set in the ELF header.
 *
 * Parameters:
 *  fp: file of a kernel file to load
 *  vm: the vm being launched; supplies the create parameters (memory map,
 *      SEV configuration) and the guest MAC for PXE boot
 *  (out) vrs: register state to set on init for this kernel
 *  bootdevice: the optional non-default boot device (VMBOOTDEV_*)
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from gzread(3) or loadelf functions
 */
int
loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
    unsigned int bootdevice)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
	bios_bootmac_t bm, *bootmac = NULL;
	struct vm_create_params *vcp = &vm->vm_params.vmc_params;

	if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr))
		return 1;

	memset(&marks, 0, sizeof(marks));
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		errno = ENOEXEC;

	/*
	 * Not an ELF image: r still holds the (nonzero) byte count from
	 * gzread above, so this also returns nonzero for the ENOEXEC case.
	 */
	if (r)
		return (r);

	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else {
		if (vcp->vcp_sev) {
			if (vcp->vcp_poscbit == 0) {
				log_warnx("SEV enabled but no C-bit reported");
				return 1;
			}
			/* Page tables must carry the C-bit from here on. */
			pg_crypt = (1ULL << vcp->vcp_poscbit);
			log_debug("%s: poscbit %d pg_crypt 0x%016llx",
			    __func__, vcp->vcp_poscbit, pg_crypt);
		}
		push_pt_64();
	}

	if (bootdevice == VMBOOTDEV_NET) {
		bootmac = &bm;
		memcpy(bootmac, vm->vm_params.vmc_macs[0], ETHER_ADDR_LEN);
	}
	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n, bootmac);
	stacksize = push_stack(bootargsz, marks[MARK_END]);

	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}

/*
 * create_bios_memmap
 *
 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
 *
 * Parameters:
 *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
 *  memmap (out): the BIOS memory map
 *
 * Return values:
 *  Number of bios_memmap_t entries, including the terminating nul-entry.
 */
static size_t
create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
{
	size_t i, n = 0;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
		vmr = &vcp->vcp_memranges[i];
		memmap[n].addr = vmr->vmr_gpa;
		memmap[n].size = vmr->vmr_size;
		if (vmr->vmr_type == VM_MEM_RAM)
			memmap[n].type = BIOS_MAP_FREE;
		else
			memmap[n].type = BIOS_MAP_RES;
	}

	/* Null mem map entry to denote the end of the ranges */
	memmap[n].addr = 0x0;
	memmap[n].size = 0x0;
	memmap[n].type = BIOS_MAP_END;
	n++;

	return (n);
}

/*
 * push_bootargs
 *
 * Creates the boot arguments page in the guest address space.
 * Since vmd(8) is acting as the bootloader, we need to create the same boot
 * arguments page that a real bootloader would have created. This is loaded
 * into the guest phys RAM space at address BOOTARGS_PAGE.
 *
 * Each bootarg record is laid out as three uint32_t header words
 * (type, size, size) followed by its payload, as seen below.
 *
 * NOTE(review): ba[] is a fixed 4KB buffer and the writes are unbounded;
 * this assumes the memmap plus fixed records always fit in one page —
 * confirm against VMM_MAX_MEM_RANGES.
 *
 * Parameters:
 *  memmap: the BIOS memory map
 *  n: number of entries in memmap
 *  bootmac: optional PXE boot MAC address
 *
 * Return values:
 *  The size of the bootargs in bytes
 */
static uint32_t
push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
{
	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
	bios_consdev_t consdev;
	uint32_t ba[1024];

	memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
	ba[0] = BOOTARG_MEMMAP;
	ba[1] = memmap_sz;
	ba[2] = memmap_sz;
	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
	i = memmap_sz / sizeof(uint32_t);

	/* Serial console device, COM1 @ 0x3f8 */
	memset(&consdev, 0, sizeof(consdev));
	consdev.consdev = makedev(8, 0);
	consdev.conspeed = 115200;
	consdev.consaddr = 0x3f8;

	consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
	ba[i] = BOOTARG_CONSDEV;
	ba[i + 1] = consdev_sz;
	ba[i + 2] = consdev_sz;
	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
	i += consdev_sz / sizeof(uint32_t);

	if (bootmac) {
		/*
		 * NOTE(review): '+' binds tighter than '&', so the & ~3
		 * rounds the whole sum, not just the payload. The result is
		 * the same here because the 12-byte header is already
		 * 4-byte aligned, but parenthesize if this is ever touched.
		 */
		bootmac_sz = 3 * sizeof(uint32_t) +
		    (sizeof(bios_bootmac_t) + 3) & ~3;
		ba[i] = BOOTARG_BOOTMAC;
		ba[i + 1] = bootmac_sz;
		ba[i + 2] = bootmac_sz;
		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
		i += bootmac_sz / sizeof(uint32_t);
	}

	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */

	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
	sev_register_encryption(BOOTARGS_PAGE, PAGE_SIZE);

	return (i * sizeof(uint32_t));
}

/*
 * push_stack
 *
 * Creates the boot stack page in the guest address space. When using a real
 * bootloader, the stack will be prepared using the following format before
 * transitioning to kernel start, so vmd(8) needs to mimic the same stack
 * layout. The stack content is pushed to the guest phys RAM at address
 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
 * 4 bytes.
 *
 * Stack Layout: (TOS == Top Of Stack)
 *  TOS		location of boot arguments page
 *  TOS - 0x4	size of the content in the boot arguments page
 *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
 *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
 *  TOS - 0x10	kernel 'end' symbol value
 *  TOS - 0x14	version of bootarg API
 *  TOS - 0x18	boot device (MAKEBOOTDEV encoded)
 *  TOS - 0x1c	boot howto flags
 *
 * Parameters:
 *  bootargsz: size of boot arguments
 *  end: kernel 'end' symbol value
 *
 * Return values:
 *  size of the stack
 */
static size_t
push_stack(uint32_t bootargsz, uint32_t end)
{
	uint32_t stack[1024];
	uint16_t loc;

	memset(&stack, 0, sizeof(stack));
	loc = 1024;

	stack[--loc] = BOOTARGS_PAGE;
	stack[--loc] = bootargsz;
	stack[--loc] = 0;		/* biosbasemem */
	stack[--loc] = 0;		/* biosextmem */
	stack[--loc] = end;
stack[--loc] = 0x0e; 476 stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 477 stack[--loc] = 0; 478 479 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 480 sev_register_encryption(STACK_PAGE, PAGE_SIZE); 481 482 return (1024 - (loc - 1)) * sizeof(uint32_t); 483 } 484 485 /* 486 * mread 487 * 488 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 489 * into the guest address space at paddr 'addr'. 490 * 491 * Parameters: 492 * fp: kernel image file to read from. 493 * addr: guest paddr_t to load to 494 * sz: number of bytes to load 495 * 496 * Return values: 497 * returns 'sz' if successful, or 0 otherwise. 498 */ 499 size_t 500 mread(gzFile fp, paddr_t addr, size_t sz) 501 { 502 const char *errstr = NULL; 503 int errnum = 0; 504 size_t ct; 505 size_t i, osz; 506 char buf[PAGE_SIZE]; 507 508 sev_register_encryption(addr, sz); 509 510 /* 511 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 512 * write_mem 513 */ 514 ct = 0; 515 osz = sz; 516 if ((addr & PAGE_MASK) != 0) { 517 memset(buf, 0, sizeof(buf)); 518 if (sz > PAGE_SIZE) 519 ct = PAGE_SIZE - (addr & PAGE_MASK); 520 else 521 ct = sz; 522 523 if ((size_t)gzread(fp, buf, ct) != ct) { 524 errstr = gzerror(fp, &errnum); 525 if (errnum == Z_ERRNO) 526 errnum = errno; 527 log_warnx("%s: error %d in mread, %s", __progname, 528 errnum, errstr); 529 return (0); 530 } 531 532 if (write_mem(addr, buf, ct)) 533 return (0); 534 535 addr += ct; 536 } 537 538 sz = sz - ct; 539 540 if (sz == 0) 541 return (osz); 542 543 for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) { 544 memset(buf, 0, sizeof(buf)); 545 if (i + PAGE_SIZE > sz) 546 ct = sz - i; 547 else 548 ct = PAGE_SIZE; 549 550 if ((size_t)gzread(fp, buf, ct) != ct) { 551 errstr = gzerror(fp, &errnum); 552 if (errnum == Z_ERRNO) 553 errnum = errno; 554 log_warnx("%s: error %d in mread, %s", __progname, 555 errnum, errstr); 556 return (0); 557 } 558 559 if (write_mem(addr, buf, ct)) 560 return (0); 561 } 562 563 return (osz); 
564 } 565 566 /* 567 * marc4random_buf 568 * 569 * load 'sz' bytes of random data into the guest address space at paddr 570 * 'addr'. 571 * 572 * Parameters: 573 * addr: guest paddr_t to load random bytes into 574 * sz: number of random bytes to load 575 * 576 * Return values: 577 * nothing 578 */ 579 static void 580 marc4random_buf(paddr_t addr, int sz) 581 { 582 int i, ct; 583 char buf[PAGE_SIZE]; 584 585 sev_register_encryption(addr, sz); 586 587 /* 588 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 589 * write_mem 590 */ 591 ct = 0; 592 if (addr % PAGE_SIZE != 0) { 593 memset(buf, 0, sizeof(buf)); 594 ct = PAGE_SIZE - (addr % PAGE_SIZE); 595 596 arc4random_buf(buf, ct); 597 598 if (write_mem(addr, buf, ct)) 599 return; 600 601 addr += ct; 602 } 603 604 for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 605 memset(buf, 0, sizeof(buf)); 606 if (i + PAGE_SIZE > sz) 607 ct = sz - i; 608 else 609 ct = PAGE_SIZE; 610 611 arc4random_buf(buf, ct); 612 613 if (write_mem(addr, buf, ct)) 614 return; 615 } 616 } 617 618 /* 619 * mbzero 620 * 621 * load 'sz' bytes of zeros into the guest address space at paddr 622 * 'addr'. 623 * 624 * Parameters: 625 * addr: guest paddr_t to zero 626 * sz: number of zero bytes to store 627 * 628 * Return values: 629 * nothing 630 */ 631 static void 632 mbzero(paddr_t addr, int sz) 633 { 634 if (write_mem(addr, NULL, sz)) 635 return; 636 sev_register_encryption(addr, sz); 637 } 638 639 /* 640 * mbcopy 641 * 642 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 
 *
 * Parameters:
 *  src: source buffer to copy from
 *  dst: destination guest paddr_t to copy to
 *  sz: number of bytes to copy
 *
 * Return values:
 *  nothing
 */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	write_mem(dst, src, sz);
	sev_register_encryption(dst, sz);
}

/*
 * elf64_exec
 *
 * Load the kernel indicated by 'fp' into the guest physical memory
 * space, at the addresses defined in the ELF header.
 *
 * This function is used for 64 bit kernels.
 *
 * Parameters:
 *  fp: kernel image file to load
 *  elf: ELF header of the kernel
 *  marks: array to store the offsets of various kernel structures
 *   (start, bss, etc)
 *  flags: flag value to indicate which section(s) to load (usually
 *   LOAD_ALL)
 *
 * Return values:
 *  0 if successful
 *  1 if unsuccessful
 */
static int
elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags)
{
	Elf64_Shdr *shp;
	Elf64_Phdr *phdr;
	Elf64_Off off;
	int i;
	size_t sz;
	int havesyms;
	paddr_t minp = ~0, maxp = 0, pos = 0;
	paddr_t offset = marks[MARK_START], shpp, elfp;

	sz = elf->e_phnum * sizeof(Elf64_Phdr);
	/* NOTE(review): malloc() result used unchecked by gzread below. */
	phdr = malloc(sz);

	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
		free(phdr);
		return 1;
	}

	if ((size_t)gzread(fp, phdr, sz) != sz) {
		free(phdr);
		return 1;
	}

	/* Walk the program headers, loading/measuring each segment. */
	for (i = 0; i < elf->e_phnum; i++) {
		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
			int m;

			/* Fill segment if asked for. */
			if (flags & LOAD_RANDOM) {
				for (pos = 0; pos < phdr[i].p_filesz;
				    pos += m) {
					m = phdr[i].p_filesz - pos;
					marc4random_buf(phdr[i].p_paddr + pos,
					    m);
				}
			}
			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
				marks[MARK_ERANDOM] =
				    marks[MARK_RANDOM] + phdr[i].p_filesz;
			}
			continue;
		}

		if (phdr[i].p_type != PT_LOAD ||
		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
			continue;

#define IS_TEXT(p)	(p.p_flags & PF_X)
#define IS_DATA(p)	((p.p_flags & PF_X) == 0)
#define IS_BSS(p)	(p.p_filesz < p.p_memsz)
		/*
		 * XXX: Assume first address is lowest
		 */
		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {

			/* Read in segment. */
			if (gzseek(fp, (off_t)phdr[i].p_offset,
			    SEEK_SET) == -1) {
				free(phdr);
				return 1;
			}
			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
			    phdr[i].p_filesz) {
				free(phdr);
				return 1;
			}
		}

		/* Track the lowest/highest addresses touched. */
		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
			pos = phdr[i].p_paddr;
			if (minp > pos)
				minp = pos;
			pos += phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}

		/* Zero out BSS. */
		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
			    phdr[i].p_memsz - phdr[i].p_filesz);
		}
		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
			pos += phdr[i].p_memsz - phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}
	}
	free(phdr);

	/*
	 * Copy the ELF and section headers.
	 */
	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
	if (flags & (LOAD_HDR | COUNT_HDR))
		maxp += sizeof(Elf64_Ehdr);

	if (flags & (LOAD_SYM | COUNT_SYM)) {
		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
			warn("gzseek section headers");
			return 1;
		}
		sz = elf->e_shnum * sizeof(Elf64_Shdr);
		/* NOTE(review): malloc() results (shp, shstr below) are
		 * used unchecked; a NULL would crash before being freed. */
		shp = malloc(sz);

		if ((size_t)gzread(fp, shp, sz) != sz) {
			free(shp);
			return 1;
		}

		shpp = maxp;
		maxp += roundup(sz, sizeof(Elf64_Addr));

		/* Pull in the section-name string table. */
		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
		char *shstr = malloc(shstrsz);
		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
		    SEEK_SET) == -1) {
			free(shstr);
			free(shp);
			return 1;
		}
		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
			free(shstr);
			free(shp);
			return 1;
		}

		/*
		 * Now load the symbol sections themselves. Make sure the
		 * sections are aligned. Don't bother with string tables if
		 * there are no symbol sections.
		 */
		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));

		for (havesyms = i = 0; i < elf->e_shnum; i++)
			if (shp[i].sh_type == SHT_SYMTAB)
				havesyms = 1;

		for (i = 0; i < elf->e_shnum; i++) {
			if (shp[i].sh_type == SHT_SYMTAB ||
			    shp[i].sh_type == SHT_STRTAB ||
			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
				if (havesyms && (flags & LOAD_SYM)) {
					if (gzseek(fp, (off_t)shp[i].sh_offset,
					    SEEK_SET) == -1) {
						free(shstr);
						free(shp);
						return 1;
					}
					if (mread(fp, maxp,
					    shp[i].sh_size) != shp[i].sh_size) {
						free(shstr);
						free(shp);
						return 1;
					}
				}
				maxp += roundup(shp[i].sh_size,
				    sizeof(Elf64_Addr));
				/* Rewrite the copied header to point at the
				 * in-guest copy of the section. */
				shp[i].sh_offset = off;
				shp[i].sh_flags |= SHF_ALLOC;
				off += roundup(shp[i].sh_size,
				    sizeof(Elf64_Addr));
			}
		}
		if (flags & LOAD_SYM) {
			mbcopy(shp, shpp, sz);
		}
		free(shstr);
		free(shp);
	}

	/*
	 * Frob the copied ELF header to give information relative
	 * to elfp.
	 */
	if (flags & LOAD_HDR) {
		elf->e_phoff = 0;
		elf->e_shoff = sizeof(Elf64_Ehdr);
		elf->e_phentsize = 0;
		elf->e_phnum = 0;
		mbcopy(elf, elfp, sizeof(*elf));
	}

	marks[MARK_START] = LOADADDR(minp);
	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
	marks[MARK_SYM] = LOADADDR(elfp);
	marks[MARK_END] = LOADADDR(maxp);

	return 0;
}

/*
 * elf32_exec
 *
 * Load the kernel indicated by 'fp' into the guest physical memory
 * space, at the addresses defined in the ELF header.
 *
 * This function is used for 32 bit kernels.
 *
 * Parameters:
 *  fp: kernel image file to load
 *  elf: ELF header of the kernel
 *  marks: array to store the offsets of various kernel structures
 *   (start, bss, etc)
 *  flags: flag value to indicate which section(s) to load (usually
 *   LOAD_ALL)
 *
 * Return values:
 *  0 if successful
 *  1 if unsuccessful
 */
static int
elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags)
{
	Elf32_Shdr *shp;
	Elf32_Phdr *phdr;
	Elf32_Off off;
	int i;
	size_t sz;
	int havesyms;
	paddr_t minp = ~0, maxp = 0, pos = 0;
	paddr_t offset = marks[MARK_START], shpp, elfp;

	sz = elf->e_phnum * sizeof(Elf32_Phdr);
	/* NOTE(review): malloc() result used unchecked by gzread below. */
	phdr = malloc(sz);

	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) {
		free(phdr);
		return 1;
	}

	if ((size_t)gzread(fp, phdr, sz) != sz) {
		free(phdr);
		return 1;
	}

	/* Walk the program headers, loading/measuring each segment. */
	for (i = 0; i < elf->e_phnum; i++) {
		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
			int m;

			/* Fill segment if asked for. */
			if (flags & LOAD_RANDOM) {
				for (pos = 0; pos < phdr[i].p_filesz;
				    pos += m) {
					m = phdr[i].p_filesz - pos;
					marc4random_buf(phdr[i].p_paddr + pos,
					    m);
				}
			}
			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
				marks[MARK_ERANDOM] =
				    marks[MARK_RANDOM] + phdr[i].p_filesz;
			}
			continue;
		}

		if (phdr[i].p_type != PT_LOAD ||
		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
			continue;

#define IS_TEXT(p)	(p.p_flags & PF_X)
#define IS_DATA(p)	((p.p_flags & PF_X) == 0)
#define IS_BSS(p)	(p.p_filesz < p.p_memsz)
		/*
		 * XXX: Assume first address is lowest
		 */
		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {

			/* Read in segment. */
			if (gzseek(fp, (off_t)phdr[i].p_offset,
			    SEEK_SET) == -1) {
				free(phdr);
				return 1;
			}
			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
			    phdr[i].p_filesz) {
				free(phdr);
				return 1;
			}
		}

		/* Track the lowest/highest addresses touched. */
		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
			pos = phdr[i].p_paddr;
			if (minp > pos)
				minp = pos;
			pos += phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}

		/* Zero out BSS. */
		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
			    phdr[i].p_memsz - phdr[i].p_filesz);
		}
		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
			pos += phdr[i].p_memsz - phdr[i].p_filesz;
			if (maxp < pos)
				maxp = pos;
		}
	}
	free(phdr);

	/*
	 * Copy the ELF and section headers.
	 */
	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
	if (flags & (LOAD_HDR | COUNT_HDR))
		maxp += sizeof(Elf32_Ehdr);

	if (flags & (LOAD_SYM | COUNT_SYM)) {
		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
			/* NOTE(review): message says "lseek" but this is
			 * gzseek — compare the elf64_exec() wording. */
			warn("lseek section headers");
			return 1;
		}
		sz = elf->e_shnum * sizeof(Elf32_Shdr);
		/* NOTE(review): malloc() results (shp, shstr below) are
		 * used unchecked; a NULL would crash before being freed. */
		shp = malloc(sz);

		if ((size_t)gzread(fp, shp, sz) != sz) {
			free(shp);
			return 1;
		}

		shpp = maxp;
		maxp += roundup(sz, sizeof(Elf32_Addr));

		/* Pull in the section-name string table. */
		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
		char *shstr = malloc(shstrsz);
		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
		    SEEK_SET) == -1) {
			free(shstr);
			free(shp);
			return 1;
		}
		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
			free(shstr);
			free(shp);
			return 1;
		}

		/*
		 * Now load the symbol sections themselves. Make sure the
		 * sections are aligned. Don't bother with string tables if
		 * there are no symbol sections.
		 * (Unlike elf64_exec(), no ELF_CTF section is considered.)
		 */
		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));

		for (havesyms = i = 0; i < elf->e_shnum; i++)
			if (shp[i].sh_type == SHT_SYMTAB)
				havesyms = 1;

		for (i = 0; i < elf->e_shnum; i++) {
			if (shp[i].sh_type == SHT_SYMTAB ||
			    shp[i].sh_type == SHT_STRTAB ||
			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
				if (havesyms && (flags & LOAD_SYM)) {
					if (gzseek(fp, (off_t)shp[i].sh_offset,
					    SEEK_SET) == -1) {
						free(shstr);
						free(shp);
						return 1;
					}
					if (mread(fp, maxp,
					    shp[i].sh_size) != shp[i].sh_size) {
						free(shstr);
						free(shp);
						return 1;
					}
				}
				maxp += roundup(shp[i].sh_size,
				    sizeof(Elf32_Addr));
				/* Rewrite the copied header to point at the
				 * in-guest copy of the section. */
				shp[i].sh_offset = off;
				shp[i].sh_flags |= SHF_ALLOC;
				off += roundup(shp[i].sh_size,
				    sizeof(Elf32_Addr));
			}
		}
		if (flags & LOAD_SYM) {
			mbcopy(shp, shpp, sz);
		}
		free(shstr);
		free(shp);
	}

	/*
	 * Frob the copied ELF header to give information relative
	 * to elfp.
	 */
	if (flags & LOAD_HDR) {
		elf->e_phoff = 0;
		elf->e_shoff = sizeof(Elf32_Ehdr);
		elf->e_phentsize = 0;
		elf->e_phnum = 0;
		mbcopy(elf, elfp, sizeof(*elf));
	}

	marks[MARK_START] = LOADADDR(minp);
	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
	marks[MARK_SYM] = LOADADDR(elfp);
	marks[MARK_END] = LOADADDR(maxp);

	return 0;
}