1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.46 2023/04/19 12:58:16 jsg Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 74 * 75 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 76 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 77 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 78 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 79 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 80 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 81 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 82 */ 83 84 #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */ 85 #include <sys/ioctl.h> 86 #include <sys/reboot.h> 87 #include <sys/exec.h> 88 89 #include <elf.h> 90 #include <stdio.h> 91 #include <string.h> 92 #include <errno.h> 93 #include <stdlib.h> 94 #include <unistd.h> 95 #include <fcntl.h> 96 #include <err.h> 97 #include <stddef.h> 98 99 #include <machine/vmmvar.h> 100 #include <machine/biosvar.h> 101 #include <machine/segments.h> 102 #include <machine/specialreg.h> 103 #include <machine/pte.h> 104 105 #include "loadfile.h" 106 #include "vmd.h" 107 108 #define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) 109 110 union { 111 Elf32_Ehdr elf32; 112 Elf64_Ehdr elf64; 113 } hdr; 114 115 static void setsegment(struct mem_segment_descriptor *, uint32_t, 116 size_t, int, int, int, int); 117 static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); 118 static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); 119 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); 120 static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); 121 static size_t push_stack(uint32_t, uint32_t); 122 static void push_gdt(void); 123 static void push_pt_32(void); 124 static void push_pt_64(void); 125 static void marc4random_buf(paddr_t, int); 126 static void mbzero(paddr_t, int); 127 static void mbcopy(void *, paddr_t, int); 128 129 extern char *__progname; 130 extern int vm_id; 131 132 /* 133 * setsegment 134 * 135 * Initializes a segment selector entry with the provided descriptor. 136 * For the purposes of the bootloader mimiced by vmd(8), we only need 137 * memory-type segment descriptor support. 138 * 139 * This function was copied from machdep.c 140 * 141 * Parameters: 142 * sd: Address of the entry to initialize 143 * base: base of the segment 144 * limit: limit of the segment 145 * type: type of the segment 146 * dpl: privilege level of the egment 147 * def32: default 16/32 bit size of the segment 148 * gran: granularity of the segment (byte/page) 149 */ 150 static void 151 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, 152 int type, int dpl, int def32, int gran) 153 { 154 sd->sd_lolimit = (int)limit; 155 sd->sd_lobase = (int)base; 156 sd->sd_type = type; 157 sd->sd_dpl = dpl; 158 sd->sd_p = 1; 159 sd->sd_hilimit = (int)limit >> 16; 160 sd->sd_avl = 0; 161 sd->sd_long = 0; 162 sd->sd_def32 = def32; 163 sd->sd_gran = gran; 164 sd->sd_hibase = (int)base >> 24; 165 } 166 167 /* 168 * push_gdt 169 * 170 * Allocates and populates a page in the guest phys memory space to hold 171 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to 172 * create the same GDT that a real bootloader would have created. 173 * This is loaded into the guest phys RAM space at address GDT_PAGE. 174 */ 175 static void 176 push_gdt(void) 177 { 178 uint8_t gdtpage[PAGE_SIZE]; 179 struct mem_segment_descriptor *sd; 180 181 memset(&gdtpage, 0, sizeof(gdtpage)); 182 183 sd = (struct mem_segment_descriptor *)&gdtpage; 184 185 /* 186 * Create three segment descriptors: 187 * 188 * GDT[0] : null descriptor. "Created" via memset above. 189 * GDT[1] (selector @ 0x8): Executable segment, for CS 190 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS 191 */ 192 setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); 193 setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); 194 195 write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); 196 } 197 198 /* 199 * push_pt_32 200 * 201 * Create an identity-mapped page directory hierarchy mapping the first 202 * 4GB of physical memory. This is used during bootstrapping i386 VMs on 203 * CPUs without unrestricted guest capability. 204 */ 205 static void 206 push_pt_32(void) 207 { 208 uint32_t ptes[1024], i; 209 210 memset(ptes, 0, sizeof(ptes)); 211 for (i = 0 ; i < 1024; i++) { 212 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); 213 } 214 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 215 } 216 217 /* 218 * push_pt_64 219 * 220 * Create an identity-mapped page directory hierarchy mapping the first 221 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on 222 * CPUs without unrestricted guest capability. 223 */ 224 static void 225 push_pt_64(void) 226 { 227 uint64_t ptes[512], i; 228 229 /* PDPDE0 - first 1GB */ 230 memset(ptes, 0, sizeof(ptes)); 231 ptes[0] = PG_V | PML3_PAGE; 232 write_mem(PML4_PAGE, ptes, PAGE_SIZE); 233 234 /* PDE0 - first 1GB */ 235 memset(ptes, 0, sizeof(ptes)); 236 ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE; 237 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 238 239 /* First 1GB (in 2MB pages) */ 240 memset(ptes, 0, sizeof(ptes)); 241 for (i = 0 ; i < 512; i++) { 242 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i); 243 } 244 write_mem(PML2_PAGE, ptes, PAGE_SIZE); 245 } 246 247 /* 248 * loadfile_elf 249 * 250 * Loads an ELF kernel to its defined load address in the guest VM. 251 * The kernel is loaded to its defined start point as set in the ELF header. 252 * 253 * Parameters: 254 * fp: file of a kernel file to load 255 * vcp: the VM create parameters, holding the exact memory map 256 * (out) vrs: register state to set on init for this kernel 257 * bootdev: the optional non-default boot device 258 * howto: optional boot flags for the kernel 259 * 260 * Return values: 261 * 0 if successful 262 * various error codes returned from gzread(3) or loadelf functions 263 */ 264 int 265 loadfile_elf(gzFile fp, struct vm_create_params *vcp, 266 struct vcpu_reg_state *vrs, unsigned int bootdevice) 267 { 268 int r, is_i386 = 0; 269 uint32_t bootargsz; 270 size_t n, stacksize; 271 u_long marks[MARK_MAX]; 272 bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1]; 273 bios_bootmac_t bm, *bootmac = NULL; 274 275 if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr)) 276 return 1; 277 278 memset(&marks, 0, sizeof(marks)); 279 if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 && 280 hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { 281 r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL); 282 is_i386 = 1; 283 } else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 && 284 hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) { 285 r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL); 286 } else 287 errno = ENOEXEC; 288 289 if (r) 290 return (r); 291 292 push_gdt(); 293 294 if (is_i386) { 295 push_pt_32(); 296 /* Reconfigure the default flat-64 register set for 32 bit */ 297 vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE; 298 vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; 299 vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; 300 } 301 else 302 push_pt_64(); 303 304 if (bootdevice == VMBOOTDEV_NET) { 305 bootmac = &bm; 306 memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN); 307 } 308 n = create_bios_memmap(vcp, memmap); 309 bootargsz = push_bootargs(memmap, n, bootmac); 310 stacksize = push_stack(bootargsz, marks[MARK_END]); 311 312 vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY]; 313 vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize; 314 vrs->vrs_gdtr.vsi_base = GDT_PAGE; 315 316 log_debug("%s: loaded ELF kernel", __func__); 317 318 return (0); 319 } 320 321 /* 322 * create_bios_memmap 323 * 324 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine. 325 * 326 * Parameters: 327 * vcp: the VM create parameters, containing the memory map passed to vmm(4) 328 * memmap (out): the BIOS memory map 329 * 330 * Return values: 331 * Number of bios_memmap_t entries, including the terminating nul-entry. 332 */ 333 static size_t 334 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) 335 { 336 size_t i, n = 0; 337 struct vm_mem_range *vmr; 338 339 for (i = 0; i < vcp->vcp_nmemranges; i++, n++) { 340 vmr = &vcp->vcp_memranges[i]; 341 memmap[n].addr = vmr->vmr_gpa; 342 memmap[n].size = vmr->vmr_size; 343 if (vmr->vmr_type == VM_MEM_RAM) 344 memmap[n].type = BIOS_MAP_FREE; 345 else 346 memmap[n].type = BIOS_MAP_RES; 347 } 348 349 /* Null mem map entry to denote the end of the ranges */ 350 memmap[n].addr = 0x0; 351 memmap[n].size = 0x0; 352 memmap[n].type = BIOS_MAP_END; 353 n++; 354 355 return (n); 356 } 357 358 /* 359 * push_bootargs 360 * 361 * Creates the boot arguments page in the guest address space. 362 * Since vmd(8) is acting as the bootloader, we need to create the same boot 363 * arguments page that a real bootloader would have created. This is loaded 364 * into the guest phys RAM space at address BOOTARGS_PAGE. 365 * 366 * Parameters: 367 * memmap: the BIOS memory map 368 * n: number of entries in memmap 369 * bootmac: optional PXE boot MAC address 370 * 371 * Return values: 372 * The size of the bootargs in bytes 373 */ 374 static uint32_t 375 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) 376 { 377 uint32_t memmap_sz, consdev_sz, bootmac_sz, i; 378 bios_consdev_t consdev; 379 uint32_t ba[1024]; 380 381 memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t); 382 ba[0] = BOOTARG_MEMMAP; 383 ba[1] = memmap_sz; 384 ba[2] = memmap_sz; 385 memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t)); 386 i = memmap_sz / sizeof(uint32_t); 387 388 /* Serial console device, COM1 @ 0x3f8 */ 389 memset(&consdev, 0, sizeof(consdev)); 390 consdev.consdev = makedev(8, 0); 391 consdev.conspeed = 115200; 392 consdev.consaddr = 0x3f8; 393 394 consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t); 395 ba[i] = BOOTARG_CONSDEV; 396 ba[i + 1] = consdev_sz; 397 ba[i + 2] = consdev_sz; 398 memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t)); 399 i += consdev_sz / sizeof(uint32_t); 400 401 if (bootmac) { 402 bootmac_sz = 3 * sizeof(uint32_t) + 403 (sizeof(bios_bootmac_t) + 3) & ~3; 404 ba[i] = BOOTARG_BOOTMAC; 405 ba[i + 1] = bootmac_sz; 406 ba[i + 2] = bootmac_sz; 407 memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t)); 408 i += bootmac_sz / sizeof(uint32_t); 409 } 410 411 ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ 412 413 write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); 414 415 return (i * sizeof(uint32_t)); 416 } 417 418 /* 419 * push_stack 420 * 421 * Creates the boot stack page in the guest address space. When using a real 422 * bootloader, the stack will be prepared using the following format before 423 * transitioning to kernel start, so vmd(8) needs to mimic the same stack 424 * layout. The stack content is pushed to the guest phys RAM at address 425 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 426 * 4 bytes. 427 * 428 * Stack Layout: (TOS == Top Of Stack) 429 * TOS location of boot arguments page 430 * TOS - 0x4 size of the content in the boot arguments page 431 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 432 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 433 * TOS - 0x10 kernel 'end' symbol value 434 * TOS - 0x14 version of bootarg API 435 * 436 * Parameters: 437 * bootargsz: size of boot arguments 438 * end: kernel 'end' symbol value 439 * bootdev: the optional non-default boot device 440 * howto: optional boot flags for the kernel 441 * 442 * Return values: 443 * size of the stack 444 */ 445 static size_t 446 push_stack(uint32_t bootargsz, uint32_t end) 447 { 448 uint32_t stack[1024]; 449 uint16_t loc; 450 451 memset(&stack, 0, sizeof(stack)); 452 loc = 1024; 453 454 stack[--loc] = BOOTARGS_PAGE; 455 stack[--loc] = bootargsz; 456 stack[--loc] = 0; /* biosbasemem */ 457 stack[--loc] = 0; /* biosextmem */ 458 stack[--loc] = end; 459 stack[--loc] = 0x0e; 460 stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 461 stack[--loc] = 0; 462 463 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 464 465 return (1024 - (loc - 1)) * sizeof(uint32_t); 466 } 467 468 /* 469 * mread 470 * 471 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 472 * into the guest address space at paddr 'addr'. 473 * 474 * Parameters: 475 * fp: kernel image file to read from. 476 * addr: guest paddr_t to load to 477 * sz: number of bytes to load 478 * 479 * Return values: 480 * returns 'sz' if successful, or 0 otherwise. 481 */ 482 size_t 483 mread(gzFile fp, paddr_t addr, size_t sz) 484 { 485 const char *errstr = NULL; 486 int errnum = 0; 487 size_t ct; 488 size_t i, osz; 489 char buf[PAGE_SIZE]; 490 491 /* 492 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 493 * write_mem 494 */ 495 ct = 0; 496 osz = sz; 497 if ((addr & PAGE_MASK) != 0) { 498 memset(buf, 0, sizeof(buf)); 499 if (sz > PAGE_SIZE) 500 ct = PAGE_SIZE - (addr & PAGE_MASK); 501 else 502 ct = sz; 503 504 if ((size_t)gzread(fp, buf, ct) != ct) { 505 errstr = gzerror(fp, &errnum); 506 if (errnum == Z_ERRNO) 507 errnum = errno; 508 log_warnx("%s: error %d in mread, %s", __progname, 509 errnum, errstr); 510 return (0); 511 } 512 513 if (write_mem(addr, buf, ct)) 514 return (0); 515 516 addr += ct; 517 } 518 519 sz = sz - ct; 520 521 if (sz == 0) 522 return (osz); 523 524 for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) { 525 memset(buf, 0, sizeof(buf)); 526 if (i + PAGE_SIZE > sz) 527 ct = sz - i; 528 else 529 ct = PAGE_SIZE; 530 531 if ((size_t)gzread(fp, buf, ct) != ct) { 532 errstr = gzerror(fp, &errnum); 533 if (errnum == Z_ERRNO) 534 errnum = errno; 535 log_warnx("%s: error %d in mread, %s", __progname, 536 errnum, errstr); 537 return (0); 538 } 539 540 if (write_mem(addr, buf, ct)) 541 return (0); 542 } 543 544 return (osz); 545 } 546 547 /* 548 * marc4random_buf 549 * 550 * load 'sz' bytes of random data into the guest address space at paddr 551 * 'addr'. 552 * 553 * Parameters: 554 * addr: guest paddr_t to load random bytes into 555 * sz: number of random bytes to load 556 * 557 * Return values: 558 * nothing 559 */ 560 static void 561 marc4random_buf(paddr_t addr, int sz) 562 { 563 int i, ct; 564 char buf[PAGE_SIZE]; 565 566 /* 567 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 568 * write_mem 569 */ 570 ct = 0; 571 if (addr % PAGE_SIZE != 0) { 572 memset(buf, 0, sizeof(buf)); 573 ct = PAGE_SIZE - (addr % PAGE_SIZE); 574 575 arc4random_buf(buf, ct); 576 577 if (write_mem(addr, buf, ct)) 578 return; 579 580 addr += ct; 581 } 582 583 for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 584 memset(buf, 0, sizeof(buf)); 585 if (i + PAGE_SIZE > sz) 586 ct = sz - i; 587 else 588 ct = PAGE_SIZE; 589 590 arc4random_buf(buf, ct); 591 592 if (write_mem(addr, buf, ct)) 593 return; 594 } 595 } 596 597 /* 598 * mbzero 599 * 600 * load 'sz' bytes of zeros into the guest address space at paddr 601 * 'addr'. 602 * 603 * Parameters: 604 * addr: guest paddr_t to zero 605 * sz: number of zero bytes to store 606 * 607 * Return values: 608 * nothing 609 */ 610 static void 611 mbzero(paddr_t addr, int sz) 612 { 613 if (write_mem(addr, NULL, sz)) 614 return; 615 } 616 617 /* 618 * mbcopy 619 * 620 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 621 * 622 * Parameters: 623 * src: source buffer to copy from 624 * dst: destination guest paddr_t to copy to 625 * sz: number of bytes to copy 626 * 627 * Return values: 628 * nothing 629 */ 630 static void 631 mbcopy(void *src, paddr_t dst, int sz) 632 { 633 write_mem(dst, src, sz); 634 } 635 636 /* 637 * elf64_exec 638 * 639 * Load the kernel indicated by 'fp' into the guest physical memory 640 * space, at the addresses defined in the ELF header. 641 * 642 * This function is used for 64 bit kernels. 643 * 644 * Parameters: 645 * fp: kernel image file to load 646 * elf: ELF header of the kernel 647 * marks: array to store the offsets of various kernel structures 648 * (start, bss, etc) 649 * flags: flag value to indicate which section(s) to load (usually 650 * LOAD_ALL) 651 * 652 * Return values: 653 * 0 if successful 654 * 1 if unsuccessful 655 */ 656 static int 657 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags) 658 { 659 Elf64_Shdr *shp; 660 Elf64_Phdr *phdr; 661 Elf64_Off off; 662 int i; 663 size_t sz; 664 int havesyms; 665 paddr_t minp = ~0, maxp = 0, pos = 0; 666 paddr_t offset = marks[MARK_START], shpp, elfp; 667 668 sz = elf->e_phnum * sizeof(Elf64_Phdr); 669 phdr = malloc(sz); 670 671 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 672 free(phdr); 673 return 1; 674 } 675 676 if ((size_t)gzread(fp, phdr, sz) != sz) { 677 free(phdr); 678 return 1; 679 } 680 681 for (i = 0; i < elf->e_phnum; i++) { 682 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 683 int m; 684 685 /* Fill segment if asked for. */ 686 if (flags & LOAD_RANDOM) { 687 for (pos = 0; pos < phdr[i].p_filesz; 688 pos += m) { 689 m = phdr[i].p_filesz - pos; 690 marc4random_buf(phdr[i].p_paddr + pos, 691 m); 692 } 693 } 694 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 695 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 696 marks[MARK_ERANDOM] = 697 marks[MARK_RANDOM] + phdr[i].p_filesz; 698 } 699 continue; 700 } 701 702 if (phdr[i].p_type != PT_LOAD || 703 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 704 continue; 705 706 #define IS_TEXT(p) (p.p_flags & PF_X) 707 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 708 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 709 /* 710 * XXX: Assume first address is lowest 711 */ 712 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 713 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 714 715 /* Read in segment. */ 716 if (gzseek(fp, (off_t)phdr[i].p_offset, 717 SEEK_SET) == -1) { 718 free(phdr); 719 return 1; 720 } 721 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 722 phdr[i].p_filesz) { 723 free(phdr); 724 return 1; 725 } 726 } 727 728 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 729 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 730 pos = phdr[i].p_paddr; 731 if (minp > pos) 732 minp = pos; 733 pos += phdr[i].p_filesz; 734 if (maxp < pos) 735 maxp = pos; 736 } 737 738 /* Zero out BSS. */ 739 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 740 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 741 phdr[i].p_memsz - phdr[i].p_filesz); 742 } 743 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 744 pos += phdr[i].p_memsz - phdr[i].p_filesz; 745 if (maxp < pos) 746 maxp = pos; 747 } 748 } 749 free(phdr); 750 751 /* 752 * Copy the ELF and section headers. 753 */ 754 elfp = maxp = roundup(maxp, sizeof(Elf64_Addr)); 755 if (flags & (LOAD_HDR | COUNT_HDR)) 756 maxp += sizeof(Elf64_Ehdr); 757 758 if (flags & (LOAD_SYM | COUNT_SYM)) { 759 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 760 warn("gzseek section headers"); 761 return 1; 762 } 763 sz = elf->e_shnum * sizeof(Elf64_Shdr); 764 shp = malloc(sz); 765 766 if ((size_t)gzread(fp, shp, sz) != sz) { 767 free(shp); 768 return 1; 769 } 770 771 shpp = maxp; 772 maxp += roundup(sz, sizeof(Elf64_Addr)); 773 774 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 775 char *shstr = malloc(shstrsz); 776 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 777 SEEK_SET) == -1) { 778 free(shstr); 779 free(shp); 780 return 1; 781 } 782 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 783 free(shstr); 784 free(shp); 785 return 1; 786 } 787 788 /* 789 * Now load the symbol sections themselves. Make sure the 790 * sections are aligned. Don't bother with string tables if 791 * there are no symbol sections. 792 */ 793 off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr)); 794 795 for (havesyms = i = 0; i < elf->e_shnum; i++) 796 if (shp[i].sh_type == SHT_SYMTAB) 797 havesyms = 1; 798 799 for (i = 0; i < elf->e_shnum; i++) { 800 if (shp[i].sh_type == SHT_SYMTAB || 801 shp[i].sh_type == SHT_STRTAB || 802 !strcmp(shstr + shp[i].sh_name, ".debug_line") || 803 !strcmp(shstr + shp[i].sh_name, ELF_CTF)) { 804 if (havesyms && (flags & LOAD_SYM)) { 805 if (gzseek(fp, (off_t)shp[i].sh_offset, 806 SEEK_SET) == -1) { 807 free(shstr); 808 free(shp); 809 return 1; 810 } 811 if (mread(fp, maxp, 812 shp[i].sh_size) != shp[i].sh_size) { 813 free(shstr); 814 free(shp); 815 return 1; 816 } 817 } 818 maxp += roundup(shp[i].sh_size, 819 sizeof(Elf64_Addr)); 820 shp[i].sh_offset = off; 821 shp[i].sh_flags |= SHF_ALLOC; 822 off += roundup(shp[i].sh_size, 823 sizeof(Elf64_Addr)); 824 } 825 } 826 if (flags & LOAD_SYM) { 827 mbcopy(shp, shpp, sz); 828 } 829 free(shstr); 830 free(shp); 831 } 832 833 /* 834 * Frob the copied ELF header to give information relative 835 * to elfp. 836 */ 837 if (flags & LOAD_HDR) { 838 elf->e_phoff = 0; 839 elf->e_shoff = sizeof(Elf64_Ehdr); 840 elf->e_phentsize = 0; 841 elf->e_phnum = 0; 842 mbcopy(elf, elfp, sizeof(*elf)); 843 } 844 845 marks[MARK_START] = LOADADDR(minp); 846 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 847 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 848 marks[MARK_SYM] = LOADADDR(elfp); 849 marks[MARK_END] = LOADADDR(maxp); 850 851 return 0; 852 } 853 854 /* 855 * elf32_exec 856 * 857 * Load the kernel indicated by 'fp' into the guest physical memory 858 * space, at the addresses defined in the ELF header. 859 * 860 * This function is used for 32 bit kernels. 861 * 862 * Parameters: 863 * fp: kernel image file to load 864 * elf: ELF header of the kernel 865 * marks: array to store the offsets of various kernel structures 866 * (start, bss, etc) 867 * flags: flag value to indicate which section(s) to load (usually 868 * LOAD_ALL) 869 * 870 * Return values: 871 * 0 if successful 872 * 1 if unsuccessful 873 */ 874 static int 875 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags) 876 { 877 Elf32_Shdr *shp; 878 Elf32_Phdr *phdr; 879 Elf32_Off off; 880 int i; 881 size_t sz; 882 int havesyms; 883 paddr_t minp = ~0, maxp = 0, pos = 0; 884 paddr_t offset = marks[MARK_START], shpp, elfp; 885 886 sz = elf->e_phnum * sizeof(Elf32_Phdr); 887 phdr = malloc(sz); 888 889 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 890 free(phdr); 891 return 1; 892 } 893 894 if ((size_t)gzread(fp, phdr, sz) != sz) { 895 free(phdr); 896 return 1; 897 } 898 899 for (i = 0; i < elf->e_phnum; i++) { 900 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 901 int m; 902 903 /* Fill segment if asked for. */ 904 if (flags & LOAD_RANDOM) { 905 for (pos = 0; pos < phdr[i].p_filesz; 906 pos += m) { 907 m = phdr[i].p_filesz - pos; 908 marc4random_buf(phdr[i].p_paddr + pos, 909 m); 910 } 911 } 912 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 913 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 914 marks[MARK_ERANDOM] = 915 marks[MARK_RANDOM] + phdr[i].p_filesz; 916 } 917 continue; 918 } 919 920 if (phdr[i].p_type != PT_LOAD || 921 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 922 continue; 923 924 #define IS_TEXT(p) (p.p_flags & PF_X) 925 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 926 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 927 /* 928 * XXX: Assume first address is lowest 929 */ 930 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 931 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 932 933 /* Read in segment. */ 934 if (gzseek(fp, (off_t)phdr[i].p_offset, 935 SEEK_SET) == -1) { 936 free(phdr); 937 return 1; 938 } 939 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 940 phdr[i].p_filesz) { 941 free(phdr); 942 return 1; 943 } 944 } 945 946 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 947 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 948 pos = phdr[i].p_paddr; 949 if (minp > pos) 950 minp = pos; 951 pos += phdr[i].p_filesz; 952 if (maxp < pos) 953 maxp = pos; 954 } 955 956 /* Zero out BSS. */ 957 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 958 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 959 phdr[i].p_memsz - phdr[i].p_filesz); 960 } 961 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 962 pos += phdr[i].p_memsz - phdr[i].p_filesz; 963 if (maxp < pos) 964 maxp = pos; 965 } 966 } 967 free(phdr); 968 969 /* 970 * Copy the ELF and section headers. 971 */ 972 elfp = maxp = roundup(maxp, sizeof(Elf32_Addr)); 973 if (flags & (LOAD_HDR | COUNT_HDR)) 974 maxp += sizeof(Elf32_Ehdr); 975 976 if (flags & (LOAD_SYM | COUNT_SYM)) { 977 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 978 warn("lseek section headers"); 979 return 1; 980 } 981 sz = elf->e_shnum * sizeof(Elf32_Shdr); 982 shp = malloc(sz); 983 984 if ((size_t)gzread(fp, shp, sz) != sz) { 985 free(shp); 986 return 1; 987 } 988 989 shpp = maxp; 990 maxp += roundup(sz, sizeof(Elf32_Addr)); 991 992 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 993 char *shstr = malloc(shstrsz); 994 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 995 SEEK_SET) == -1) { 996 free(shstr); 997 free(shp); 998 return 1; 999 } 1000 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 1001 free(shstr); 1002 free(shp); 1003 return 1; 1004 } 1005 1006 /* 1007 * Now load the symbol sections themselves. Make sure the 1008 * sections are aligned. Don't bother with string tables if 1009 * there are no symbol sections. 1010 */ 1011 off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr)); 1012 1013 for (havesyms = i = 0; i < elf->e_shnum; i++) 1014 if (shp[i].sh_type == SHT_SYMTAB) 1015 havesyms = 1; 1016 1017 for (i = 0; i < elf->e_shnum; i++) { 1018 if (shp[i].sh_type == SHT_SYMTAB || 1019 shp[i].sh_type == SHT_STRTAB || 1020 !strcmp(shstr + shp[i].sh_name, ".debug_line")) { 1021 if (havesyms && (flags & LOAD_SYM)) { 1022 if (gzseek(fp, (off_t)shp[i].sh_offset, 1023 SEEK_SET) == -1) { 1024 free(shstr); 1025 free(shp); 1026 return 1; 1027 } 1028 if (mread(fp, maxp, 1029 shp[i].sh_size) != shp[i].sh_size) { 1030 free(shstr); 1031 free(shp); 1032 return 1; 1033 } 1034 } 1035 maxp += roundup(shp[i].sh_size, 1036 sizeof(Elf32_Addr)); 1037 shp[i].sh_offset = off; 1038 shp[i].sh_flags |= SHF_ALLOC; 1039 off += roundup(shp[i].sh_size, 1040 sizeof(Elf32_Addr)); 1041 } 1042 } 1043 if (flags & LOAD_SYM) { 1044 mbcopy(shp, shpp, sz); 1045 } 1046 free(shstr); 1047 free(shp); 1048 } 1049 1050 /* 1051 * Frob the copied ELF header to give information relative 1052 * to elfp. 1053 */ 1054 if (flags & LOAD_HDR) { 1055 elf->e_phoff = 0; 1056 elf->e_shoff = sizeof(Elf32_Ehdr); 1057 elf->e_phentsize = 0; 1058 elf->e_phnum = 0; 1059 mbcopy(elf, elfp, sizeof(*elf)); 1060 } 1061 1062 marks[MARK_START] = LOADADDR(minp); 1063 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 1064 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 1065 marks[MARK_SYM] = LOADADDR(elfp); 1066 marks[MARK_END] = LOADADDR(maxp); 1067 1068 return 0; 1069 } 1070