1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.43 2022/11/28 18:24:52 dv Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 74 * 75 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 76 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 77 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 78 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 79 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 80 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 81 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 82 */ 83 84 #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */ 85 #include <sys/ioctl.h> 86 #include <sys/reboot.h> 87 #include <sys/exec.h> 88 89 #include <elf.h> 90 #include <stdio.h> 91 #include <string.h> 92 #include <errno.h> 93 #include <stdlib.h> 94 #include <unistd.h> 95 #include <fcntl.h> 96 #include <err.h> 97 #include <errno.h> 98 #include <stddef.h> 99 100 #include <machine/vmmvar.h> 101 #include <machine/biosvar.h> 102 #include <machine/segments.h> 103 #include <machine/specialreg.h> 104 #include <machine/pte.h> 105 106 #include "loadfile.h" 107 #include "vmd.h" 108 109 #define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) 110 111 union { 112 Elf32_Ehdr elf32; 113 Elf64_Ehdr elf64; 114 } hdr; 115 116 static void setsegment(struct mem_segment_descriptor *, uint32_t, 117 size_t, int, int, int, int); 118 static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); 119 static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); 120 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); 121 static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); 122 static size_t push_stack(uint32_t, uint32_t); 123 static void push_gdt(void); 124 static void push_pt_32(void); 125 static void push_pt_64(void); 126 static void marc4random_buf(paddr_t, int); 127 static void mbzero(paddr_t, int); 128 static void mbcopy(void *, paddr_t, int); 129 130 extern char *__progname; 131 extern int vm_id; 132 133 /* 134 * setsegment 135 * 136 * Initializes a segment selector entry with the provided descriptor. 137 * For the purposes of the bootloader mimiced by vmd(8), we only need 138 * memory-type segment descriptor support. 139 * 140 * This function was copied from machdep.c 141 * 142 * Parameters: 143 * sd: Address of the entry to initialize 144 * base: base of the segment 145 * limit: limit of the segment 146 * type: type of the segment 147 * dpl: privilege level of the egment 148 * def32: default 16/32 bit size of the segment 149 * gran: granularity of the segment (byte/page) 150 */ 151 static void 152 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, 153 int type, int dpl, int def32, int gran) 154 { 155 sd->sd_lolimit = (int)limit; 156 sd->sd_lobase = (int)base; 157 sd->sd_type = type; 158 sd->sd_dpl = dpl; 159 sd->sd_p = 1; 160 sd->sd_hilimit = (int)limit >> 16; 161 sd->sd_avl = 0; 162 sd->sd_long = 0; 163 sd->sd_def32 = def32; 164 sd->sd_gran = gran; 165 sd->sd_hibase = (int)base >> 24; 166 } 167 168 /* 169 * push_gdt 170 * 171 * Allocates and populates a page in the guest phys memory space to hold 172 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to 173 * create the same GDT that a real bootloader would have created. 174 * This is loaded into the guest phys RAM space at address GDT_PAGE. 175 */ 176 static void 177 push_gdt(void) 178 { 179 uint8_t gdtpage[PAGE_SIZE]; 180 struct mem_segment_descriptor *sd; 181 182 memset(&gdtpage, 0, sizeof(gdtpage)); 183 184 sd = (struct mem_segment_descriptor *)&gdtpage; 185 186 /* 187 * Create three segment descriptors: 188 * 189 * GDT[0] : null desriptor. "Created" via memset above. 190 * GDT[1] (selector @ 0x8): Executable segment, for CS 191 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS 192 */ 193 setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); 194 setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); 195 196 write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); 197 } 198 199 /* 200 * push_pt_32 201 * 202 * Create an identity-mapped page directory hierarchy mapping the first 203 * 4GB of physical memory. This is used during bootstrapping i386 VMs on 204 * CPUs without unrestricted guest capability. 205 */ 206 static void 207 push_pt_32(void) 208 { 209 uint32_t ptes[1024], i; 210 211 memset(ptes, 0, sizeof(ptes)); 212 for (i = 0 ; i < 1024; i++) { 213 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); 214 } 215 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 216 } 217 218 /* 219 * push_pt_64 220 * 221 * Create an identity-mapped page directory hierarchy mapping the first 222 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on 223 * CPUs without unrestricted guest capability. 224 */ 225 static void 226 push_pt_64(void) 227 { 228 uint64_t ptes[512], i; 229 230 /* PDPDE0 - first 1GB */ 231 memset(ptes, 0, sizeof(ptes)); 232 ptes[0] = PG_V | PML3_PAGE; 233 write_mem(PML4_PAGE, ptes, PAGE_SIZE); 234 235 /* PDE0 - first 1GB */ 236 memset(ptes, 0, sizeof(ptes)); 237 ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE; 238 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 239 240 /* First 1GB (in 2MB pages) */ 241 memset(ptes, 0, sizeof(ptes)); 242 for (i = 0 ; i < 512; i++) { 243 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i); 244 } 245 write_mem(PML2_PAGE, ptes, PAGE_SIZE); 246 } 247 248 /* 249 * loadfile_elf 250 * 251 * Loads an ELF kernel to its defined load address in the guest VM. 252 * The kernel is loaded to its defined start point as set in the ELF header. 253 * 254 * Parameters: 255 * fp: file of a kernel file to load 256 * vcp: the VM create parameters, holding the exact memory map 257 * (out) vrs: register state to set on init for this kernel 258 * bootdev: the optional non-default boot device 259 * howto: optional boot flags for the kernel 260 * 261 * Return values: 262 * 0 if successful 263 * various error codes returned from gzread(3) or loadelf functions 264 */ 265 int 266 loadfile_elf(gzFile fp, struct vm_create_params *vcp, 267 struct vcpu_reg_state *vrs, unsigned int bootdevice) 268 { 269 int r, is_i386 = 0; 270 uint32_t bootargsz; 271 size_t n, stacksize; 272 u_long marks[MARK_MAX]; 273 bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1]; 274 bios_bootmac_t bm, *bootmac = NULL; 275 276 if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr)) 277 return 1; 278 279 memset(&marks, 0, sizeof(marks)); 280 if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 && 281 hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { 282 r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL); 283 is_i386 = 1; 284 } else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 && 285 hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) { 286 r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL); 287 } else 288 errno = ENOEXEC; 289 290 if (r) 291 return (r); 292 293 push_gdt(); 294 295 if (is_i386) { 296 push_pt_32(); 297 /* Reconfigure the default flat-64 register set for 32 bit */ 298 vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE; 299 vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; 300 vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; 301 } 302 else 303 push_pt_64(); 304 305 if (bootdevice == VMBOOTDEV_NET) { 306 bootmac = &bm; 307 memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN); 308 } 309 n = create_bios_memmap(vcp, memmap); 310 bootargsz = push_bootargs(memmap, n, bootmac); 311 stacksize = push_stack(bootargsz, marks[MARK_END]); 312 313 vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY]; 314 vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize; 315 vrs->vrs_gdtr.vsi_base = GDT_PAGE; 316 317 log_debug("%s: loaded ELF kernel", __func__); 318 319 return (0); 320 } 321 322 /* 323 * create_bios_memmap 324 * 325 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine. 326 * 327 * Parameters: 328 * vcp: the VM create parameters, containing the memory map passed to vmm(4) 329 * memmap (out): the BIOS memory map 330 * 331 * Return values: 332 * Number of bios_memmap_t entries, including the terminating nul-entry. 333 */ 334 static size_t 335 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) 336 { 337 size_t i, n = 0, sz; 338 paddr_t gpa; 339 struct vm_mem_range *vmr; 340 341 for (i = 0; i < vcp->vcp_nmemranges; i++) { 342 vmr = &vcp->vcp_memranges[i]; 343 gpa = vmr->vmr_gpa; 344 sz = vmr->vmr_size; 345 346 /* 347 * Make sure that we do not mark the ROM/video RAM area in the 348 * low memory as physcal memory available to the kernel. 349 */ 350 if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) { 351 if (gpa >= LOWMEM_KB * 1024) 352 sz = 0; 353 else 354 sz = LOWMEM_KB * 1024 - gpa; 355 } 356 357 if (sz != 0) { 358 memmap[n].addr = gpa; 359 memmap[n].size = sz; 360 memmap[n].type = 0x1; /* Type 1 : Normal memory */ 361 n++; 362 } 363 } 364 365 /* Null mem map entry to denote the end of the ranges */ 366 memmap[n].addr = 0x0; 367 memmap[n].size = 0x0; 368 memmap[n].type = 0x0; 369 n++; 370 371 return (n); 372 } 373 374 /* 375 * push_bootargs 376 * 377 * Creates the boot arguments page in the guest address space. 378 * Since vmd(8) is acting as the bootloader, we need to create the same boot 379 * arguments page that a real bootloader would have created. This is loaded 380 * into the guest phys RAM space at address BOOTARGS_PAGE. 381 * 382 * Parameters: 383 * memmap: the BIOS memory map 384 * n: number of entries in memmap 385 * bootmac: optional PXE boot MAC address 386 * 387 * Return values: 388 * The size of the bootargs in bytes 389 */ 390 static uint32_t 391 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) 392 { 393 uint32_t memmap_sz, consdev_sz, bootmac_sz, i; 394 bios_consdev_t consdev; 395 uint32_t ba[1024]; 396 397 memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t); 398 ba[0] = BOOTARG_MEMMAP; 399 ba[1] = memmap_sz; 400 ba[2] = memmap_sz; 401 memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t)); 402 i = memmap_sz / sizeof(uint32_t); 403 404 /* Serial console device, COM1 @ 0x3f8 */ 405 memset(&consdev, 0, sizeof(consdev)); 406 consdev.consdev = makedev(8, 0); 407 consdev.conspeed = 115200; 408 consdev.consaddr = 0x3f8; 409 410 consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t); 411 ba[i] = BOOTARG_CONSDEV; 412 ba[i + 1] = consdev_sz; 413 ba[i + 2] = consdev_sz; 414 memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t)); 415 i += consdev_sz / sizeof(uint32_t); 416 417 if (bootmac) { 418 bootmac_sz = 3 * sizeof(uint32_t) + 419 (sizeof(bios_bootmac_t) + 3) & ~3; 420 ba[i] = BOOTARG_BOOTMAC; 421 ba[i + 1] = bootmac_sz; 422 ba[i + 2] = bootmac_sz; 423 memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t)); 424 i += bootmac_sz / sizeof(uint32_t); 425 } 426 427 ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ 428 429 write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); 430 431 return (i * sizeof(uint32_t)); 432 } 433 434 /* 435 * push_stack 436 * 437 * Creates the boot stack page in the guest address space. When using a real 438 * bootloader, the stack will be prepared using the following format before 439 * transitioning to kernel start, so vmd(8) needs to mimic the same stack 440 * layout. The stack content is pushed to the guest phys RAM at address 441 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 442 * 4 bytes. 443 * 444 * Stack Layout: (TOS == Top Of Stack) 445 * TOS location of boot arguments page 446 * TOS - 0x4 size of the content in the boot arguments page 447 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 448 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 449 * TOS - 0x10 kernel 'end' symbol value 450 * TOS - 0x14 version of bootarg API 451 * 452 * Parameters: 453 * bootargsz: size of boot arguments 454 * end: kernel 'end' symbol value 455 * bootdev: the optional non-default boot device 456 * howto: optional boot flags for the kernel 457 * 458 * Return values: 459 * size of the stack 460 */ 461 static size_t 462 push_stack(uint32_t bootargsz, uint32_t end) 463 { 464 uint32_t stack[1024]; 465 uint16_t loc; 466 467 memset(&stack, 0, sizeof(stack)); 468 loc = 1024; 469 470 stack[--loc] = BOOTARGS_PAGE; 471 stack[--loc] = bootargsz; 472 stack[--loc] = 0; /* biosbasemem */ 473 stack[--loc] = 0; /* biosextmem */ 474 stack[--loc] = end; 475 stack[--loc] = 0x0e; 476 stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 477 stack[--loc] = 0; 478 479 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 480 481 return (1024 - (loc - 1)) * sizeof(uint32_t); 482 } 483 484 /* 485 * mread 486 * 487 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 488 * into the guest address space at paddr 'addr'. 489 * 490 * Parameters: 491 * fp: kernel image file to read from. 492 * addr: guest paddr_t to load to 493 * sz: number of bytes to load 494 * 495 * Return values: 496 * returns 'sz' if successful, or 0 otherwise. 497 */ 498 size_t 499 mread(gzFile fp, paddr_t addr, size_t sz) 500 { 501 const char *errstr = NULL; 502 int errnum = 0; 503 size_t ct; 504 size_t i, osz; 505 char buf[PAGE_SIZE]; 506 507 /* 508 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 509 * write_mem 510 */ 511 ct = 0; 512 osz = sz; 513 if ((addr & PAGE_MASK) != 0) { 514 memset(buf, 0, sizeof(buf)); 515 if (sz > PAGE_SIZE) 516 ct = PAGE_SIZE - (addr & PAGE_MASK); 517 else 518 ct = sz; 519 520 if ((size_t)gzread(fp, buf, ct) != ct) { 521 errstr = gzerror(fp, &errnum); 522 if (errnum == Z_ERRNO) 523 errnum = errno; 524 log_warnx("%s: error %d in mread, %s", __progname, 525 errnum, errstr); 526 return (0); 527 } 528 529 if (write_mem(addr, buf, ct)) 530 return (0); 531 532 addr += ct; 533 } 534 535 sz = sz - ct; 536 537 if (sz == 0) 538 return (osz); 539 540 for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) { 541 memset(buf, 0, sizeof(buf)); 542 if (i + PAGE_SIZE > sz) 543 ct = sz - i; 544 else 545 ct = PAGE_SIZE; 546 547 if ((size_t)gzread(fp, buf, ct) != ct) { 548 errstr = gzerror(fp, &errnum); 549 if (errnum == Z_ERRNO) 550 errnum = errno; 551 log_warnx("%s: error %d in mread, %s", __progname, 552 errnum, errstr); 553 return (0); 554 } 555 556 if (write_mem(addr, buf, ct)) 557 return (0); 558 } 559 560 return (osz); 561 } 562 563 /* 564 * marc4random_buf 565 * 566 * load 'sz' bytes of random data into the guest address space at paddr 567 * 'addr'. 568 * 569 * Parameters: 570 * addr: guest paddr_t to load random bytes into 571 * sz: number of random bytes to load 572 * 573 * Return values: 574 * nothing 575 */ 576 static void 577 marc4random_buf(paddr_t addr, int sz) 578 { 579 int i, ct; 580 char buf[PAGE_SIZE]; 581 582 /* 583 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 584 * write_mem 585 */ 586 ct = 0; 587 if (addr % PAGE_SIZE != 0) { 588 memset(buf, 0, sizeof(buf)); 589 ct = PAGE_SIZE - (addr % PAGE_SIZE); 590 591 arc4random_buf(buf, ct); 592 593 if (write_mem(addr, buf, ct)) 594 return; 595 596 addr += ct; 597 } 598 599 for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 600 memset(buf, 0, sizeof(buf)); 601 if (i + PAGE_SIZE > sz) 602 ct = sz - i; 603 else 604 ct = PAGE_SIZE; 605 606 arc4random_buf(buf, ct); 607 608 if (write_mem(addr, buf, ct)) 609 return; 610 } 611 } 612 613 /* 614 * mbzero 615 * 616 * load 'sz' bytes of zeros into the guest address space at paddr 617 * 'addr'. 618 * 619 * Parameters: 620 * addr: guest paddr_t to zero 621 * sz: number of zero bytes to store 622 * 623 * Return values: 624 * nothing 625 */ 626 static void 627 mbzero(paddr_t addr, int sz) 628 { 629 if (write_mem(addr, NULL, sz)) 630 return; 631 } 632 633 /* 634 * mbcopy 635 * 636 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 637 * 638 * Parameters: 639 * src: source buffer to copy from 640 * dst: destination guest paddr_t to copy to 641 * sz: number of bytes to copy 642 * 643 * Return values: 644 * nothing 645 */ 646 static void 647 mbcopy(void *src, paddr_t dst, int sz) 648 { 649 write_mem(dst, src, sz); 650 } 651 652 /* 653 * elf64_exec 654 * 655 * Load the kernel indicated by 'fp' into the guest physical memory 656 * space, at the addresses defined in the ELF header. 657 * 658 * This function is used for 64 bit kernels. 659 * 660 * Parameters: 661 * fp: kernel image file to load 662 * elf: ELF header of the kernel 663 * marks: array to store the offsets of various kernel structures 664 * (start, bss, etc) 665 * flags: flag value to indicate which section(s) to load (usually 666 * LOAD_ALL) 667 * 668 * Return values: 669 * 0 if successful 670 * 1 if unsuccessful 671 */ 672 static int 673 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags) 674 { 675 Elf64_Shdr *shp; 676 Elf64_Phdr *phdr; 677 Elf64_Off off; 678 int i; 679 size_t sz; 680 int havesyms; 681 paddr_t minp = ~0, maxp = 0, pos = 0; 682 paddr_t offset = marks[MARK_START], shpp, elfp; 683 684 sz = elf->e_phnum * sizeof(Elf64_Phdr); 685 phdr = malloc(sz); 686 687 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 688 free(phdr); 689 return 1; 690 } 691 692 if ((size_t)gzread(fp, phdr, sz) != sz) { 693 free(phdr); 694 return 1; 695 } 696 697 for (i = 0; i < elf->e_phnum; i++) { 698 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 699 int m; 700 701 /* Fill segment if asked for. */ 702 if (flags & LOAD_RANDOM) { 703 for (pos = 0; pos < phdr[i].p_filesz; 704 pos += m) { 705 m = phdr[i].p_filesz - pos; 706 marc4random_buf(phdr[i].p_paddr + pos, 707 m); 708 } 709 } 710 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 711 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 712 marks[MARK_ERANDOM] = 713 marks[MARK_RANDOM] + phdr[i].p_filesz; 714 } 715 continue; 716 } 717 718 if (phdr[i].p_type != PT_LOAD || 719 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 720 continue; 721 722 #define IS_TEXT(p) (p.p_flags & PF_X) 723 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 724 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 725 /* 726 * XXX: Assume first address is lowest 727 */ 728 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 729 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 730 731 /* Read in segment. */ 732 if (gzseek(fp, (off_t)phdr[i].p_offset, 733 SEEK_SET) == -1) { 734 free(phdr); 735 return 1; 736 } 737 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 738 phdr[i].p_filesz) { 739 free(phdr); 740 return 1; 741 } 742 } 743 744 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 745 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 746 pos = phdr[i].p_paddr; 747 if (minp > pos) 748 minp = pos; 749 pos += phdr[i].p_filesz; 750 if (maxp < pos) 751 maxp = pos; 752 } 753 754 /* Zero out BSS. */ 755 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 756 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 757 phdr[i].p_memsz - phdr[i].p_filesz); 758 } 759 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 760 pos += phdr[i].p_memsz - phdr[i].p_filesz; 761 if (maxp < pos) 762 maxp = pos; 763 } 764 } 765 free(phdr); 766 767 /* 768 * Copy the ELF and section headers. 769 */ 770 elfp = maxp = roundup(maxp, sizeof(Elf64_Addr)); 771 if (flags & (LOAD_HDR | COUNT_HDR)) 772 maxp += sizeof(Elf64_Ehdr); 773 774 if (flags & (LOAD_SYM | COUNT_SYM)) { 775 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 776 warn("gzseek section headers"); 777 return 1; 778 } 779 sz = elf->e_shnum * sizeof(Elf64_Shdr); 780 shp = malloc(sz); 781 782 if ((size_t)gzread(fp, shp, sz) != sz) { 783 free(shp); 784 return 1; 785 } 786 787 shpp = maxp; 788 maxp += roundup(sz, sizeof(Elf64_Addr)); 789 790 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 791 char *shstr = malloc(shstrsz); 792 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 793 SEEK_SET) == -1) { 794 free(shstr); 795 free(shp); 796 return 1; 797 } 798 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 799 free(shstr); 800 free(shp); 801 return 1; 802 } 803 804 /* 805 * Now load the symbol sections themselves. Make sure the 806 * sections are aligned. Don't bother with string tables if 807 * there are no symbol sections. 808 */ 809 off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr)); 810 811 for (havesyms = i = 0; i < elf->e_shnum; i++) 812 if (shp[i].sh_type == SHT_SYMTAB) 813 havesyms = 1; 814 815 for (i = 0; i < elf->e_shnum; i++) { 816 if (shp[i].sh_type == SHT_SYMTAB || 817 shp[i].sh_type == SHT_STRTAB || 818 !strcmp(shstr + shp[i].sh_name, ".debug_line") || 819 !strcmp(shstr + shp[i].sh_name, ELF_CTF)) { 820 if (havesyms && (flags & LOAD_SYM)) { 821 if (gzseek(fp, (off_t)shp[i].sh_offset, 822 SEEK_SET) == -1) { 823 free(shstr); 824 free(shp); 825 return 1; 826 } 827 if (mread(fp, maxp, 828 shp[i].sh_size) != shp[i].sh_size) { 829 free(shstr); 830 free(shp); 831 return 1; 832 } 833 } 834 maxp += roundup(shp[i].sh_size, 835 sizeof(Elf64_Addr)); 836 shp[i].sh_offset = off; 837 shp[i].sh_flags |= SHF_ALLOC; 838 off += roundup(shp[i].sh_size, 839 sizeof(Elf64_Addr)); 840 } 841 } 842 if (flags & LOAD_SYM) { 843 mbcopy(shp, shpp, sz); 844 } 845 free(shstr); 846 free(shp); 847 } 848 849 /* 850 * Frob the copied ELF header to give information relative 851 * to elfp. 852 */ 853 if (flags & LOAD_HDR) { 854 elf->e_phoff = 0; 855 elf->e_shoff = sizeof(Elf64_Ehdr); 856 elf->e_phentsize = 0; 857 elf->e_phnum = 0; 858 mbcopy(elf, elfp, sizeof(*elf)); 859 } 860 861 marks[MARK_START] = LOADADDR(minp); 862 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 863 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 864 marks[MARK_SYM] = LOADADDR(elfp); 865 marks[MARK_END] = LOADADDR(maxp); 866 867 return 0; 868 } 869 870 /* 871 * elf32_exec 872 * 873 * Load the kernel indicated by 'fp' into the guest physical memory 874 * space, at the addresses defined in the ELF header. 875 * 876 * This function is used for 32 bit kernels. 877 * 878 * Parameters: 879 * fp: kernel image file to load 880 * elf: ELF header of the kernel 881 * marks: array to store the offsets of various kernel structures 882 * (start, bss, etc) 883 * flags: flag value to indicate which section(s) to load (usually 884 * LOAD_ALL) 885 * 886 * Return values: 887 * 0 if successful 888 * 1 if unsuccessful 889 */ 890 static int 891 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags) 892 { 893 Elf32_Shdr *shp; 894 Elf32_Phdr *phdr; 895 Elf32_Off off; 896 int i; 897 size_t sz; 898 int havesyms; 899 paddr_t minp = ~0, maxp = 0, pos = 0; 900 paddr_t offset = marks[MARK_START], shpp, elfp; 901 902 sz = elf->e_phnum * sizeof(Elf32_Phdr); 903 phdr = malloc(sz); 904 905 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 906 free(phdr); 907 return 1; 908 } 909 910 if ((size_t)gzread(fp, phdr, sz) != sz) { 911 free(phdr); 912 return 1; 913 } 914 915 for (i = 0; i < elf->e_phnum; i++) { 916 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 917 int m; 918 919 /* Fill segment if asked for. */ 920 if (flags & LOAD_RANDOM) { 921 for (pos = 0; pos < phdr[i].p_filesz; 922 pos += m) { 923 m = phdr[i].p_filesz - pos; 924 marc4random_buf(phdr[i].p_paddr + pos, 925 m); 926 } 927 } 928 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 929 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 930 marks[MARK_ERANDOM] = 931 marks[MARK_RANDOM] + phdr[i].p_filesz; 932 } 933 continue; 934 } 935 936 if (phdr[i].p_type != PT_LOAD || 937 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 938 continue; 939 940 #define IS_TEXT(p) (p.p_flags & PF_X) 941 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 942 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 943 /* 944 * XXX: Assume first address is lowest 945 */ 946 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 947 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 948 949 /* Read in segment. */ 950 if (gzseek(fp, (off_t)phdr[i].p_offset, 951 SEEK_SET) == -1) { 952 free(phdr); 953 return 1; 954 } 955 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 956 phdr[i].p_filesz) { 957 free(phdr); 958 return 1; 959 } 960 } 961 962 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 963 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 964 pos = phdr[i].p_paddr; 965 if (minp > pos) 966 minp = pos; 967 pos += phdr[i].p_filesz; 968 if (maxp < pos) 969 maxp = pos; 970 } 971 972 /* Zero out BSS. */ 973 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 974 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 975 phdr[i].p_memsz - phdr[i].p_filesz); 976 } 977 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 978 pos += phdr[i].p_memsz - phdr[i].p_filesz; 979 if (maxp < pos) 980 maxp = pos; 981 } 982 } 983 free(phdr); 984 985 /* 986 * Copy the ELF and section headers. 987 */ 988 elfp = maxp = roundup(maxp, sizeof(Elf32_Addr)); 989 if (flags & (LOAD_HDR | COUNT_HDR)) 990 maxp += sizeof(Elf32_Ehdr); 991 992 if (flags & (LOAD_SYM | COUNT_SYM)) { 993 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 994 warn("lseek section headers"); 995 return 1; 996 } 997 sz = elf->e_shnum * sizeof(Elf32_Shdr); 998 shp = malloc(sz); 999 1000 if ((size_t)gzread(fp, shp, sz) != sz) { 1001 free(shp); 1002 return 1; 1003 } 1004 1005 shpp = maxp; 1006 maxp += roundup(sz, sizeof(Elf32_Addr)); 1007 1008 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 1009 char *shstr = malloc(shstrsz); 1010 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 1011 SEEK_SET) == -1) { 1012 free(shstr); 1013 free(shp); 1014 return 1; 1015 } 1016 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 1017 free(shstr); 1018 free(shp); 1019 return 1; 1020 } 1021 1022 /* 1023 * Now load the symbol sections themselves. Make sure the 1024 * sections are aligned. Don't bother with string tables if 1025 * there are no symbol sections. 1026 */ 1027 off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr)); 1028 1029 for (havesyms = i = 0; i < elf->e_shnum; i++) 1030 if (shp[i].sh_type == SHT_SYMTAB) 1031 havesyms = 1; 1032 1033 for (i = 0; i < elf->e_shnum; i++) { 1034 if (shp[i].sh_type == SHT_SYMTAB || 1035 shp[i].sh_type == SHT_STRTAB || 1036 !strcmp(shstr + shp[i].sh_name, ".debug_line")) { 1037 if (havesyms && (flags & LOAD_SYM)) { 1038 if (gzseek(fp, (off_t)shp[i].sh_offset, 1039 SEEK_SET) == -1) { 1040 free(shstr); 1041 free(shp); 1042 return 1; 1043 } 1044 if (mread(fp, maxp, 1045 shp[i].sh_size) != shp[i].sh_size) { 1046 free(shstr); 1047 free(shp); 1048 return 1; 1049 } 1050 } 1051 maxp += roundup(shp[i].sh_size, 1052 sizeof(Elf32_Addr)); 1053 shp[i].sh_offset = off; 1054 shp[i].sh_flags |= SHF_ALLOC; 1055 off += roundup(shp[i].sh_size, 1056 sizeof(Elf32_Addr)); 1057 } 1058 } 1059 if (flags & LOAD_SYM) { 1060 mbcopy(shp, shpp, sz); 1061 } 1062 free(shstr); 1063 free(shp); 1064 } 1065 1066 /* 1067 * Frob the copied ELF header to give information relative 1068 * to elfp. 1069 */ 1070 if (flags & LOAD_HDR) { 1071 elf->e_phoff = 0; 1072 elf->e_shoff = sizeof(Elf32_Ehdr); 1073 elf->e_phentsize = 0; 1074 elf->e_phnum = 0; 1075 mbcopy(elf, elfp, sizeof(*elf)); 1076 } 1077 1078 marks[MARK_START] = LOADADDR(minp); 1079 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 1080 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 1081 marks[MARK_SYM] = LOADADDR(elfp); 1082 marks[MARK_END] = LOADADDR(maxp); 1083 1084 return 0; 1085 } 1086