1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.42 2022/01/28 06:33:27 guenther Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 74 * 75 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 76 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 77 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 78 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 79 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 80 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 81 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 82 */ 83 84 #include <sys/param.h> /* PAGE_SIZE PAGE_MASK roundup */ 85 #include <sys/ioctl.h> 86 #include <sys/reboot.h> 87 #include <sys/exec.h> 88 89 #include <elf.h> 90 #include <stdio.h> 91 #include <string.h> 92 #include <errno.h> 93 #include <stdlib.h> 94 #include <unistd.h> 95 #include <fcntl.h> 96 #include <err.h> 97 #include <errno.h> 98 #include <stddef.h> 99 100 #include <machine/vmmvar.h> 101 #include <machine/biosvar.h> 102 #include <machine/segments.h> 103 #include <machine/specialreg.h> 104 #include <machine/pte.h> 105 106 #include "loadfile.h" 107 #include "vmd.h" 108 109 #define LOADADDR(a) ((((u_long)(a)) + offset)&0xfffffff) 110 111 union { 112 Elf32_Ehdr elf32; 113 Elf64_Ehdr elf64; 114 } hdr; 115 116 static void setsegment(struct mem_segment_descriptor *, uint32_t, 117 size_t, int, int, int, int); 118 static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); 119 static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); 120 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); 121 static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); 122 static size_t push_stack(uint32_t, uint32_t); 123 static void push_gdt(void); 124 static void push_pt_32(void); 125 static void push_pt_64(void); 126 static void marc4random_buf(paddr_t, int); 127 static void mbzero(paddr_t, int); 128 static void mbcopy(void *, paddr_t, int); 129 130 extern char *__progname; 131 extern int vm_id; 132 133 /* 134 * setsegment 135 * 136 * Initializes a segment selector entry with the provided descriptor. 137 * For the purposes of the bootloader mimiced by vmd(8), we only need 138 * memory-type segment descriptor support. 139 * 140 * This function was copied from machdep.c 141 * 142 * Parameters: 143 * sd: Address of the entry to initialize 144 * base: base of the segment 145 * limit: limit of the segment 146 * type: type of the segment 147 * dpl: privilege level of the egment 148 * def32: default 16/32 bit size of the segment 149 * gran: granularity of the segment (byte/page) 150 */ 151 static void 152 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, 153 int type, int dpl, int def32, int gran) 154 { 155 sd->sd_lolimit = (int)limit; 156 sd->sd_lobase = (int)base; 157 sd->sd_type = type; 158 sd->sd_dpl = dpl; 159 sd->sd_p = 1; 160 sd->sd_hilimit = (int)limit >> 16; 161 sd->sd_avl = 0; 162 sd->sd_long = 0; 163 sd->sd_def32 = def32; 164 sd->sd_gran = gran; 165 sd->sd_hibase = (int)base >> 24; 166 } 167 168 /* 169 * push_gdt 170 * 171 * Allocates and populates a page in the guest phys memory space to hold 172 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to 173 * create the same GDT that a real bootloader would have created. 174 * This is loaded into the guest phys RAM space at address GDT_PAGE. 175 */ 176 static void 177 push_gdt(void) 178 { 179 uint8_t gdtpage[PAGE_SIZE]; 180 struct mem_segment_descriptor *sd; 181 182 memset(&gdtpage, 0, sizeof(gdtpage)); 183 184 sd = (struct mem_segment_descriptor *)&gdtpage; 185 186 /* 187 * Create three segment descriptors: 188 * 189 * GDT[0] : null desriptor. "Created" via memset above. 190 * GDT[1] (selector @ 0x8): Executable segment, for CS 191 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS 192 */ 193 setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); 194 setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); 195 196 write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); 197 } 198 199 /* 200 * push_pt_32 201 * 202 * Create an identity-mapped page directory hierarchy mapping the first 203 * 4GB of physical memory. This is used during bootstrapping i386 VMs on 204 * CPUs without unrestricted guest capability. 205 */ 206 static void 207 push_pt_32(void) 208 { 209 uint32_t ptes[1024], i; 210 211 memset(ptes, 0, sizeof(ptes)); 212 for (i = 0 ; i < 1024; i++) { 213 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); 214 } 215 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 216 } 217 218 /* 219 * push_pt_64 220 * 221 * Create an identity-mapped page directory hierarchy mapping the first 222 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on 223 * CPUs without unrestricted guest capability. 224 */ 225 static void 226 push_pt_64(void) 227 { 228 uint64_t ptes[512], i; 229 230 /* PDPDE0 - first 1GB */ 231 memset(ptes, 0, sizeof(ptes)); 232 ptes[0] = PG_V | PML3_PAGE; 233 write_mem(PML4_PAGE, ptes, PAGE_SIZE); 234 235 /* PDE0 - first 1GB */ 236 memset(ptes, 0, sizeof(ptes)); 237 ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE; 238 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 239 240 /* First 1GB (in 2MB pages) */ 241 memset(ptes, 0, sizeof(ptes)); 242 for (i = 0 ; i < 512; i++) { 243 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i); 244 } 245 write_mem(PML2_PAGE, ptes, PAGE_SIZE); 246 } 247 248 /* 249 * loadfile_elf 250 * 251 * Loads an ELF kernel to its defined load address in the guest VM. 252 * The kernel is loaded to its defined start point as set in the ELF header. 253 * 254 * Parameters: 255 * fp: file of a kernel file to load 256 * vcp: the VM create parameters, holding the exact memory map 257 * (out) vrs: register state to set on init for this kernel 258 * bootdev: the optional non-default boot device 259 * howto: optional boot flags for the kernel 260 * 261 * Return values: 262 * 0 if successful 263 * various error codes returned from gzread(3) or loadelf functions 264 */ 265 int 266 loadfile_elf(gzFile fp, struct vm_create_params *vcp, 267 struct vcpu_reg_state *vrs, unsigned int bootdevice) 268 { 269 int r, is_i386 = 0; 270 uint32_t bootargsz; 271 size_t n, stacksize; 272 u_long marks[MARK_MAX]; 273 bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1]; 274 bios_bootmac_t bm, *bootmac = NULL; 275 276 if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr)) 277 return 1; 278 279 memset(&marks, 0, sizeof(marks)); 280 if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 && 281 hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { 282 r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL); 283 is_i386 = 1; 284 } else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 && 285 hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) { 286 r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL); 287 } else 288 errno = ENOEXEC; 289 290 if (r) 291 return (r); 292 293 push_gdt(); 294 295 if (is_i386) { 296 push_pt_32(); 297 /* Reconfigure the default flat-64 register set for 32 bit */ 298 vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE; 299 vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; 300 vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; 301 } 302 else 303 push_pt_64(); 304 305 if (bootdevice == VMBOOTDEV_NET) { 306 bootmac = &bm; 307 memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN); 308 } 309 n = create_bios_memmap(vcp, memmap); 310 bootargsz = push_bootargs(memmap, n, bootmac); 311 stacksize = push_stack(bootargsz, marks[MARK_END]); 312 313 vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY]; 314 vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize; 315 vrs->vrs_gdtr.vsi_base = GDT_PAGE; 316 317 log_debug("%s: loaded ELF kernel", __func__); 318 319 return (0); 320 } 321 322 /* 323 * create_bios_memmap 324 * 325 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine. 326 * 327 * Parameters: 328 * vcp: the VM create parameters, containing the memory map passed to vmm(4) 329 * memmap (out): the BIOS memory map 330 * 331 * Return values: 332 * Number of bios_memmap_t entries, including the terminating nul-entry. 333 */ 334 static size_t 335 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap) 336 { 337 size_t i, n = 0, sz; 338 paddr_t gpa; 339 struct vm_mem_range *vmr; 340 341 for (i = 0; i < vcp->vcp_nmemranges; i++) { 342 vmr = &vcp->vcp_memranges[i]; 343 gpa = vmr->vmr_gpa; 344 sz = vmr->vmr_size; 345 346 /* 347 * Make sure that we do not mark the ROM/video RAM area in the 348 * low memory as physcal memory available to the kernel. 349 */ 350 if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) { 351 if (gpa >= LOWMEM_KB * 1024) 352 sz = 0; 353 else 354 sz = LOWMEM_KB * 1024 - gpa; 355 } 356 357 if (sz != 0) { 358 memmap[n].addr = gpa; 359 memmap[n].size = sz; 360 memmap[n].type = 0x1; /* Type 1 : Normal memory */ 361 n++; 362 } 363 } 364 365 /* Null mem map entry to denote the end of the ranges */ 366 memmap[n].addr = 0x0; 367 memmap[n].size = 0x0; 368 memmap[n].type = 0x0; 369 n++; 370 371 return (n); 372 } 373 374 /* 375 * push_bootargs 376 * 377 * Creates the boot arguments page in the guest address space. 378 * Since vmd(8) is acting as the bootloader, we need to create the same boot 379 * arguments page that a real bootloader would have created. This is loaded 380 * into the guest phys RAM space at address BOOTARGS_PAGE. 381 * 382 * Parameters: 383 * memmap: the BIOS memory map 384 * n: number of entries in memmap 385 * 386 * Return values: 387 * The size of the bootargs 388 */ 389 static uint32_t 390 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) 391 { 392 uint32_t memmap_sz, consdev_sz, bootmac_sz, i; 393 bios_consdev_t consdev; 394 uint32_t ba[1024]; 395 396 memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t); 397 ba[0] = 0x0; /* memory map */ 398 ba[1] = memmap_sz; 399 ba[2] = memmap_sz; /* next */ 400 memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t)); 401 i = memmap_sz / sizeof(int); 402 403 /* Serial console device, COM1 @ 0x3f8 */ 404 consdev.consdev = makedev(8, 0); /* com1 @ 0x3f8 */ 405 consdev.conspeed = 115200; 406 consdev.consaddr = 0x3f8; 407 consdev.consfreq = 0; 408 409 consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t); 410 ba[i] = 0x5; /* consdev */ 411 ba[i + 1] = consdev_sz; 412 ba[i + 2] = consdev_sz; 413 memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t)); 414 i += consdev_sz / sizeof(int); 415 416 if (bootmac) { 417 bootmac_sz = 3 * sizeof(int) + (sizeof(bios_bootmac_t) + 3) & ~3; 418 ba[i] = 0x7; /* bootmac */ 419 ba[i + 1] = bootmac_sz; 420 ba[i + 2] = bootmac_sz; 421 memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t)); 422 i += bootmac_sz / sizeof(int); 423 } 424 425 ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ 426 427 write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); 428 429 return (i * sizeof(int)); 430 } 431 432 /* 433 * push_stack 434 * 435 * Creates the boot stack page in the guest address space. When using a real 436 * bootloader, the stack will be prepared using the following format before 437 * transitioning to kernel start, so vmd(8) needs to mimic the same stack 438 * layout. The stack content is pushed to the guest phys RAM at address 439 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 440 * 4 bytes. 441 * 442 * Stack Layout: (TOS == Top Of Stack) 443 * TOS location of boot arguments page 444 * TOS - 0x4 size of the content in the boot arguments page 445 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 446 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 447 * TOS - 0x10 kernel 'end' symbol value 448 * TOS - 0x14 version of bootarg API 449 * 450 * Parameters: 451 * bootargsz: size of boot arguments 452 * end: kernel 'end' symbol value 453 * bootdev: the optional non-default boot device 454 * howto: optional boot flags for the kernel 455 * 456 * Return values: 457 * size of the stack 458 */ 459 static size_t 460 push_stack(uint32_t bootargsz, uint32_t end) 461 { 462 uint32_t stack[1024]; 463 uint16_t loc; 464 465 memset(&stack, 0, sizeof(stack)); 466 loc = 1024; 467 468 stack[--loc] = BOOTARGS_PAGE; 469 stack[--loc] = bootargsz; 470 stack[--loc] = 0; /* biosbasemem */ 471 stack[--loc] = 0; /* biosextmem */ 472 stack[--loc] = end; 473 stack[--loc] = 0x0e; 474 stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 475 stack[--loc] = 0; 476 477 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 478 479 return (1024 - (loc - 1)) * sizeof(uint32_t); 480 } 481 482 /* 483 * mread 484 * 485 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 486 * into the guest address space at paddr 'addr'. 487 * 488 * Parameters: 489 * fp: kernel image file to read from. 490 * addr: guest paddr_t to load to 491 * sz: number of bytes to load 492 * 493 * Return values: 494 * returns 'sz' if successful, or 0 otherwise. 495 */ 496 size_t 497 mread(gzFile fp, paddr_t addr, size_t sz) 498 { 499 const char *errstr = NULL; 500 int errnum = 0; 501 size_t ct; 502 size_t i, osz; 503 char buf[PAGE_SIZE]; 504 505 /* 506 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 507 * write_mem 508 */ 509 ct = 0; 510 osz = sz; 511 if ((addr & PAGE_MASK) != 0) { 512 memset(buf, 0, sizeof(buf)); 513 if (sz > PAGE_SIZE) 514 ct = PAGE_SIZE - (addr & PAGE_MASK); 515 else 516 ct = sz; 517 518 if ((size_t)gzread(fp, buf, ct) != ct) { 519 errstr = gzerror(fp, &errnum); 520 if (errnum == Z_ERRNO) 521 errnum = errno; 522 log_warnx("%s: error %d in mread, %s", __progname, 523 errnum, errstr); 524 return (0); 525 } 526 527 if (write_mem(addr, buf, ct)) 528 return (0); 529 530 addr += ct; 531 } 532 533 sz = sz - ct; 534 535 if (sz == 0) 536 return (osz); 537 538 for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) { 539 memset(buf, 0, sizeof(buf)); 540 if (i + PAGE_SIZE > sz) 541 ct = sz - i; 542 else 543 ct = PAGE_SIZE; 544 545 if ((size_t)gzread(fp, buf, ct) != ct) { 546 errstr = gzerror(fp, &errnum); 547 if (errnum == Z_ERRNO) 548 errnum = errno; 549 log_warnx("%s: error %d in mread, %s", __progname, 550 errnum, errstr); 551 return (0); 552 } 553 554 if (write_mem(addr, buf, ct)) 555 return (0); 556 } 557 558 return (osz); 559 } 560 561 /* 562 * marc4random_buf 563 * 564 * load 'sz' bytes of random data into the guest address space at paddr 565 * 'addr'. 566 * 567 * Parameters: 568 * addr: guest paddr_t to load random bytes into 569 * sz: number of random bytes to load 570 * 571 * Return values: 572 * nothing 573 */ 574 static void 575 marc4random_buf(paddr_t addr, int sz) 576 { 577 int i, ct; 578 char buf[PAGE_SIZE]; 579 580 /* 581 * break up the 'sz' bytes into PAGE_SIZE chunks for use with 582 * write_mem 583 */ 584 ct = 0; 585 if (addr % PAGE_SIZE != 0) { 586 memset(buf, 0, sizeof(buf)); 587 ct = PAGE_SIZE - (addr % PAGE_SIZE); 588 589 arc4random_buf(buf, ct); 590 591 if (write_mem(addr, buf, ct)) 592 return; 593 594 addr += ct; 595 } 596 597 for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) { 598 memset(buf, 0, sizeof(buf)); 599 if (i + PAGE_SIZE > sz) 600 ct = sz - i; 601 else 602 ct = PAGE_SIZE; 603 604 arc4random_buf(buf, ct); 605 606 if (write_mem(addr, buf, ct)) 607 return; 608 } 609 } 610 611 /* 612 * mbzero 613 * 614 * load 'sz' bytes of zeros into the guest address space at paddr 615 * 'addr'. 616 * 617 * Parameters: 618 * addr: guest paddr_t to zero 619 * sz: number of zero bytes to store 620 * 621 * Return values: 622 * nothing 623 */ 624 static void 625 mbzero(paddr_t addr, int sz) 626 { 627 if (write_mem(addr, NULL, sz)) 628 return; 629 } 630 631 /* 632 * mbcopy 633 * 634 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'. 635 * 636 * Parameters: 637 * src: source buffer to copy from 638 * dst: destination guest paddr_t to copy to 639 * sz: number of bytes to copy 640 * 641 * Return values: 642 * nothing 643 */ 644 static void 645 mbcopy(void *src, paddr_t dst, int sz) 646 { 647 write_mem(dst, src, sz); 648 } 649 650 /* 651 * elf64_exec 652 * 653 * Load the kernel indicated by 'fp' into the guest physical memory 654 * space, at the addresses defined in the ELF header. 655 * 656 * This function is used for 64 bit kernels. 657 * 658 * Parameters: 659 * fp: kernel image file to load 660 * elf: ELF header of the kernel 661 * marks: array to store the offsets of various kernel structures 662 * (start, bss, etc) 663 * flags: flag value to indicate which section(s) to load (usually 664 * LOAD_ALL) 665 * 666 * Return values: 667 * 0 if successful 668 * 1 if unsuccessful 669 */ 670 static int 671 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags) 672 { 673 Elf64_Shdr *shp; 674 Elf64_Phdr *phdr; 675 Elf64_Off off; 676 int i; 677 size_t sz; 678 int havesyms; 679 paddr_t minp = ~0, maxp = 0, pos = 0; 680 paddr_t offset = marks[MARK_START], shpp, elfp; 681 682 sz = elf->e_phnum * sizeof(Elf64_Phdr); 683 phdr = malloc(sz); 684 685 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 686 free(phdr); 687 return 1; 688 } 689 690 if ((size_t)gzread(fp, phdr, sz) != sz) { 691 free(phdr); 692 return 1; 693 } 694 695 for (i = 0; i < elf->e_phnum; i++) { 696 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 697 int m; 698 699 /* Fill segment if asked for. */ 700 if (flags & LOAD_RANDOM) { 701 for (pos = 0; pos < phdr[i].p_filesz; 702 pos += m) { 703 m = phdr[i].p_filesz - pos; 704 marc4random_buf(phdr[i].p_paddr + pos, 705 m); 706 } 707 } 708 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 709 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 710 marks[MARK_ERANDOM] = 711 marks[MARK_RANDOM] + phdr[i].p_filesz; 712 } 713 continue; 714 } 715 716 if (phdr[i].p_type != PT_LOAD || 717 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 718 continue; 719 720 #define IS_TEXT(p) (p.p_flags & PF_X) 721 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 722 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 723 /* 724 * XXX: Assume first address is lowest 725 */ 726 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 727 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 728 729 /* Read in segment. */ 730 if (gzseek(fp, (off_t)phdr[i].p_offset, 731 SEEK_SET) == -1) { 732 free(phdr); 733 return 1; 734 } 735 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 736 phdr[i].p_filesz) { 737 free(phdr); 738 return 1; 739 } 740 } 741 742 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 743 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 744 pos = phdr[i].p_paddr; 745 if (minp > pos) 746 minp = pos; 747 pos += phdr[i].p_filesz; 748 if (maxp < pos) 749 maxp = pos; 750 } 751 752 /* Zero out BSS. */ 753 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 754 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 755 phdr[i].p_memsz - phdr[i].p_filesz); 756 } 757 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 758 pos += phdr[i].p_memsz - phdr[i].p_filesz; 759 if (maxp < pos) 760 maxp = pos; 761 } 762 } 763 free(phdr); 764 765 /* 766 * Copy the ELF and section headers. 767 */ 768 elfp = maxp = roundup(maxp, sizeof(Elf64_Addr)); 769 if (flags & (LOAD_HDR | COUNT_HDR)) 770 maxp += sizeof(Elf64_Ehdr); 771 772 if (flags & (LOAD_SYM | COUNT_SYM)) { 773 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 774 warn("gzseek section headers"); 775 return 1; 776 } 777 sz = elf->e_shnum * sizeof(Elf64_Shdr); 778 shp = malloc(sz); 779 780 if ((size_t)gzread(fp, shp, sz) != sz) { 781 free(shp); 782 return 1; 783 } 784 785 shpp = maxp; 786 maxp += roundup(sz, sizeof(Elf64_Addr)); 787 788 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 789 char *shstr = malloc(shstrsz); 790 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 791 SEEK_SET) == -1) { 792 free(shstr); 793 free(shp); 794 return 1; 795 } 796 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 797 free(shstr); 798 free(shp); 799 return 1; 800 } 801 802 /* 803 * Now load the symbol sections themselves. Make sure the 804 * sections are aligned. Don't bother with string tables if 805 * there are no symbol sections. 806 */ 807 off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr)); 808 809 for (havesyms = i = 0; i < elf->e_shnum; i++) 810 if (shp[i].sh_type == SHT_SYMTAB) 811 havesyms = 1; 812 813 for (i = 0; i < elf->e_shnum; i++) { 814 if (shp[i].sh_type == SHT_SYMTAB || 815 shp[i].sh_type == SHT_STRTAB || 816 !strcmp(shstr + shp[i].sh_name, ".debug_line") || 817 !strcmp(shstr + shp[i].sh_name, ELF_CTF)) { 818 if (havesyms && (flags & LOAD_SYM)) { 819 if (gzseek(fp, (off_t)shp[i].sh_offset, 820 SEEK_SET) == -1) { 821 free(shstr); 822 free(shp); 823 return 1; 824 } 825 if (mread(fp, maxp, 826 shp[i].sh_size) != shp[i].sh_size) { 827 free(shstr); 828 free(shp); 829 return 1; 830 } 831 } 832 maxp += roundup(shp[i].sh_size, 833 sizeof(Elf64_Addr)); 834 shp[i].sh_offset = off; 835 shp[i].sh_flags |= SHF_ALLOC; 836 off += roundup(shp[i].sh_size, 837 sizeof(Elf64_Addr)); 838 } 839 } 840 if (flags & LOAD_SYM) { 841 mbcopy(shp, shpp, sz); 842 } 843 free(shstr); 844 free(shp); 845 } 846 847 /* 848 * Frob the copied ELF header to give information relative 849 * to elfp. 850 */ 851 if (flags & LOAD_HDR) { 852 elf->e_phoff = 0; 853 elf->e_shoff = sizeof(Elf64_Ehdr); 854 elf->e_phentsize = 0; 855 elf->e_phnum = 0; 856 mbcopy(elf, elfp, sizeof(*elf)); 857 } 858 859 marks[MARK_START] = LOADADDR(minp); 860 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 861 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 862 marks[MARK_SYM] = LOADADDR(elfp); 863 marks[MARK_END] = LOADADDR(maxp); 864 865 return 0; 866 } 867 868 /* 869 * elf32_exec 870 * 871 * Load the kernel indicated by 'fp' into the guest physical memory 872 * space, at the addresses defined in the ELF header. 873 * 874 * This function is used for 32 bit kernels. 875 * 876 * Parameters: 877 * fp: kernel image file to load 878 * elf: ELF header of the kernel 879 * marks: array to store the offsets of various kernel structures 880 * (start, bss, etc) 881 * flags: flag value to indicate which section(s) to load (usually 882 * LOAD_ALL) 883 * 884 * Return values: 885 * 0 if successful 886 * 1 if unsuccessful 887 */ 888 static int 889 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags) 890 { 891 Elf32_Shdr *shp; 892 Elf32_Phdr *phdr; 893 Elf32_Off off; 894 int i; 895 size_t sz; 896 int havesyms; 897 paddr_t minp = ~0, maxp = 0, pos = 0; 898 paddr_t offset = marks[MARK_START], shpp, elfp; 899 900 sz = elf->e_phnum * sizeof(Elf32_Phdr); 901 phdr = malloc(sz); 902 903 if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 904 free(phdr); 905 return 1; 906 } 907 908 if ((size_t)gzread(fp, phdr, sz) != sz) { 909 free(phdr); 910 return 1; 911 } 912 913 for (i = 0; i < elf->e_phnum; i++) { 914 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 915 int m; 916 917 /* Fill segment if asked for. */ 918 if (flags & LOAD_RANDOM) { 919 for (pos = 0; pos < phdr[i].p_filesz; 920 pos += m) { 921 m = phdr[i].p_filesz - pos; 922 marc4random_buf(phdr[i].p_paddr + pos, 923 m); 924 } 925 } 926 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 927 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 928 marks[MARK_ERANDOM] = 929 marks[MARK_RANDOM] + phdr[i].p_filesz; 930 } 931 continue; 932 } 933 934 if (phdr[i].p_type != PT_LOAD || 935 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 936 continue; 937 938 #define IS_TEXT(p) (p.p_flags & PF_X) 939 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 940 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 941 /* 942 * XXX: Assume first address is lowest 943 */ 944 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 945 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 946 947 /* Read in segment. */ 948 if (gzseek(fp, (off_t)phdr[i].p_offset, 949 SEEK_SET) == -1) { 950 free(phdr); 951 return 1; 952 } 953 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 954 phdr[i].p_filesz) { 955 free(phdr); 956 return 1; 957 } 958 } 959 960 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 961 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 962 pos = phdr[i].p_paddr; 963 if (minp > pos) 964 minp = pos; 965 pos += phdr[i].p_filesz; 966 if (maxp < pos) 967 maxp = pos; 968 } 969 970 /* Zero out BSS. */ 971 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 972 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 973 phdr[i].p_memsz - phdr[i].p_filesz); 974 } 975 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 976 pos += phdr[i].p_memsz - phdr[i].p_filesz; 977 if (maxp < pos) 978 maxp = pos; 979 } 980 } 981 free(phdr); 982 983 /* 984 * Copy the ELF and section headers. 985 */ 986 elfp = maxp = roundup(maxp, sizeof(Elf32_Addr)); 987 if (flags & (LOAD_HDR | COUNT_HDR)) 988 maxp += sizeof(Elf32_Ehdr); 989 990 if (flags & (LOAD_SYM | COUNT_SYM)) { 991 if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 992 warn("lseek section headers"); 993 return 1; 994 } 995 sz = elf->e_shnum * sizeof(Elf32_Shdr); 996 shp = malloc(sz); 997 998 if ((size_t)gzread(fp, shp, sz) != sz) { 999 free(shp); 1000 return 1; 1001 } 1002 1003 shpp = maxp; 1004 maxp += roundup(sz, sizeof(Elf32_Addr)); 1005 1006 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 1007 char *shstr = malloc(shstrsz); 1008 if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 1009 SEEK_SET) == -1) { 1010 free(shstr); 1011 free(shp); 1012 return 1; 1013 } 1014 if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) { 1015 free(shstr); 1016 free(shp); 1017 return 1; 1018 } 1019 1020 /* 1021 * Now load the symbol sections themselves. Make sure the 1022 * sections are aligned. Don't bother with string tables if 1023 * there are no symbol sections. 1024 */ 1025 off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr)); 1026 1027 for (havesyms = i = 0; i < elf->e_shnum; i++) 1028 if (shp[i].sh_type == SHT_SYMTAB) 1029 havesyms = 1; 1030 1031 for (i = 0; i < elf->e_shnum; i++) { 1032 if (shp[i].sh_type == SHT_SYMTAB || 1033 shp[i].sh_type == SHT_STRTAB || 1034 !strcmp(shstr + shp[i].sh_name, ".debug_line")) { 1035 if (havesyms && (flags & LOAD_SYM)) { 1036 if (gzseek(fp, (off_t)shp[i].sh_offset, 1037 SEEK_SET) == -1) { 1038 free(shstr); 1039 free(shp); 1040 return 1; 1041 } 1042 if (mread(fp, maxp, 1043 shp[i].sh_size) != shp[i].sh_size) { 1044 free(shstr); 1045 free(shp); 1046 return 1; 1047 } 1048 } 1049 maxp += roundup(shp[i].sh_size, 1050 sizeof(Elf32_Addr)); 1051 shp[i].sh_offset = off; 1052 shp[i].sh_flags |= SHF_ALLOC; 1053 off += roundup(shp[i].sh_size, 1054 sizeof(Elf32_Addr)); 1055 } 1056 } 1057 if (flags & LOAD_SYM) { 1058 mbcopy(shp, shpp, sz); 1059 } 1060 free(shstr); 1061 free(shp); 1062 } 1063 1064 /* 1065 * Frob the copied ELF header to give information relative 1066 * to elfp. 1067 */ 1068 if (flags & LOAD_HDR) { 1069 elf->e_phoff = 0; 1070 elf->e_shoff = sizeof(Elf32_Ehdr); 1071 elf->e_phentsize = 0; 1072 elf->e_phnum = 0; 1073 mbcopy(elf, elfp, sizeof(*elf)); 1074 } 1075 1076 marks[MARK_START] = LOADADDR(minp); 1077 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 1078 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 1079 marks[MARK_SYM] = LOADADDR(elfp); 1080 marks[MARK_END] = LOADADDR(maxp); 1081 1082 return 0; 1083 } 1084