1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */ 2 /* $OpenBSD: loadfile_elf.c,v 1.30 2018/07/17 13:47:06 mlarkin Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center and by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Ralph Campbell. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)boot.c 8.1 (Berkeley) 6/10/93 66 */ 67 68 /* 69 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 70 * 71 * Permission to use, copy, modify, and distribute this software for any 72 * purpose with or without fee is hereby granted, provided that the above 73 * copyright notice and this permission notice appear in all copies. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
#include <sys/ioctl.h>
#include <sys/reboot.h>
#include <sys/exec.h>

#include <elf.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <err.h>
/* NOTE(review): <errno.h> is included twice (also above) — harmless, one could be dropped. */
#include <errno.h>
#include <stddef.h>

#include <machine/vmmvar.h>
#include <machine/biosvar.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/pte.h>

#include "loadfile.h"
#include "vmd.h"

/*
 * Scratch buffer for the kernel's ELF executable header. Filled by
 * loadfile_elf() and then handed to elf32_exec()/elf64_exec() depending
 * on the EI_CLASS byte.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;

/*
 * setsegment
 *
 * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
 * memory-type segment descriptor support.
 *
 * This function was copied from machdep.c
 *
 * Parameters:
 *  sd: Address of the entry to initialize
 *  base: base of the segment
 *  limit: limit of the segment
 *  type: type of the segment
 *  dpl: privilege level of the segment
 *  def32: default 16/32 bit size of the segment
 *  gran: granularity of the segment (byte/page)
 */
#ifdef __i386__
static void
setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#else
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#endif
{
	/* Low 16 bits of limit and low 24 bits of base (struct bitfields). */
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
#ifdef __i386__
	sd->sd_xx = 0;			/* reserved bits */
#else
	sd->sd_avl = 0;
	sd->sd_long = 0;		/* not a 64-bit code segment */
#endif
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}

/*
 * push_gdt
 *
 * Allocates and populates a page in the guest phys memory space to hold
 * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
 * create the same GDT that a real bootloader would have created.
 * This is loaded into the guest phys RAM space at address GDT_PAGE.
189 */ 190 static void 191 push_gdt(void) 192 { 193 uint8_t gdtpage[PAGE_SIZE]; 194 #ifdef __i386__ 195 struct segment_descriptor *sd; 196 #else 197 struct mem_segment_descriptor *sd; 198 #endif 199 200 memset(&gdtpage, 0, sizeof(gdtpage)); 201 202 #ifdef __i386__ 203 sd = (struct segment_descriptor *)&gdtpage; 204 #else 205 sd = (struct mem_segment_descriptor *)&gdtpage; 206 #endif 207 208 /* 209 * Create three segment descriptors: 210 * 211 * GDT[0] : null desriptor. "Created" via memset above. 212 * GDT[1] (selector @ 0x8): Executable segment, for CS 213 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS 214 */ 215 setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); 216 setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); 217 218 write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); 219 } 220 221 /* 222 * push_pt_32 223 * 224 * Create an identity-mapped page directory hierarchy mapping the first 225 * 4GB of physical memory. This is used during bootstrapping i386 VMs on 226 * CPUs without unrestricted guest capability. 227 */ 228 static void 229 push_pt_32(void) 230 { 231 uint32_t ptes[1024], i; 232 233 memset(ptes, 0, sizeof(ptes)); 234 for (i = 0 ; i < 1024; i++) { 235 ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); 236 } 237 write_mem(PML3_PAGE, ptes, PAGE_SIZE); 238 } 239 240 /* 241 * push_pt_64 242 * 243 * Create an identity-mapped page directory hierarchy mapping the first 244 * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on 245 * CPUs without unrestricted guest capability. 
 */
static void
push_pt_64(void)
{
	uint64_t ptes[512], i;

	/* PML4 entry 0: points at the PDPT (PML3) page. */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PML3_PAGE;
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);

	/* PDPDE0 - first 1GB: points at the page directory (PML2) page. */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);

	/* First 1GB, identity mapped (in 2MB pages via PG_PS). */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < 512; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
	}
	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
}

/*
 * loadfile_elf
 *
 * Loads an ELF kernel to it's defined load address in the guest VM.
 * The kernel is loaded to its defined start point as set in the ELF header.
 *
 * Parameters:
 *  fp: file of a kernel file to load
 *  vcp: the VM create parameters, holding the exact memory map
 *  (out) vrs: register state to set on init for this kernel
 *  bootdev: the optional non-default boot device
 *  howto: optional boot flags for the kernel
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from read(2) or loadelf functions
 */
int
loadfile_elf(FILE *fp, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];

	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
		return 1;

	/* Dispatch on the ELF class byte; 32-bit implies an i386 guest. */
	memset(&marks, 0, sizeof(marks));
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		errno = ENOEXEC;

	/*
	 * In the not-ELF case above, 'r' still holds sizeof(hdr) from the
	 * successful fread, so this check also rejects non-ELF images.
	 */
	if (r)
		return (r);

	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else
		push_pt_64();

	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n);
	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);

	/* Start execution at the kernel entry point, SP inside STACK_PAGE. */
#ifdef __i386__
	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
#else
	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
#endif
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}

/*
 * create_bios_memmap
 *
 * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
 *
 * Parameters:
 *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
 *  memmap (out): the BIOS memory map
 *
 * Return values:
 *  Number of bios_memmap_t entries, including the terminating nul-entry.
 */
static size_t
create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
{
	size_t i, n = 0, sz;
	paddr_t gpa;
	struct vm_mem_range *vmr;

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		gpa = vmr->vmr_gpa;
		sz = vmr->vmr_size;

		/*
		 * Make sure that we do not mark the ROM/video RAM area in the
		 * low memory as physical memory available to the kernel.
		 */
		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
			if (gpa >= LOWMEM_KB * 1024)
				sz = 0;
			else
				sz = LOWMEM_KB * 1024 - gpa;
		}

		if (sz != 0) {
			memmap[n].addr = gpa;
			memmap[n].size = sz;
			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
			n++;
		}
	}

	/* Null mem map entry to denote the end of the ranges */
	memmap[n].addr = 0x0;
	memmap[n].size = 0x0;
	memmap[n].type = 0x0;
	n++;

	return (n);
}

/*
 * push_bootargs
 *
 * Creates the boot arguments page in the guest address space.
 * Since vmd(8) is acting as the bootloader, we need to create the same boot
 * arguments page that a real bootloader would have created. This is loaded
 * into the guest phys RAM space at address BOOTARGS_PAGE.
 *
 * Parameters:
 *  memmap: the BIOS memory map
 *  n: number of entries in memmap
 *
 * Return values:
 *  The size of the bootargs
 */
static uint32_t
push_bootargs(bios_memmap_t *memmap, size_t n)
{
	uint32_t memmap_sz, consdev_sz, i;
	bios_consdev_t consdev;
	uint32_t ba[1024];

	/* Each record: 3-int header (type, size, next) followed by payload. */
	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
	ba[0] = 0x0;	/* memory map */
	ba[1] = memmap_sz;
	ba[2] = memmap_sz;	/* next */
	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
	i = memmap_sz / sizeof(int);

	/* Serial console device, COM1 @ 0x3f8 */
	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
	consdev.conspeed = 9600;
	consdev.consaddr = 0x3f8;
	consdev.consfreq = 0;

	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
	ba[i] = 0x5;	/* consdev */
	ba[i + 1] = consdev_sz;
	ba[i + 2] = consdev_sz;
	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
	i = i + 3 + (sizeof(bios_consdev_t) / 4);

	ba[i] = 0xFFFFFFFF;	/* BOOTARG_END */

	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);

	return (memmap_sz + consdev_sz);
}

/*
 * push_stack
* 448 * Creates the boot stack page in the guest address space. When using a real 449 * bootloader, the stack will be prepared using the following format before 450 * transitioning to kernel start, so vmd(8) needs to mimic the same stack 451 * layout. The stack content is pushed to the guest phys RAM at address 452 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is 453 * 4 bytes. 454 * 455 * Stack Layout: (TOS == Top Of Stack) 456 * TOS location of boot arguments page 457 * TOS - 0x4 size of the content in the boot arguments page 458 * TOS - 0x8 size of low memory (biosbasemem: kernel uses BIOS map only if 0) 459 * TOS - 0xc size of high memory (biosextmem, not used by kernel at all) 460 * TOS - 0x10 kernel 'end' symbol value 461 * TOS - 0x14 version of bootarg API 462 * 463 * Parameters: 464 * bootargsz: size of boot arguments 465 * end: kernel 'end' symbol value 466 * bootdev: the optional non-default boot device 467 * howto: optional boot flags for the kernel 468 * 469 * Return values: 470 * size of the stack 471 */ 472 static size_t 473 push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto) 474 { 475 uint32_t stack[1024]; 476 uint16_t loc; 477 478 memset(&stack, 0, sizeof(stack)); 479 loc = 1024; 480 481 if (bootdev == 0) 482 bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ 483 484 stack[--loc] = BOOTARGS_PAGE; 485 stack[--loc] = bootargsz; 486 stack[--loc] = 0; /* biosbasemem */ 487 stack[--loc] = 0; /* biosextmem */ 488 stack[--loc] = end; 489 stack[--loc] = 0x0e; 490 stack[--loc] = bootdev; 491 stack[--loc] = howto; 492 493 write_mem(STACK_PAGE, &stack, PAGE_SIZE); 494 495 return (1024 - (loc - 1)) * sizeof(uint32_t); 496 } 497 498 /* 499 * mread 500 * 501 * Reads 'sz' bytes from the file whose descriptor is provided in 'fd' 502 * into the guest address space at paddr 'addr'. 503 * 504 * Parameters: 505 * fd: file descriptor of the kernel image file to read from. 
 *  addr: guest paddr_t to load to
 *  sz: number of bytes to load
 *
 * Return values:
 *  returns 'sz' if successful, or 0 otherwise.
 */
size_t
mread(FILE *fp, paddr_t addr, size_t sz)
{
	size_t ct;
	size_t i, rd, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;
	osz = sz;
	/* Leading partial chunk if the destination is not page aligned. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;	/* fits in buf even if it crosses a page */

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* addr is now page aligned; copy the remainder a page at a time. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;	/* final short chunk */
		else
			ct = PAGE_SIZE;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}

/*
 * marc4random_buf
 *
 * load 'sz' bytes of random data into the guest address space at paddr
 * 'addr'.
 *
 * Parameters:
 *  addr: guest paddr_t to load random bytes into
 *  sz: number of random bytes to load
 *
 * Return values:
 *  nothing
 */
static void
marc4random_buf(paddr_t addr, int sz)
{
	int i, ct;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	/* Leading partial chunk if 'addr' is not page aligned. */
	if (addr % PAGE_SIZE != 0) {
		memset(buf, 0, sizeof(buf));
		ct = PAGE_SIZE - (addr % PAGE_SIZE);

		arc4random_buf(buf, ct);

		if (write_mem(addr, buf, ct))
			return;

		addr += ct;
	}

	/*
	 * NOTE(review): unlike mread(), 'sz' is not reduced by the leading
	 * unaligned chunk above, so slightly more than 'sz' random bytes may
	 * be written in that case — confirm this is intentional.
	 */
	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		arc4random_buf(buf, ct);

		if (write_mem(addr, buf, ct))
			return;
	}
}

/*
 * mbzero
 *
 * load 'sz' bytes of zeros into the guest address space at paddr
 * 'addr'.
 *
 * Parameters:
 *  addr: guest paddr_t to zero
 *  sz: number of zero bytes to store
 *
 * Return values:
 *  nothing
 */
static void
mbzero(paddr_t addr, int sz)
{
	int i, ct;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	memset(buf, 0, sizeof(buf));
	/* Leading partial chunk if 'addr' is not page aligned. */
	if (addr % PAGE_SIZE != 0) {
		ct = PAGE_SIZE - (addr % PAGE_SIZE);

		if (write_mem(addr, buf, ct))
			return;

		addr += ct;
	}

	/*
	 * NOTE(review): as in marc4random_buf(), 'sz' is not reduced by the
	 * leading unaligned chunk — confirm the overshoot is intentional.
	 */
	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if (write_mem(addr, buf, ct))
			return;
	}
}

/*
 * mbcopy
 *
 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
669 * 670 * Parameters: 671 * src: source buffer to copy from 672 * dst: destination guest paddr_t to copy to 673 * sz: number of bytes to copy 674 * 675 * Return values: 676 * nothing 677 */ 678 static void 679 mbcopy(void *src, paddr_t dst, int sz) 680 { 681 write_mem(dst, src, sz); 682 } 683 684 /* 685 * elf64_exec 686 * 687 * Load the kernel indicated by 'fd' into the guest physical memory 688 * space, at the addresses defined in the ELF header. 689 * 690 * This function is used for 64 bit kernels. 691 * 692 * Parameters: 693 * fd: file descriptor of the kernel to load 694 * elf: ELF header of the kernel 695 * marks: array to store the offsets of various kernel structures 696 * (start, bss, etc) 697 * flags: flag value to indicate which section(s) to load (usually 698 * LOAD_ALL) 699 * 700 * Return values: 701 * 0 if successful 702 * 1 if unsuccessful 703 */ 704 static int 705 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags) 706 { 707 Elf64_Shdr *shp; 708 Elf64_Phdr *phdr; 709 Elf64_Off off; 710 int i; 711 size_t sz; 712 int first; 713 int havesyms, havelines; 714 paddr_t minp = ~0, maxp = 0, pos = 0; 715 paddr_t offset = marks[MARK_START], shpp, elfp; 716 717 sz = elf->e_phnum * sizeof(Elf64_Phdr); 718 phdr = malloc(sz); 719 720 if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 721 free(phdr); 722 return 1; 723 } 724 725 if (fread(phdr, 1, sz, fp) != sz) { 726 free(phdr); 727 return 1; 728 } 729 730 for (first = 1, i = 0; i < elf->e_phnum; i++) { 731 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 732 int m; 733 734 /* Fill segment if asked for. 
*/ 735 if (flags & LOAD_RANDOM) { 736 for (pos = 0; pos < phdr[i].p_filesz; 737 pos += m) { 738 m = phdr[i].p_filesz - pos; 739 marc4random_buf(phdr[i].p_paddr + pos, 740 m); 741 } 742 } 743 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 744 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 745 marks[MARK_ERANDOM] = 746 marks[MARK_RANDOM] + phdr[i].p_filesz; 747 } 748 continue; 749 } 750 751 if (phdr[i].p_type != PT_LOAD || 752 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 753 continue; 754 755 #define IS_TEXT(p) (p.p_flags & PF_X) 756 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 757 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 758 /* 759 * XXX: Assume first address is lowest 760 */ 761 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 762 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 763 764 /* Read in segment. */ 765 if (fseeko(fp, (off_t)phdr[i].p_offset, 766 SEEK_SET) == -1) { 767 free(phdr); 768 return 1; 769 } 770 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 771 phdr[i].p_filesz) { 772 free(phdr); 773 return 1; 774 } 775 776 first = 0; 777 } 778 779 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 780 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 781 pos = phdr[i].p_paddr; 782 if (minp > pos) 783 minp = pos; 784 pos += phdr[i].p_filesz; 785 if (maxp < pos) 786 maxp = pos; 787 } 788 789 /* Zero out BSS. */ 790 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 791 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 792 phdr[i].p_memsz - phdr[i].p_filesz); 793 } 794 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 795 pos += phdr[i].p_memsz - phdr[i].p_filesz; 796 if (maxp < pos) 797 maxp = pos; 798 } 799 } 800 free(phdr); 801 802 /* 803 * Copy the ELF and section headers. 
804 */ 805 elfp = maxp = roundup(maxp, sizeof(Elf64_Addr)); 806 if (flags & (LOAD_HDR | COUNT_HDR)) 807 maxp += sizeof(Elf64_Ehdr); 808 809 if (flags & (LOAD_SYM | COUNT_SYM)) { 810 if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 811 WARN(("lseek section headers")); 812 return 1; 813 } 814 sz = elf->e_shnum * sizeof(Elf64_Shdr); 815 shp = malloc(sz); 816 817 if (fread(shp, 1, sz, fp) != sz) { 818 free(shp); 819 return 1; 820 } 821 822 shpp = maxp; 823 maxp += roundup(sz, sizeof(Elf64_Addr)); 824 825 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 826 char *shstr = malloc(shstrsz); 827 if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 828 SEEK_SET) == -1) { 829 free(shstr); 830 free(shp); 831 return 1; 832 } 833 if (fread(shstr, 1, shstrsz, fp) != shstrsz) { 834 free(shstr); 835 free(shp); 836 return 1; 837 } 838 839 /* 840 * Now load the symbol sections themselves. Make sure the 841 * sections are aligned. Don't bother with string tables if 842 * there are no symbol sections. 843 */ 844 off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr)); 845 846 for (havesyms = havelines = i = 0; i < elf->e_shnum; i++) 847 if (shp[i].sh_type == SHT_SYMTAB) 848 havesyms = 1; 849 850 for (first = 1, i = 0; i < elf->e_shnum; i++) { 851 if (shp[i].sh_type == SHT_SYMTAB || 852 shp[i].sh_type == SHT_STRTAB || 853 !strcmp(shstr + shp[i].sh_name, ".debug_line") || 854 !strcmp(shstr + shp[i].sh_name, ELF_CTF)) { 855 if (havesyms && (flags & LOAD_SYM)) { 856 if (fseeko(fp, (off_t)shp[i].sh_offset, 857 SEEK_SET) == -1) { 858 free(shstr); 859 free(shp); 860 return 1; 861 } 862 if (mread(fp, maxp, 863 shp[i].sh_size) != shp[i].sh_size) { 864 free(shstr); 865 free(shp); 866 return 1; 867 } 868 } 869 maxp += roundup(shp[i].sh_size, 870 sizeof(Elf64_Addr)); 871 shp[i].sh_offset = off; 872 shp[i].sh_flags |= SHF_ALLOC; 873 off += roundup(shp[i].sh_size, 874 sizeof(Elf64_Addr)); 875 first = 0; 876 } 877 } 878 if (flags & LOAD_SYM) { 879 mbcopy(shp, shpp, sz); 880 } 881 
free(shstr); 882 free(shp); 883 } 884 885 /* 886 * Frob the copied ELF header to give information relative 887 * to elfp. 888 */ 889 if (flags & LOAD_HDR) { 890 elf->e_phoff = 0; 891 elf->e_shoff = sizeof(Elf64_Ehdr); 892 elf->e_phentsize = 0; 893 elf->e_phnum = 0; 894 mbcopy(elf, elfp, sizeof(*elf)); 895 } 896 897 marks[MARK_START] = LOADADDR(minp); 898 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 899 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 900 marks[MARK_SYM] = LOADADDR(elfp); 901 marks[MARK_END] = LOADADDR(maxp); 902 903 return 0; 904 } 905 906 /* 907 * elf32_exec 908 * 909 * Load the kernel indicated by 'fd' into the guest physical memory 910 * space, at the addresses defined in the ELF header. 911 * 912 * This function is used for 32 bit kernels. 913 * 914 * Parameters: 915 * fd: file descriptor of the kernel to load 916 * elf: ELF header of the kernel 917 * marks: array to store the offsets of various kernel structures 918 * (start, bss, etc) 919 * flags: flag value to indicate which section(s) to load (usually 920 * LOAD_ALL) 921 * 922 * Return values: 923 * 0 if successful 924 * 1 if unsuccessful 925 */ 926 static int 927 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags) 928 { 929 Elf32_Shdr *shp; 930 Elf32_Phdr *phdr; 931 Elf32_Off off; 932 int i; 933 size_t sz; 934 int first; 935 int havesyms, havelines; 936 paddr_t minp = ~0, maxp = 0, pos = 0; 937 paddr_t offset = marks[MARK_START], shpp, elfp; 938 939 sz = elf->e_phnum * sizeof(Elf32_Phdr); 940 phdr = malloc(sz); 941 942 if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1) { 943 free(phdr); 944 return 1; 945 } 946 947 if (fread(phdr, 1, sz, fp) != sz) { 948 free(phdr); 949 return 1; 950 } 951 952 for (first = 1, i = 0; i < elf->e_phnum; i++) { 953 if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) { 954 int m; 955 956 /* Fill segment if asked for. 
*/ 957 if (flags & LOAD_RANDOM) { 958 for (pos = 0; pos < phdr[i].p_filesz; 959 pos += m) { 960 m = phdr[i].p_filesz - pos; 961 marc4random_buf(phdr[i].p_paddr + pos, 962 m); 963 } 964 } 965 if (flags & (LOAD_RANDOM | COUNT_RANDOM)) { 966 marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr); 967 marks[MARK_ERANDOM] = 968 marks[MARK_RANDOM] + phdr[i].p_filesz; 969 } 970 continue; 971 } 972 973 if (phdr[i].p_type != PT_LOAD || 974 (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0) 975 continue; 976 977 #define IS_TEXT(p) (p.p_flags & PF_X) 978 #define IS_DATA(p) ((p.p_flags & PF_X) == 0) 979 #define IS_BSS(p) (p.p_filesz < p.p_memsz) 980 /* 981 * XXX: Assume first address is lowest 982 */ 983 if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) || 984 (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) { 985 986 /* Read in segment. */ 987 if (fseeko(fp, (off_t)phdr[i].p_offset, 988 SEEK_SET) == -1) { 989 free(phdr); 990 return 1; 991 } 992 if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) != 993 phdr[i].p_filesz) { 994 free(phdr); 995 return 1; 996 } 997 998 first = 0; 999 } 1000 1001 if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) || 1002 (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) { 1003 pos = phdr[i].p_paddr; 1004 if (minp > pos) 1005 minp = pos; 1006 pos += phdr[i].p_filesz; 1007 if (maxp < pos) 1008 maxp = pos; 1009 } 1010 1011 /* Zero out BSS. */ 1012 if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) { 1013 mbzero((phdr[i].p_paddr + phdr[i].p_filesz), 1014 phdr[i].p_memsz - phdr[i].p_filesz); 1015 } 1016 if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) { 1017 pos += phdr[i].p_memsz - phdr[i].p_filesz; 1018 if (maxp < pos) 1019 maxp = pos; 1020 } 1021 } 1022 free(phdr); 1023 1024 /* 1025 * Copy the ELF and section headers. 
1026 */ 1027 elfp = maxp = roundup(maxp, sizeof(Elf32_Addr)); 1028 if (flags & (LOAD_HDR | COUNT_HDR)) 1029 maxp += sizeof(Elf32_Ehdr); 1030 1031 if (flags & (LOAD_SYM | COUNT_SYM)) { 1032 if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) { 1033 WARN(("lseek section headers")); 1034 return 1; 1035 } 1036 sz = elf->e_shnum * sizeof(Elf32_Shdr); 1037 shp = malloc(sz); 1038 1039 if (fread(shp, 1, sz, fp) != sz) { 1040 free(shp); 1041 return 1; 1042 } 1043 1044 shpp = maxp; 1045 maxp += roundup(sz, sizeof(Elf32_Addr)); 1046 1047 size_t shstrsz = shp[elf->e_shstrndx].sh_size; 1048 char *shstr = malloc(shstrsz); 1049 if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset, 1050 SEEK_SET) == -1) { 1051 free(shstr); 1052 free(shp); 1053 return 1; 1054 } 1055 if (fread(shstr, 1, shstrsz, fp) != shstrsz) { 1056 free(shstr); 1057 free(shp); 1058 return 1; 1059 } 1060 1061 /* 1062 * Now load the symbol sections themselves. Make sure the 1063 * sections are aligned. Don't bother with string tables if 1064 * there are no symbol sections. 
1065 */ 1066 off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr)); 1067 1068 for (havesyms = havelines = i = 0; i < elf->e_shnum; i++) 1069 if (shp[i].sh_type == SHT_SYMTAB) 1070 havesyms = 1; 1071 1072 for (first = 1, i = 0; i < elf->e_shnum; i++) { 1073 if (shp[i].sh_type == SHT_SYMTAB || 1074 shp[i].sh_type == SHT_STRTAB || 1075 !strcmp(shstr + shp[i].sh_name, ".debug_line")) { 1076 if (havesyms && (flags & LOAD_SYM)) { 1077 if (fseeko(fp, (off_t)shp[i].sh_offset, 1078 SEEK_SET) == -1) { 1079 free(shstr); 1080 free(shp); 1081 return 1; 1082 } 1083 if (mread(fp, maxp, 1084 shp[i].sh_size) != shp[i].sh_size) { 1085 free(shstr); 1086 free(shp); 1087 return 1; 1088 } 1089 } 1090 maxp += roundup(shp[i].sh_size, 1091 sizeof(Elf32_Addr)); 1092 shp[i].sh_offset = off; 1093 shp[i].sh_flags |= SHF_ALLOC; 1094 off += roundup(shp[i].sh_size, 1095 sizeof(Elf32_Addr)); 1096 first = 0; 1097 } 1098 } 1099 if (flags & LOAD_SYM) { 1100 mbcopy(shp, shpp, sz); 1101 } 1102 free(shstr); 1103 free(shp); 1104 } 1105 1106 /* 1107 * Frob the copied ELF header to give information relative 1108 * to elfp. 1109 */ 1110 if (flags & LOAD_HDR) { 1111 elf->e_phoff = 0; 1112 elf->e_shoff = sizeof(Elf32_Ehdr); 1113 elf->e_phentsize = 0; 1114 elf->e_phnum = 0; 1115 mbcopy(elf, elfp, sizeof(*elf)); 1116 } 1117 1118 marks[MARK_START] = LOADADDR(minp); 1119 marks[MARK_ENTRY] = LOADADDR(elf->e_entry); 1120 marks[MARK_NSYM] = 1; /* XXX: Kernel needs >= 0 */ 1121 marks[MARK_SYM] = LOADADDR(elfp); 1122 marks[MARK_END] = LOADADDR(maxp); 1123 1124 return 0; 1125 } 1126