xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.19 2016/09/17 17:39:34 jasper Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 #include <sys/exec_elf.h>
89 
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/pte.h>
104 
105 #include "loadfile.h"
106 #include "vmd.h"
107 
/*
 * Scratch copy of the kernel image's ELF executable header, filled in by
 * loadelf_main().  A union is used because the file may be either a
 * 32-bit or a 64-bit ELF image; e_ident is inspected to tell which.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;
112 
113 static void setsegment(struct mem_segment_descriptor *, uint32_t,
114     size_t, int, int, int, int);
115 static int elf32_exec(int, Elf32_Ehdr *, u_long *, int);
116 static int elf64_exec(int, Elf64_Ehdr *, u_long *, int);
117 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
118 static uint32_t push_bootargs(bios_memmap_t *, size_t);
119 static size_t push_stack(uint32_t, uint32_t);
120 static void push_gdt(void);
121 static size_t mread(int, paddr_t, size_t);
122 static void marc4random_buf(paddr_t, int);
123 static void mbzero(paddr_t, int);
124 static void mbcopy(void *, paddr_t, int);
125 
126 extern char *__progname;
127 extern int vm_id;
128 
129 /*
130  * setsegment
131  *
132  * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
134  * memory-type segment descriptor support.
135  *
136  * This function was copied from machdep.c
137  *
138  * Parameters:
139  *  sd: Address of the entry to initialize
140  *  base: base of the segment
141  *  limit: limit of the segment
142  *  type: type of the segment
 *  dpl: privilege level of the segment
144  *  def32: default 16/32 bit size of the segment
145  *  gran: granularity of the segment (byte/page)
146  */
147 static void
148 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
149     int type, int dpl, int def32, int gran)
150 {
151 	sd->sd_lolimit = (int)limit;
152 	sd->sd_lobase = (int)base;
153 	sd->sd_type = type;
154 	sd->sd_dpl = dpl;
155 	sd->sd_p = 1;
156 	sd->sd_hilimit = (int)limit >> 16;
157 	sd->sd_avl = 0;
158 	sd->sd_long = 0;
159 	sd->sd_def32 = def32;
160 	sd->sd_gran = gran;
161 	sd->sd_hibase = (int)base >> 24;
162 }
163 
164 /*
165  * push_gdt
166  *
167  * Allocates and populates a page in the guest phys memory space to hold
168  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
169  * create the same GDT that a real bootloader would have created.
170  * This is loaded into the guest phys RAM space at address GDT_PAGE.
171  */
172 static void
173 push_gdt(void)
174 {
175 	uint8_t gdtpage[PAGE_SIZE];
176 	struct mem_segment_descriptor *sd;
177 
178 	memset(&gdtpage, 0, sizeof(gdtpage));
179 	sd = (struct mem_segment_descriptor *)&gdtpage;
180 
181 	/*
182 	 * Create three segment descriptors:
183 	 *
184 	 * GDT[0] : null desriptor. "Created" via memset above.
185 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
186 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
187 	 */
188 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
189 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
190 
191 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
192 }
193 
194 /*
195  * push_pt
196  *
197  * Create an identity-mapped page directory hierarchy mapping the first
198  * 1GB of physical memory. This is used during bootstrapping VMs on
199  * CPUs without unrestricted guest capability.
200  */
static void
push_pt(void)
{
	pt_entry_t ptes[NPTE_PG];
	uint64_t i;

	/* PML4 [0] - a single entry pointing at the PML3 page */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PML3_PAGE;
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);

	/* PML3 [0] - first 1GB, via a single PML2 page */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);

	/* PML2 [0..511] - first 1GB (in 2MB pages) */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < NPTE_PG; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | (NBPD_L2 * i);
	}
	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
}
224 
225 /*
226  * loadelf_main
227  *
 * Loads an ELF kernel to its defined load address in the guest VM.
229  * The kernel is loaded to its defined start point as set in the ELF header.
230  *
231  * Parameters:
232  *  fd: file descriptor of a kernel file to load
233  *  vcp: the VM create parameters, holding the exact memory map
234  *  (out) vrs: register state to set on init for this kernel
235  *
236  * Return values:
237  *  0 if successful
238  *  various error codes returned from read(2) or loadelf functions
239  */
240 int
241 loadelf_main(int fd, struct vm_create_params *vcp, struct vcpu_reg_state *vrs)
242 {
243 	int r;
244 	uint32_t bootargsz;
245 	size_t n, stacksize;
246 	u_long marks[MARK_MAX];
247 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
248 
249 	if ((r = read(fd, &hdr, sizeof(hdr))) != sizeof(hdr))
250 		return 1;
251 
252 	memset(&marks, 0, sizeof(marks));
253 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
254 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
255 		r = elf32_exec(fd, &hdr.elf32, marks, LOAD_ALL);
256 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
257 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
258 		r = elf64_exec(fd, &hdr.elf64, marks, LOAD_ALL);
259 	}
260 
261 	if (r)
262 		return (r);
263 
264 	push_gdt();
265 	push_pt();
266 	n = create_bios_memmap(vcp, memmap);
267 	bootargsz = push_bootargs(memmap, n);
268 	stacksize = push_stack(bootargsz, marks[MARK_END]);
269 
270 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
271 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
272 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
273 
274 	return (0);
275 }
276 
277 /*
278  * create_bios_memmap
279  *
280  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
281  *
282  * Parameters:
283  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
284  *   memmap (out): the BIOS memory map
285  *
286  * Return values:
287  * Number of bios_memmap_t entries, including the terminating nul-entry.
288  */
289 static size_t
290 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
291 {
292 	size_t i, n = 0, sz;
293 	paddr_t gpa;
294 	struct vm_mem_range *vmr;
295 
296 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
297 		vmr = &vcp->vcp_memranges[i];
298 		gpa = vmr->vmr_gpa;
299 		sz = vmr->vmr_size;
300 
301 		/*
302 		 * Make sure that we do not mark the ROM/video RAM area in the
303 		 * low memory as physcal memory available to the kernel.
304 		 */
305 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
306 			if (gpa >= LOWMEM_KB * 1024)
307 				sz = 0;
308 			else
309 				sz = LOWMEM_KB * 1024 - gpa;
310 		}
311 
312 		if (sz != 0) {
313 			memmap[n].addr = gpa;
314 			memmap[n].size = sz;
315 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
316 			n++;
317 		}
318 	}
319 
320 	/* Null mem map entry to denote the end of the ranges */
321 	memmap[n].addr = 0x0;
322 	memmap[n].size = 0x0;
323 	memmap[n].type = 0x0;
324 	n++;
325 
326 	return (n);
327 }
328 
329 /*
330  * push_bootargs
331  *
332  * Creates the boot arguments page in the guest address space.
333  * Since vmd(8) is acting as the bootloader, we need to create the same boot
334  * arguments page that a real bootloader would have created. This is loaded
335  * into the guest phys RAM space at address BOOTARGS_PAGE.
336  *
337  * Parameters:
338  *  memmap: the BIOS memory map
339  *  n: number of entries in memmap
340  *
341  * Return values:
342  *  The size of the bootargs
343  */
static uint32_t
push_bootargs(bios_memmap_t *memmap, size_t n)
{
	uint32_t memmap_sz, consdev_sz, i;
	bios_consdev_t consdev;
	uint32_t ba[1024];	/* one PAGE_SIZE worth of 32-bit words */

	/*
	 * Each bootarg record is a 3-word header (type, total size,
	 * offset of next record) followed by its payload, mirroring the
	 * layout built by the real bootloader.
	 *
	 * NOTE(review): 'ba' is not zeroed first, so the tail of the
	 * page written below is stack garbage -- presumably harmless
	 * since the kernel stops at BOOTARG_END, but confirm.
	 */
	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
	ba[0] = 0x0;    /* memory map */
	ba[1] = memmap_sz;
	ba[2] = memmap_sz;     /* next */
	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
	i = memmap_sz / sizeof(int);	/* word index of the next record */

	/* Serial console device, COM1 @ 0x3f8 */
	consdev.consdev = makedev(8, 0);        /* com1 @ 0x3f8 */
	consdev.conspeed = 9600;
	consdev.consaddr = 0x3f8;
	consdev.consfreq = 0;

	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
	ba[i] = 0x5;   /* consdev */
	ba[i + 1] = consdev_sz;
	ba[i + 2] = consdev_sz;
	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
	i = i + 3 + (sizeof(bios_consdev_t) / 4);

	ba[i] = 0xFFFFFFFF; /* BOOTARG_END */

	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);

	return (memmap_sz + consdev_sz);
}
377 
378 /*
379  * push_stack
380  *
381  * Creates the boot stack page in the guest address space. When using a real
382  * bootloader, the stack will be prepared using the following format before
383  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
384  * layout. The stack content is pushed to the guest phys RAM at address
385  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
386  * 4 bytes.
387  *
388  * Stack Layout: (TOS == Top Of Stack)
389  *  TOS		location of boot arguments page
390  *  TOS - 0x4	size of the content in the boot arguments page
391  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
392  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
393  *  TOS - 0x10	kernel 'end' symbol value
394  *  TOS - 0x14	version of bootarg API
395  *
396  * Parameters:
397  *  bootargsz: size of boot arguments
398  *  end: kernel 'end' symbol value
399  *
400  * Return values:
401  *  size of the stack
402  */
static size_t
push_stack(uint32_t bootargsz, uint32_t end)
{
	uint32_t stack[1024];	/* one PAGE_SIZE worth of 32-bit words */
	uint16_t loc;

	memset(&stack, 0, sizeof(stack));
	loc = 1024;		/* push downward from the top of the page */

	stack[--loc] = BOOTARGS_PAGE;	/* location of boot arguments page */
	stack[--loc] = bootargsz;	/* size of boot arguments content */
	stack[--loc] = 0; /* biosbasemem */
	stack[--loc] = 0; /* biosextmem */
	stack[--loc] = end;	/* kernel 'end' symbol value */
	stack[--loc] = 0x0e;	/* bootarg API version, presumably -- verify */
	stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
	stack[--loc] = 0x0;

	write_mem(STACK_PAGE, &stack, PAGE_SIZE);

	/*
	 * NOTE(review): this reports one word more than was pushed
	 * (9 words vs 8), leaving RSP one slot below the last entry --
	 * presumably the slot the kernel entry treats as a return
	 * address; confirm against locore before changing.
	 */
	return (1024 - (loc - 1)) * sizeof(uint32_t);
}
425 
426 /*
427  * mread
428  *
429  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
430  * into the guest address space at paddr 'addr'.
431  *
432  * Parameters:
433  *  fd: file descriptor of the kernel image file to read from.
434  *  addr: guest paddr_t to load to
435  *  sz: number of bytes to load
436  *
437  * Return values:
438  *  returns 'sz' if successful, or 0 otherwise.
439  */
static size_t
mread(int fd, paddr_t addr, size_t sz)
{
	int ct;
	size_t i, rd, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;		/* running byte count; informational only */
	osz = sz;	/* remember requested size for the return value */
	/* Handle a leading partial page if 'addr' is not page aligned. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;
		/*
		 * NOTE(review): when sz <= PAGE_SIZE this chunk can
		 * cross a page boundary; write_mem presumably handles
		 * arbitrary spans (see mbcopy) -- confirm.
		 */

		if (read(fd, buf, ct) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Account for the prefix chunk already transferred. */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* Copy the remainder one page-sized chunk at a time. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if (read(fd, buf, ct) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}
497 
498 /*
499  * marc4random_buf
500  *
501  * load 'sz' bytes of random data into the guest address space at paddr
502  * 'addr'.
503  *
504  * Parameters:
505  *  addr: guest paddr_t to load random bytes into
506  *  sz: number of random bytes to load
507  *
508  * Return values:
509  *  nothing
510  */
511 static void
512 marc4random_buf(paddr_t addr, int sz)
513 {
514 	int i, ct;
515 	char buf[PAGE_SIZE];
516 
517 	/*
518 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
519 	 * write_mem
520 	 */
521 	ct = 0;
522 	if (addr % PAGE_SIZE != 0) {
523 		memset(buf, 0, sizeof(buf));
524 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
525 
526 		arc4random_buf(buf, ct);
527 
528 		if (write_mem(addr, buf, ct))
529 			return;
530 
531 		addr += ct;
532 	}
533 
534 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
535 		memset(buf, 0, sizeof(buf));
536 		if (i + PAGE_SIZE > sz)
537 			ct = sz - i;
538 		else
539 			ct = PAGE_SIZE;
540 
541 		arc4random_buf(buf, ct);
542 
543 		if (write_mem(addr, buf, ct))
544 			return;
545 	}
546 }
547 
548 /*
549  * mbzero
550  *
551  * load 'sz' bytes of zeros into the guest address space at paddr
552  * 'addr'.
553  *
554  * Parameters:
555  *  addr: guest paddr_t to zero
556  *  sz: number of zero bytes to store
557  *
558  * Return values:
559  *  nothing
560  */
561 static void
562 mbzero(paddr_t addr, int sz)
563 {
564 	int i, ct;
565 	char buf[PAGE_SIZE];
566 
567 	/*
568 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
569 	 * write_mem
570 	 */
571 	ct = 0;
572 	memset(buf, 0, sizeof(buf));
573 	if (addr % PAGE_SIZE != 0) {
574 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
575 
576 		if (write_mem(addr, buf, ct))
577 			return;
578 
579 		addr += ct;
580 	}
581 
582 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
583 		if (i + PAGE_SIZE > sz)
584 			ct = sz - i;
585 		else
586 			ct = PAGE_SIZE;
587 
588 		if (write_mem(addr, buf, ct))
589 			return;
590 	}
591 }
592 
593 /*
594  * mbcopy
595  *
596  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
597  *
598  * Parameters:
599  *  src: source buffer to copy from
600  *  dst: destination guest paddr_t to copy to
601  *  sz: number of bytes to copy
602  *
603  * Return values:
604  *  nothing
605  */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	/*
	 * Unlike mread()/mbzero(), no page chunking is done here;
	 * write_mem() presumably accepts arbitrary spans -- confirm.
	 */
	write_mem(dst, src, sz);
}
611 
612 /*
613  * elf64_exec
614  *
615  * Load the kernel indicated by 'fd' into the guest physical memory
616  * space, at the addresses defined in the ELF header.
617  *
618  * This function is used for 64 bit kernels.
619  *
620  * Parameters:
621  *  fd: file descriptor of the kernel to load
622  *  elf: ELF header of the kernel
623  *  marks: array to store the offsets of various kernel structures
624  *      (start, bss, etc)
625  *  flags: flag value to indicate which section(s) to load (usually
626  *      LOAD_ALL)
627  *
628  * Return values:
629  *  0 if successful
630  *  1 if unsuccessful
631  */
632 static int
633 elf64_exec(int fd, Elf64_Ehdr *elf, u_long *marks, int flags)
634 {
635 	Elf64_Shdr *shp;
636 	Elf64_Phdr *phdr;
637 	Elf64_Off off;
638 	int i;
639 	ssize_t sz;
640 	int first;
641 	int havesyms, havelines;
642 	paddr_t minp = ~0, maxp = 0, pos = 0;
643 	paddr_t offset = marks[MARK_START], shpp, elfp;
644 
645 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
646 	phdr = malloc(sz);
647 
648 	if (lseek(fd, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
649 		free(phdr);
650 		return 1;
651 	}
652 
653 	if (read(fd, phdr, sz) != sz) {
654 		free(phdr);
655 		return 1;
656 	}
657 
658 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
659 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
660 			int m;
661 
662 			/* Fill segment if asked for. */
663 			if (flags & LOAD_RANDOM) {
664 				for (pos = 0; pos < phdr[i].p_filesz;
665 				    pos += m) {
666 					m = phdr[i].p_filesz - pos;
667 					marc4random_buf(phdr[i].p_paddr + pos,
668 					    m);
669 				}
670 			}
671 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
672 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
673 				marks[MARK_ERANDOM] =
674 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
675 			}
676 			continue;
677 		}
678 
679 		if (phdr[i].p_type != PT_LOAD ||
680 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
681 			continue;
682 
683 #define IS_TEXT(p)	(p.p_flags & PF_X)
684 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
685 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
686 		/*
687 		 * XXX: Assume first address is lowest
688 		 */
689 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
690 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
691 
692 			/* Read in segment. */
693 			if (lseek(fd, (off_t)phdr[i].p_offset,
694 			    SEEK_SET) == -1) {
695 				free(phdr);
696 				return 1;
697 			}
698 			if (mread(fd, phdr[i].p_paddr, phdr[i].p_filesz) !=
699 			    phdr[i].p_filesz) {
700 				free(phdr);
701 				return 1;
702 			}
703 
704 			first = 0;
705 		}
706 
707 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
708 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
709 			pos = phdr[i].p_paddr;
710 			if (minp > pos)
711 				minp = pos;
712 			pos += phdr[i].p_filesz;
713 			if (maxp < pos)
714 				maxp = pos;
715 		}
716 
717 		/* Zero out BSS. */
718 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
719 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
720 			    phdr[i].p_memsz - phdr[i].p_filesz);
721 		}
722 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
723 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
724 			if (maxp < pos)
725 				maxp = pos;
726 		}
727 	}
728 	free(phdr);
729 
730 	/*
731 	 * Copy the ELF and section headers.
732 	 */
733 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
734 	if (flags & (LOAD_HDR | COUNT_HDR))
735 		maxp += sizeof(Elf64_Ehdr);
736 
737 	if (flags & (LOAD_SYM | COUNT_SYM)) {
738 		if (lseek(fd, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
739 			WARN(("lseek section headers"));
740 			return 1;
741 		}
742 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
743 		shp = malloc(sz);
744 
745 		if (read(fd, shp, sz) != sz) {
746 			free(shp);
747 			return 1;
748 		}
749 
750 		shpp = maxp;
751 		maxp += roundup(sz, sizeof(Elf64_Addr));
752 
753 		ssize_t shstrsz = shp[elf->e_shstrndx].sh_size;
754 		char *shstr = malloc(shstrsz);
755 		if (lseek(fd, (off_t)shp[elf->e_shstrndx].sh_offset,
756 		    SEEK_SET) == -1) {
757 			free(shstr);
758 			free(shp);
759 			return 1;
760 		}
761 		if (read(fd, shstr, shstrsz) != shstrsz) {
762 			free(shstr);
763 			free(shp);
764 			return 1;
765 		}
766 
767 		/*
768 		 * Now load the symbol sections themselves. Make sure the
769 		 * sections are aligned. Don't bother with string tables if
770 		 * there are no symbol sections.
771 		 */
772 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
773 
774 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
775 			if (shp[i].sh_type == SHT_SYMTAB)
776 				havesyms = 1;
777 
778 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
779 			if (shp[i].sh_type == SHT_SYMTAB ||
780 			    shp[i].sh_type == SHT_STRTAB ||
781 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
782 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
783 				if (havesyms && (flags & LOAD_SYM)) {
784 					if (lseek(fd, (off_t)shp[i].sh_offset,
785 					    SEEK_SET) == -1) {
786 						free(shstr);
787 						free(shp);
788 						return 1;
789 					}
790 					if (mread(fd, maxp,
791 					    shp[i].sh_size) != shp[i].sh_size) {
792 						free(shstr);
793 						free(shp);
794 						return 1;
795 					}
796 				}
797 				maxp += roundup(shp[i].sh_size,
798 				    sizeof(Elf64_Addr));
799 				shp[i].sh_offset = off;
800 				shp[i].sh_flags |= SHF_ALLOC;
801 				off += roundup(shp[i].sh_size,
802 				    sizeof(Elf64_Addr));
803 				first = 0;
804 			}
805 		}
806 		if (flags & LOAD_SYM) {
807 			mbcopy(shp, shpp, sz);
808 		}
809 		free(shstr);
810 		free(shp);
811 	}
812 
813 	/*
814 	 * Frob the copied ELF header to give information relative
815 	 * to elfp.
816 	 */
817 	if (flags & LOAD_HDR) {
818 		elf->e_phoff = 0;
819 		elf->e_shoff = sizeof(Elf64_Ehdr);
820 		elf->e_phentsize = 0;
821 		elf->e_phnum = 0;
822 		mbcopy(elf, elfp, sizeof(*elf));
823 	}
824 
825 	marks[MARK_START] = LOADADDR(minp);
826 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
827 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
828 	marks[MARK_SYM] = LOADADDR(elfp);
829 	marks[MARK_END] = LOADADDR(maxp);
830 
831 	return 0;
832 }
833 
834 /*
835  * elf32_exec
836  *
837  * Load the kernel indicated by 'fd' into the guest physical memory
838  * space, at the addresses defined in the ELF header.
839  *
840  * This function is used for 32 bit kernels.
841  *
842  * Parameters:
843  *  fd: file descriptor of the kernel to load
844  *  elf: ELF header of the kernel
845  *  marks: array to store the offsets of various kernel structures
846  *      (start, bss, etc)
847  *  flags: flag value to indicate which section(s) to load (usually
848  *      LOAD_ALL)
849  *
850  * Return values:
851  *  0 if successful
852  *  1 if unsuccessful
853  */
854 static int
855 elf32_exec(int fd, Elf32_Ehdr *elf, u_long *marks, int flags)
856 {
857 	Elf32_Shdr *shp;
858 	Elf32_Phdr *phdr;
859 	Elf32_Off off;
860 	int i;
861 	ssize_t sz;
862 	int first;
863 	int havesyms, havelines;
864 	paddr_t minp = ~0, maxp = 0, pos = 0;
865 	paddr_t offset = marks[MARK_START], shpp, elfp;
866 
867 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
868 	phdr = malloc(sz);
869 
870 	if (lseek(fd, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
871 		free(phdr);
872 		return 1;
873 	}
874 
875 	if (read(fd, phdr, sz) != sz) {
876 		free(phdr);
877 		return 1;
878 	}
879 
880 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
881 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
882 			int m;
883 
884 			/* Fill segment if asked for. */
885 			if (flags & LOAD_RANDOM) {
886 				for (pos = 0; pos < phdr[i].p_filesz;
887 				    pos += m) {
888 					m = phdr[i].p_filesz - pos;
889 					marc4random_buf(phdr[i].p_paddr + pos,
890 					    m);
891 				}
892 			}
893 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
894 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
895 				marks[MARK_ERANDOM] =
896 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
897 			}
898 			continue;
899 		}
900 
901 		if (phdr[i].p_type != PT_LOAD ||
902 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
903 			continue;
904 
905 #define IS_TEXT(p)	(p.p_flags & PF_X)
906 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
907 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
908 		/*
909 		 * XXX: Assume first address is lowest
910 		 */
911 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
912 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
913 
914 			/* Read in segment. */
915 			if (lseek(fd, (off_t)phdr[i].p_offset,
916 			    SEEK_SET) == -1) {
917 				free(phdr);
918 				return 1;
919 			}
920 			if (mread(fd, phdr[i].p_paddr, phdr[i].p_filesz) !=
921 			    phdr[i].p_filesz) {
922 				free(phdr);
923 				return 1;
924 			}
925 
926 			first = 0;
927 		}
928 
929 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
930 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
931 			pos = phdr[i].p_paddr;
932 			if (minp > pos)
933 				minp = pos;
934 			pos += phdr[i].p_filesz;
935 			if (maxp < pos)
936 				maxp = pos;
937 		}
938 
939 		/* Zero out BSS. */
940 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
941 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
942 			    phdr[i].p_memsz - phdr[i].p_filesz);
943 		}
944 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
945 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
946 			if (maxp < pos)
947 				maxp = pos;
948 		}
949 	}
950 	free(phdr);
951 
952 	/*
953 	 * Copy the ELF and section headers.
954 	 */
955 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
956 	if (flags & (LOAD_HDR | COUNT_HDR))
957 		maxp += sizeof(Elf32_Ehdr);
958 
959 	if (flags & (LOAD_SYM | COUNT_SYM)) {
960 		if (lseek(fd, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
961 			WARN(("lseek section headers"));
962 			return 1;
963 		}
964 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
965 		shp = malloc(sz);
966 
967 		if (read(fd, shp, sz) != sz) {
968 			free(shp);
969 			return 1;
970 		}
971 
972 		shpp = maxp;
973 		maxp += roundup(sz, sizeof(Elf32_Addr));
974 
975 		ssize_t shstrsz = shp[elf->e_shstrndx].sh_size;
976 		char *shstr = malloc(shstrsz);
977 		if (lseek(fd, (off_t)shp[elf->e_shstrndx].sh_offset,
978 		    SEEK_SET) == -1) {
979 			free(shstr);
980 			free(shp);
981 			return 1;
982 		}
983 		if (read(fd, shstr, shstrsz) != shstrsz) {
984 			free(shstr);
985 			free(shp);
986 			return 1;
987 		}
988 
989 		/*
990 		 * Now load the symbol sections themselves. Make sure the
991 		 * sections are aligned. Don't bother with string tables if
992 		 * there are no symbol sections.
993 		 */
994 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
995 
996 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
997 			if (shp[i].sh_type == SHT_SYMTAB)
998 				havesyms = 1;
999 
1000 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1001 			if (shp[i].sh_type == SHT_SYMTAB ||
1002 			    shp[i].sh_type == SHT_STRTAB ||
1003 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1004 				if (havesyms && (flags & LOAD_SYM)) {
1005 					if (lseek(fd, (off_t)shp[i].sh_offset,
1006 					    SEEK_SET) == -1) {
1007 						free(shstr);
1008 						free(shp);
1009 						return 1;
1010 					}
1011 					if (mread(fd, maxp,
1012 					    shp[i].sh_size) != shp[i].sh_size) {
1013 						free(shstr);
1014 						free(shp);
1015 						return 1;
1016 					}
1017 				}
1018 				maxp += roundup(shp[i].sh_size,
1019 				    sizeof(Elf32_Addr));
1020 				shp[i].sh_offset = off;
1021 				shp[i].sh_flags |= SHF_ALLOC;
1022 				off += roundup(shp[i].sh_size,
1023 				    sizeof(Elf32_Addr));
1024 				first = 0;
1025 			}
1026 		}
1027 		if (flags & LOAD_SYM) {
1028 			mbcopy(shp, shpp, sz);
1029 		}
1030 		free(shstr);
1031 		free(shp);
1032 	}
1033 
1034 	/*
1035 	 * Frob the copied ELF header to give information relative
1036 	 * to elfp.
1037 	 */
1038 	if (flags & LOAD_HDR) {
1039 		elf->e_phoff = 0;
1040 		elf->e_shoff = sizeof(Elf32_Ehdr);
1041 		elf->e_phentsize = 0;
1042 		elf->e_phnum = 0;
1043 		mbcopy(elf, elfp, sizeof(*elf));
1044 	}
1045 
1046 	marks[MARK_START] = LOADADDR(minp);
1047 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1048 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1049 	marks[MARK_SYM] = LOADADDR(elfp);
1050 	marks[MARK_END] = LOADADDR(maxp);
1051 
1052 	return 0;
1053 }
1054