xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.24 2017/02/04 07:23:25 mlarkin Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 #include <sys/exec_elf.h>
89 
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/pte.h>
104 
105 #include "loadfile.h"
106 #include "vmd.h"
107 
/*
 * ELF executable header read from the start of the kernel image.
 * Sized for either flavor; the e_ident bytes (shared layout) decide
 * which member is valid.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

/*
 * On i386 the segment descriptor and 4MB-page PTE types differ from
 * amd64; pt_entry_t is not provided by the i386 headers used here.
 */
#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt(void);
static size_t mread(FILE *, paddr_t, size_t);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
135 
136 /*
137  * setsegment
138  *
139  * Initializes a segment selector entry with the provided descriptor.
140  * For the purposes of the bootloader mimiced by vmd(8), we only need
141  * memory-type segment descriptor support.
142  *
143  * This function was copied from machdep.c
144  *
145  * Parameters:
146  *  sd: Address of the entry to initialize
147  *  base: base of the segment
148  *  limit: limit of the segment
149  *  type: type of the segment
150  *  dpl: privilege level of the egment
151  *  def32: default 16/32 bit size of the segment
152  *  gran: granularity of the segment (byte/page)
153  */
154 #ifdef __i386__
155 static void
156 setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
157     int type, int dpl, int def32, int gran)
158 #else
159 static void
160 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
161     int type, int dpl, int def32, int gran)
162 #endif
163 {
164 	sd->sd_lolimit = (int)limit;
165 	sd->sd_lobase = (int)base;
166 	sd->sd_type = type;
167 	sd->sd_dpl = dpl;
168 	sd->sd_p = 1;
169 	sd->sd_hilimit = (int)limit >> 16;
170 #ifdef __i386__
171 	sd->sd_xx = 0;
172 #else
173 	sd->sd_avl = 0;
174 	sd->sd_long = 0;
175 #endif
176 	sd->sd_def32 = def32;
177 	sd->sd_gran = gran;
178 	sd->sd_hibase = (int)base >> 24;
179 }
180 
181 /*
182  * push_gdt
183  *
184  * Allocates and populates a page in the guest phys memory space to hold
185  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
186  * create the same GDT that a real bootloader would have created.
187  * This is loaded into the guest phys RAM space at address GDT_PAGE.
188  */
189 static void
190 push_gdt(void)
191 {
192 	uint8_t gdtpage[PAGE_SIZE];
193 #ifdef __i386__
194 	struct segment_descriptor *sd;
195 #else
196 	struct mem_segment_descriptor *sd;
197 #endif
198 
199 	memset(&gdtpage, 0, sizeof(gdtpage));
200 
201 #ifdef __i386__
202 	sd = (struct segment_descriptor *)&gdtpage;
203 #else
204 	sd = (struct mem_segment_descriptor *)&gdtpage;
205 #endif
206 
207 	/*
208 	 * Create three segment descriptors:
209 	 *
210 	 * GDT[0] : null desriptor. "Created" via memset above.
211 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
212 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
213 	 */
214 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
215 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
216 
217 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
218 }
219 
220 /*
221  * push_pt
222  *
223  * Create an identity-mapped page directory hierarchy mapping the first
224  * 1GB of physical memory. This is used during bootstrapping VMs on
225  * CPUs without unrestricted guest capability.
226  */
227 static void
228 push_pt(void)
229 {
230 	pt_entry_t ptes[NPTE_PG];
231 	uint64_t i;
232 
233 #ifdef __i386__
234 	memset(ptes, 0, sizeof(ptes));
235 	for (i = 0 ; i < NPTE_PG; i++) {
236 		ptes[i] = PG_V | PG_PS | (NBPD * i);
237 	}
238 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
239 #else
240 	/* PML3 [0] - first 1GB */
241 	memset(ptes, 0, sizeof(ptes));
242 	ptes[0] = PG_V | PML3_PAGE;
243 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
244 
245 	/* PML3 [0] - first 1GB */
246 	memset(ptes, 0, sizeof(ptes));
247 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
248 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
249 
250 	/* PML2 [0..511] - first 1GB (in 2MB pages) */
251 	memset(ptes, 0, sizeof(ptes));
252 	for (i = 0 ; i < NPTE_PG; i++) {
253 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | (NBPD_L2 * i);
254 	}
255 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
256 #endif
257 }
258 
259 /*
260  * loadelf_main
261  *
262  * Loads an ELF kernel to it's defined load address in the guest VM.
263  * The kernel is loaded to its defined start point as set in the ELF header.
264  *
265  * Parameters:
266  *  fd: file descriptor of a kernel file to load
267  *  vcp: the VM create parameters, holding the exact memory map
268  *  (out) vrs: register state to set on init for this kernel
269  *  bootdev: the optional non-default boot device
270  *  howto: optionel boot flags for the kernel
271  *
272  * Return values:
273  *  0 if successful
274  *  various error codes returned from read(2) or loadelf functions
275  */
276 int
277 loadelf_main(FILE *fp, struct vm_create_params *vcp,
278     struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto)
279 {
280 	int r;
281 	uint32_t bootargsz;
282 	size_t n, stacksize;
283 	u_long marks[MARK_MAX];
284 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
285 
286 	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
287 		return 1;
288 
289 	memset(&marks, 0, sizeof(marks));
290 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
291 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
292 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
293 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
294 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
295 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
296 	}
297 
298 	if (r)
299 		return (r);
300 
301 	push_gdt();
302 	push_pt();
303 	n = create_bios_memmap(vcp, memmap);
304 	bootargsz = push_bootargs(memmap, n);
305 	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);
306 
307 #ifdef __i386__
308 	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
309 	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
310 #else
311 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
312 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
313 #endif
314 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
315 
316 	return (0);
317 }
318 
319 /*
320  * create_bios_memmap
321  *
322  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
323  *
324  * Parameters:
325  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
326  *   memmap (out): the BIOS memory map
327  *
328  * Return values:
329  * Number of bios_memmap_t entries, including the terminating nul-entry.
330  */
331 static size_t
332 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
333 {
334 	size_t i, n = 0, sz;
335 	paddr_t gpa;
336 	struct vm_mem_range *vmr;
337 
338 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
339 		vmr = &vcp->vcp_memranges[i];
340 		gpa = vmr->vmr_gpa;
341 		sz = vmr->vmr_size;
342 
343 		/*
344 		 * Make sure that we do not mark the ROM/video RAM area in the
345 		 * low memory as physcal memory available to the kernel.
346 		 */
347 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
348 			if (gpa >= LOWMEM_KB * 1024)
349 				sz = 0;
350 			else
351 				sz = LOWMEM_KB * 1024 - gpa;
352 		}
353 
354 		if (sz != 0) {
355 			memmap[n].addr = gpa;
356 			memmap[n].size = sz;
357 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
358 			n++;
359 		}
360 	}
361 
362 	/* Null mem map entry to denote the end of the ranges */
363 	memmap[n].addr = 0x0;
364 	memmap[n].size = 0x0;
365 	memmap[n].type = 0x0;
366 	n++;
367 
368 	return (n);
369 }
370 
371 /*
372  * push_bootargs
373  *
374  * Creates the boot arguments page in the guest address space.
375  * Since vmd(8) is acting as the bootloader, we need to create the same boot
376  * arguments page that a real bootloader would have created. This is loaded
377  * into the guest phys RAM space at address BOOTARGS_PAGE.
378  *
379  * Parameters:
380  *  memmap: the BIOS memory map
381  *  n: number of entries in memmap
382  *
383  * Return values:
384  *  The size of the bootargs
385  */
386 static uint32_t
387 push_bootargs(bios_memmap_t *memmap, size_t n)
388 {
389 	uint32_t memmap_sz, consdev_sz, i;
390 	bios_consdev_t consdev;
391 	uint32_t ba[1024];
392 
393 	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
394 	ba[0] = 0x0;    /* memory map */
395 	ba[1] = memmap_sz;
396 	ba[2] = memmap_sz;     /* next */
397 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
398 	i = memmap_sz / sizeof(int);
399 
400 	/* Serial console device, COM1 @ 0x3f8 */
401 	consdev.consdev = makedev(8, 0);        /* com1 @ 0x3f8 */
402 	consdev.conspeed = 9600;
403 	consdev.consaddr = 0x3f8;
404 	consdev.consfreq = 0;
405 
406 	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
407 	ba[i] = 0x5;   /* consdev */
408 	ba[i + 1] = consdev_sz;
409 	ba[i + 2] = consdev_sz;
410 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
411 	i = i + 3 + (sizeof(bios_consdev_t) / 4);
412 
413 	ba[i] = 0xFFFFFFFF; /* BOOTARG_END */
414 
415 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
416 
417 	return (memmap_sz + consdev_sz);
418 }
419 
420 /*
421  * push_stack
422  *
423  * Creates the boot stack page in the guest address space. When using a real
424  * bootloader, the stack will be prepared using the following format before
425  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
426  * layout. The stack content is pushed to the guest phys RAM at address
427  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
428  * 4 bytes.
429  *
430  * Stack Layout: (TOS == Top Of Stack)
431  *  TOS		location of boot arguments page
432  *  TOS - 0x4	size of the content in the boot arguments page
433  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
434  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
435  *  TOS - 0x10	kernel 'end' symbol value
436  *  TOS - 0x14	version of bootarg API
437  *
438  * Parameters:
439  *  bootargsz: size of boot arguments
440  *  end: kernel 'end' symbol value
441  *  bootdev: the optional non-default boot device
442  *  howto: optionel boot flags for the kernel
443  *
444  * Return values:
445  *  size of the stack
446  */
447 static size_t
448 push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
449 {
450 	uint32_t stack[1024];
451 	uint16_t loc;
452 
453 	memset(&stack, 0, sizeof(stack));
454 	loc = 1024;
455 
456 	if (bootdev == 0)
457 		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
458 
459 	stack[--loc] = BOOTARGS_PAGE;
460 	stack[--loc] = bootargsz;
461 	stack[--loc] = 0; /* biosbasemem */
462 	stack[--loc] = 0; /* biosextmem */
463 	stack[--loc] = end;
464 	stack[--loc] = 0x0e;
465 	stack[--loc] = bootdev;
466 	stack[--loc] = howto;
467 
468 	write_mem(STACK_PAGE, &stack, PAGE_SIZE);
469 
470 	return (1024 - (loc - 1)) * sizeof(uint32_t);
471 }
472 
473 /*
474  * mread
475  *
476  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
477  * into the guest address space at paddr 'addr'.
478  *
479  * Parameters:
480  *  fd: file descriptor of the kernel image file to read from.
481  *  addr: guest paddr_t to load to
482  *  sz: number of bytes to load
483  *
484  * Return values:
485  *  returns 'sz' if successful, or 0 otherwise.
486  */
487 static size_t
488 mread(FILE *fp, paddr_t addr, size_t sz)
489 {
490 	size_t ct;
491 	size_t i, rd, osz;
492 	char buf[PAGE_SIZE];
493 
494 	/*
495 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
496 	 * write_mem
497 	 */
498 	ct = 0;
499 	rd = 0;
500 	osz = sz;
501 	if ((addr & PAGE_MASK) != 0) {
502 		memset(buf, 0, sizeof(buf));
503 		if (sz > PAGE_SIZE)
504 			ct = PAGE_SIZE - (addr & PAGE_MASK);
505 		else
506 			ct = sz;
507 
508 		if (fread(buf, 1, ct, fp) != ct) {
509 			log_warn("%s: error %d in mread", __progname, errno);
510 			return (0);
511 		}
512 		rd += ct;
513 
514 		if (write_mem(addr, buf, ct))
515 			return (0);
516 
517 		addr += ct;
518 	}
519 
520 	sz = sz - ct;
521 
522 	if (sz == 0)
523 		return (osz);
524 
525 	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
526 		memset(buf, 0, sizeof(buf));
527 		if (i + PAGE_SIZE > sz)
528 			ct = sz - i;
529 		else
530 			ct = PAGE_SIZE;
531 
532 		if (fread(buf, 1, ct, fp) != ct) {
533 			log_warn("%s: error %d in mread", __progname, errno);
534 			return (0);
535 		}
536 		rd += ct;
537 
538 		if (write_mem(addr, buf, ct))
539 			return (0);
540 	}
541 
542 	return (osz);
543 }
544 
545 /*
546  * marc4random_buf
547  *
548  * load 'sz' bytes of random data into the guest address space at paddr
549  * 'addr'.
550  *
551  * Parameters:
552  *  addr: guest paddr_t to load random bytes into
553  *  sz: number of random bytes to load
554  *
555  * Return values:
556  *  nothing
557  */
558 static void
559 marc4random_buf(paddr_t addr, int sz)
560 {
561 	int i, ct;
562 	char buf[PAGE_SIZE];
563 
564 	/*
565 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
566 	 * write_mem
567 	 */
568 	ct = 0;
569 	if (addr % PAGE_SIZE != 0) {
570 		memset(buf, 0, sizeof(buf));
571 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
572 
573 		arc4random_buf(buf, ct);
574 
575 		if (write_mem(addr, buf, ct))
576 			return;
577 
578 		addr += ct;
579 	}
580 
581 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
582 		memset(buf, 0, sizeof(buf));
583 		if (i + PAGE_SIZE > sz)
584 			ct = sz - i;
585 		else
586 			ct = PAGE_SIZE;
587 
588 		arc4random_buf(buf, ct);
589 
590 		if (write_mem(addr, buf, ct))
591 			return;
592 	}
593 }
594 
595 /*
596  * mbzero
597  *
598  * load 'sz' bytes of zeros into the guest address space at paddr
599  * 'addr'.
600  *
601  * Parameters:
602  *  addr: guest paddr_t to zero
603  *  sz: number of zero bytes to store
604  *
605  * Return values:
606  *  nothing
607  */
608 static void
609 mbzero(paddr_t addr, int sz)
610 {
611 	int i, ct;
612 	char buf[PAGE_SIZE];
613 
614 	/*
615 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
616 	 * write_mem
617 	 */
618 	ct = 0;
619 	memset(buf, 0, sizeof(buf));
620 	if (addr % PAGE_SIZE != 0) {
621 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
622 
623 		if (write_mem(addr, buf, ct))
624 			return;
625 
626 		addr += ct;
627 	}
628 
629 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
630 		if (i + PAGE_SIZE > sz)
631 			ct = sz - i;
632 		else
633 			ct = PAGE_SIZE;
634 
635 		if (write_mem(addr, buf, ct))
636 			return;
637 	}
638 }
639 
640 /*
641  * mbcopy
642  *
643  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
644  *
645  * Parameters:
646  *  src: source buffer to copy from
647  *  dst: destination guest paddr_t to copy to
648  *  sz: number of bytes to copy
649  *
650  * Return values:
651  *  nothing
652  */
653 static void
654 mbcopy(void *src, paddr_t dst, int sz)
655 {
656 	write_mem(dst, src, sz);
657 }
658 
659 /*
660  * elf64_exec
661  *
662  * Load the kernel indicated by 'fd' into the guest physical memory
663  * space, at the addresses defined in the ELF header.
664  *
665  * This function is used for 64 bit kernels.
666  *
667  * Parameters:
668  *  fd: file descriptor of the kernel to load
669  *  elf: ELF header of the kernel
670  *  marks: array to store the offsets of various kernel structures
671  *      (start, bss, etc)
672  *  flags: flag value to indicate which section(s) to load (usually
673  *      LOAD_ALL)
674  *
675  * Return values:
676  *  0 if successful
677  *  1 if unsuccessful
678  */
679 static int
680 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
681 {
682 	Elf64_Shdr *shp;
683 	Elf64_Phdr *phdr;
684 	Elf64_Off off;
685 	int i;
686 	size_t sz;
687 	int first;
688 	int havesyms, havelines;
689 	paddr_t minp = ~0, maxp = 0, pos = 0;
690 	paddr_t offset = marks[MARK_START], shpp, elfp;
691 
692 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
693 	phdr = malloc(sz);
694 
695 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
696 		free(phdr);
697 		return 1;
698 	}
699 
700 	if (fread(phdr, 1, sz, fp) != sz) {
701 		free(phdr);
702 		return 1;
703 	}
704 
705 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
706 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
707 			int m;
708 
709 			/* Fill segment if asked for. */
710 			if (flags & LOAD_RANDOM) {
711 				for (pos = 0; pos < phdr[i].p_filesz;
712 				    pos += m) {
713 					m = phdr[i].p_filesz - pos;
714 					marc4random_buf(phdr[i].p_paddr + pos,
715 					    m);
716 				}
717 			}
718 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
719 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
720 				marks[MARK_ERANDOM] =
721 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
722 			}
723 			continue;
724 		}
725 
726 		if (phdr[i].p_type != PT_LOAD ||
727 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
728 			continue;
729 
730 #define IS_TEXT(p)	(p.p_flags & PF_X)
731 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
732 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
733 		/*
734 		 * XXX: Assume first address is lowest
735 		 */
736 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
737 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
738 
739 			/* Read in segment. */
740 			if (fseeko(fp, (off_t)phdr[i].p_offset,
741 			    SEEK_SET) == -1) {
742 				free(phdr);
743 				return 1;
744 			}
745 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
746 			    phdr[i].p_filesz) {
747 				free(phdr);
748 				return 1;
749 			}
750 
751 			first = 0;
752 		}
753 
754 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
755 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
756 			pos = phdr[i].p_paddr;
757 			if (minp > pos)
758 				minp = pos;
759 			pos += phdr[i].p_filesz;
760 			if (maxp < pos)
761 				maxp = pos;
762 		}
763 
764 		/* Zero out BSS. */
765 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
766 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
767 			    phdr[i].p_memsz - phdr[i].p_filesz);
768 		}
769 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
770 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
771 			if (maxp < pos)
772 				maxp = pos;
773 		}
774 	}
775 	free(phdr);
776 
777 	/*
778 	 * Copy the ELF and section headers.
779 	 */
780 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
781 	if (flags & (LOAD_HDR | COUNT_HDR))
782 		maxp += sizeof(Elf64_Ehdr);
783 
784 	if (flags & (LOAD_SYM | COUNT_SYM)) {
785 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
786 			WARN(("lseek section headers"));
787 			return 1;
788 		}
789 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
790 		shp = malloc(sz);
791 
792 		if (fread(shp, 1, sz, fp) != sz) {
793 			free(shp);
794 			return 1;
795 		}
796 
797 		shpp = maxp;
798 		maxp += roundup(sz, sizeof(Elf64_Addr));
799 
800 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
801 		char *shstr = malloc(shstrsz);
802 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
803 		    SEEK_SET) == -1) {
804 			free(shstr);
805 			free(shp);
806 			return 1;
807 		}
808 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
809 			free(shstr);
810 			free(shp);
811 			return 1;
812 		}
813 
814 		/*
815 		 * Now load the symbol sections themselves. Make sure the
816 		 * sections are aligned. Don't bother with string tables if
817 		 * there are no symbol sections.
818 		 */
819 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
820 
821 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
822 			if (shp[i].sh_type == SHT_SYMTAB)
823 				havesyms = 1;
824 
825 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
826 			if (shp[i].sh_type == SHT_SYMTAB ||
827 			    shp[i].sh_type == SHT_STRTAB ||
828 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
829 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
830 				if (havesyms && (flags & LOAD_SYM)) {
831 					if (fseeko(fp, (off_t)shp[i].sh_offset,
832 					    SEEK_SET) == -1) {
833 						free(shstr);
834 						free(shp);
835 						return 1;
836 					}
837 					if (mread(fp, maxp,
838 					    shp[i].sh_size) != shp[i].sh_size) {
839 						free(shstr);
840 						free(shp);
841 						return 1;
842 					}
843 				}
844 				maxp += roundup(shp[i].sh_size,
845 				    sizeof(Elf64_Addr));
846 				shp[i].sh_offset = off;
847 				shp[i].sh_flags |= SHF_ALLOC;
848 				off += roundup(shp[i].sh_size,
849 				    sizeof(Elf64_Addr));
850 				first = 0;
851 			}
852 		}
853 		if (flags & LOAD_SYM) {
854 			mbcopy(shp, shpp, sz);
855 		}
856 		free(shstr);
857 		free(shp);
858 	}
859 
860 	/*
861 	 * Frob the copied ELF header to give information relative
862 	 * to elfp.
863 	 */
864 	if (flags & LOAD_HDR) {
865 		elf->e_phoff = 0;
866 		elf->e_shoff = sizeof(Elf64_Ehdr);
867 		elf->e_phentsize = 0;
868 		elf->e_phnum = 0;
869 		mbcopy(elf, elfp, sizeof(*elf));
870 	}
871 
872 	marks[MARK_START] = LOADADDR(minp);
873 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
874 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
875 	marks[MARK_SYM] = LOADADDR(elfp);
876 	marks[MARK_END] = LOADADDR(maxp);
877 
878 	return 0;
879 }
880 
881 /*
882  * elf32_exec
883  *
884  * Load the kernel indicated by 'fd' into the guest physical memory
885  * space, at the addresses defined in the ELF header.
886  *
887  * This function is used for 32 bit kernels.
888  *
889  * Parameters:
890  *  fd: file descriptor of the kernel to load
891  *  elf: ELF header of the kernel
892  *  marks: array to store the offsets of various kernel structures
893  *      (start, bss, etc)
894  *  flags: flag value to indicate which section(s) to load (usually
895  *      LOAD_ALL)
896  *
897  * Return values:
898  *  0 if successful
899  *  1 if unsuccessful
900  */
901 static int
902 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
903 {
904 	Elf32_Shdr *shp;
905 	Elf32_Phdr *phdr;
906 	Elf32_Off off;
907 	int i;
908 	size_t sz;
909 	int first;
910 	int havesyms, havelines;
911 	paddr_t minp = ~0, maxp = 0, pos = 0;
912 	paddr_t offset = marks[MARK_START], shpp, elfp;
913 
914 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
915 	phdr = malloc(sz);
916 
917 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
918 		free(phdr);
919 		return 1;
920 	}
921 
922 	if (fread(phdr, 1, sz, fp) != sz) {
923 		free(phdr);
924 		return 1;
925 	}
926 
927 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
928 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
929 			int m;
930 
931 			/* Fill segment if asked for. */
932 			if (flags & LOAD_RANDOM) {
933 				for (pos = 0; pos < phdr[i].p_filesz;
934 				    pos += m) {
935 					m = phdr[i].p_filesz - pos;
936 					marc4random_buf(phdr[i].p_paddr + pos,
937 					    m);
938 				}
939 			}
940 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
941 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
942 				marks[MARK_ERANDOM] =
943 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
944 			}
945 			continue;
946 		}
947 
948 		if (phdr[i].p_type != PT_LOAD ||
949 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
950 			continue;
951 
952 #define IS_TEXT(p)	(p.p_flags & PF_X)
953 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
954 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
955 		/*
956 		 * XXX: Assume first address is lowest
957 		 */
958 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
959 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
960 
961 			/* Read in segment. */
962 			if (fseeko(fp, (off_t)phdr[i].p_offset,
963 			    SEEK_SET) == -1) {
964 				free(phdr);
965 				return 1;
966 			}
967 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
968 			    phdr[i].p_filesz) {
969 				free(phdr);
970 				return 1;
971 			}
972 
973 			first = 0;
974 		}
975 
976 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
977 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
978 			pos = phdr[i].p_paddr;
979 			if (minp > pos)
980 				minp = pos;
981 			pos += phdr[i].p_filesz;
982 			if (maxp < pos)
983 				maxp = pos;
984 		}
985 
986 		/* Zero out BSS. */
987 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
988 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
989 			    phdr[i].p_memsz - phdr[i].p_filesz);
990 		}
991 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
992 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
993 			if (maxp < pos)
994 				maxp = pos;
995 		}
996 	}
997 	free(phdr);
998 
999 	/*
1000 	 * Copy the ELF and section headers.
1001 	 */
1002 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
1003 	if (flags & (LOAD_HDR | COUNT_HDR))
1004 		maxp += sizeof(Elf32_Ehdr);
1005 
1006 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1007 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
1008 			WARN(("lseek section headers"));
1009 			return 1;
1010 		}
1011 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1012 		shp = malloc(sz);
1013 
1014 		if (fread(shp, 1, sz, fp) != sz) {
1015 			free(shp);
1016 			return 1;
1017 		}
1018 
1019 		shpp = maxp;
1020 		maxp += roundup(sz, sizeof(Elf32_Addr));
1021 
1022 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1023 		char *shstr = malloc(shstrsz);
1024 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1025 		    SEEK_SET) == -1) {
1026 			free(shstr);
1027 			free(shp);
1028 			return 1;
1029 		}
1030 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1031 			free(shstr);
1032 			free(shp);
1033 			return 1;
1034 		}
1035 
1036 		/*
1037 		 * Now load the symbol sections themselves. Make sure the
1038 		 * sections are aligned. Don't bother with string tables if
1039 		 * there are no symbol sections.
1040 		 */
1041 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1042 
1043 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1044 			if (shp[i].sh_type == SHT_SYMTAB)
1045 				havesyms = 1;
1046 
1047 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1048 			if (shp[i].sh_type == SHT_SYMTAB ||
1049 			    shp[i].sh_type == SHT_STRTAB ||
1050 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1051 				if (havesyms && (flags & LOAD_SYM)) {
1052 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1053 					    SEEK_SET) == -1) {
1054 						free(shstr);
1055 						free(shp);
1056 						return 1;
1057 					}
1058 					if (mread(fp, maxp,
1059 					    shp[i].sh_size) != shp[i].sh_size) {
1060 						free(shstr);
1061 						free(shp);
1062 						return 1;
1063 					}
1064 				}
1065 				maxp += roundup(shp[i].sh_size,
1066 				    sizeof(Elf32_Addr));
1067 				shp[i].sh_offset = off;
1068 				shp[i].sh_flags |= SHF_ALLOC;
1069 				off += roundup(shp[i].sh_size,
1070 				    sizeof(Elf32_Addr));
1071 				first = 0;
1072 			}
1073 		}
1074 		if (flags & LOAD_SYM) {
1075 			mbcopy(shp, shpp, sz);
1076 		}
1077 		free(shstr);
1078 		free(shp);
1079 	}
1080 
1081 	/*
1082 	 * Frob the copied ELF header to give information relative
1083 	 * to elfp.
1084 	 */
1085 	if (flags & LOAD_HDR) {
1086 		elf->e_phoff = 0;
1087 		elf->e_shoff = sizeof(Elf32_Ehdr);
1088 		elf->e_phentsize = 0;
1089 		elf->e_phnum = 0;
1090 		mbcopy(elf, elfp, sizeof(*elf));
1091 	}
1092 
1093 	marks[MARK_START] = LOADADDR(minp);
1094 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1095 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1096 	marks[MARK_SYM] = LOADADDR(elfp);
1097 	marks[MARK_END] = LOADADDR(maxp);
1098 
1099 	return 0;
1100 }
1101