xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.30 2018/07/17 13:47:06 mlarkin Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/specialreg.h>
104 #include <machine/pte.h>
105 
106 #include "loadfile.h"
107 #include "vmd.h"
108 
/*
 * Scratch ELF executable header shared by loadfile_elf() and the
 * elf32/elf64 loaders.  Only one kernel is loaded per vmd(8) vm
 * process, so a single file-scope buffer suffices.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
136 
137 /*
138  * setsegment
139  *
140  * Initializes a segment selector entry with the provided descriptor.
141  * For the purposes of the bootloader mimicked by vmd(8), we only need
142  * memory-type segment descriptor support.
143  *
144  * This function was copied from machdep.c
145  *
146  * Parameters:
147  *  sd: Address of the entry to initialize
148  *  base: base of the segment
149  *  limit: limit of the segment
150  *  type: type of the segment
151  *  dpl: privilege level of the segment
152  *  def32: default 16/32 bit size of the segment
153  *  gran: granularity of the segment (byte/page)
154  */
#ifdef __i386__
static void
setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#else
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#endif
{
	/* Split the limit and base across the descriptor's bitfields. */
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
#ifdef __i386__
	sd->sd_xx = 0;			/* reserved bits */
#else
	sd->sd_avl = 0;			/* available-to-software bit unused */
	sd->sd_long = 0;		/* not a 64-bit code segment */
#endif
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}
181 
182 /*
183  * push_gdt
184  *
185  * Allocates and populates a page in the guest phys memory space to hold
186  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
187  * create the same GDT that a real bootloader would have created.
188  * This is loaded into the guest phys RAM space at address GDT_PAGE.
189  */
static void
push_gdt(void)
{
	/* Build the GDT in a local page-sized buffer, then copy it in. */
	uint8_t gdtpage[PAGE_SIZE];
#ifdef __i386__
	struct segment_descriptor *sd;
#else
	struct mem_segment_descriptor *sd;
#endif

	memset(&gdtpage, 0, sizeof(gdtpage));

#ifdef __i386__
	sd = (struct segment_descriptor *)&gdtpage;
#else
	sd = (struct mem_segment_descriptor *)&gdtpage;
#endif

	/*
	 * Create three segment descriptors:
	 *
	 * GDT[0] : null descriptor. "Created" via memset above.
	 * GDT[1] (selector @ 0x8): Executable segment, for CS
	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
	 */
	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);

	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
}
220 
221 /*
222  * push_pt_32
223  *
224  * Create an identity-mapped page directory hierarchy mapping the first
225  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
226  * CPUs without unrestricted guest capability.
227  */
228 static void
229 push_pt_32(void)
230 {
231 	uint32_t ptes[1024], i;
232 
233 	memset(ptes, 0, sizeof(ptes));
234 	for (i = 0 ; i < 1024; i++) {
235 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
236 	}
237 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
238 }
239 
240 /*
241  * push_pt_64
242  *
243  * Create an identity-mapped page directory hierarchy mapping the first
244  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
245  * CPUs without unrestricted guest capability.
246  */
247 static void
248 push_pt_64(void)
249 {
250 	uint64_t ptes[512], i;
251 
252 	/* PDPDE0 - first 1GB */
253 	memset(ptes, 0, sizeof(ptes));
254 	ptes[0] = PG_V | PML3_PAGE;
255 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
256 
257 	/* PDE0 - first 1GB */
258 	memset(ptes, 0, sizeof(ptes));
259 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
260 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
261 
262 	/* First 1GB (in 2MB pages) */
263 	memset(ptes, 0, sizeof(ptes));
264 	for (i = 0 ; i < 512; i++) {
265 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
266 	}
267 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
268 }
269 
270 /*
271  * loadfile_elf
272  *
273  * Loads an ELF kernel to its defined load address in the guest VM.
274  * The kernel is loaded to its defined start point as set in the ELF header.
275  *
276  * Parameters:
277  *  fp: file of a kernel file to load
278  *  vcp: the VM create parameters, holding the exact memory map
279  *  (out) vrs: register state to set on init for this kernel
280  *  bootdev: the optional non-default boot device
281  *  howto: optional boot flags for the kernel
282  *
283  * Return values:
284  *  0 if successful
285  *  various error codes returned from read(2) or loadelf functions
286  */
287 int
288 loadfile_elf(FILE *fp, struct vm_create_params *vcp,
289     struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto)
290 {
291 	int r, is_i386 = 0;
292 	uint32_t bootargsz;
293 	size_t n, stacksize;
294 	u_long marks[MARK_MAX];
295 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
296 
297 	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
298 		return 1;
299 
300 	memset(&marks, 0, sizeof(marks));
301 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
302 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
303 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
304 		is_i386 = 1;
305 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
306 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
307 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
308 	} else
309 		errno = ENOEXEC;
310 
311 	if (r)
312 		return (r);
313 
314 	push_gdt();
315 
316 	if (is_i386) {
317 		push_pt_32();
318 		/* Reconfigure the default flat-64 register set for 32 bit */
319 		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
320 		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
321 		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
322 	}
323 	else
324 		push_pt_64();
325 
326 	n = create_bios_memmap(vcp, memmap);
327 	bootargsz = push_bootargs(memmap, n);
328 	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);
329 
330 #ifdef __i386__
331 	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
332 	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
333 #else
334 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
335 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
336 #endif
337 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
338 
339 	log_debug("%s: loaded ELF kernel", __func__);
340 
341 	return (0);
342 }
343 
344 /*
345  * create_bios_memmap
346  *
347  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
348  *
349  * Parameters:
350  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
351  *   memmap (out): the BIOS memory map
352  *
353  * Return values:
354  * Number of bios_memmap_t entries, including the terminating nul-entry.
355  */
356 static size_t
357 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
358 {
359 	size_t i, n = 0, sz;
360 	paddr_t gpa;
361 	struct vm_mem_range *vmr;
362 
363 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
364 		vmr = &vcp->vcp_memranges[i];
365 		gpa = vmr->vmr_gpa;
366 		sz = vmr->vmr_size;
367 
368 		/*
369 		 * Make sure that we do not mark the ROM/video RAM area in the
370 		 * low memory as physcal memory available to the kernel.
371 		 */
372 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
373 			if (gpa >= LOWMEM_KB * 1024)
374 				sz = 0;
375 			else
376 				sz = LOWMEM_KB * 1024 - gpa;
377 		}
378 
379 		if (sz != 0) {
380 			memmap[n].addr = gpa;
381 			memmap[n].size = sz;
382 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
383 			n++;
384 		}
385 	}
386 
387 	/* Null mem map entry to denote the end of the ranges */
388 	memmap[n].addr = 0x0;
389 	memmap[n].size = 0x0;
390 	memmap[n].type = 0x0;
391 	n++;
392 
393 	return (n);
394 }
395 
396 /*
397  * push_bootargs
398  *
399  * Creates the boot arguments page in the guest address space.
400  * Since vmd(8) is acting as the bootloader, we need to create the same boot
401  * arguments page that a real bootloader would have created. This is loaded
402  * into the guest phys RAM space at address BOOTARGS_PAGE.
403  *
404  * Parameters:
405  *  memmap: the BIOS memory map
406  *  n: number of entries in memmap
407  *
408  * Return values:
409  *  The size of the bootargs
410  */
static uint32_t
push_bootargs(bios_memmap_t *memmap, size_t n)
{
	uint32_t memmap_sz, consdev_sz, i;
	bios_consdev_t consdev;
	uint32_t ba[1024];

	/*
	 * Each bootarg record is three header words (type, size, offset of
	 * next record) followed by its payload.  First record: the BIOS
	 * e820-style memory map built by create_bios_memmap().
	 */
	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
	ba[0] = 0x0;    /* memory map */
	ba[1] = memmap_sz;
	ba[2] = memmap_sz;	/* next */
	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
	i = memmap_sz / sizeof(int);	/* word index of the next record */

	/* Serial console device, COM1 @ 0x3f8 */
	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
	consdev.conspeed = 9600;
	consdev.consaddr = 0x3f8;
	consdev.consfreq = 0;

	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
	ba[i] = 0x5;   /* consdev */
	ba[i + 1] = consdev_sz;
	ba[i + 2] = consdev_sz;
	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
	i = i + 3 + (sizeof(bios_consdev_t) / 4);

	/* Terminator record. */
	ba[i] = 0xFFFFFFFF; /* BOOTARG_END */

	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);

	return (memmap_sz + consdev_sz);
}
444 
445 /*
446  * push_stack
447  *
448  * Creates the boot stack page in the guest address space. When using a real
449  * bootloader, the stack will be prepared using the following format before
450  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
451  * layout. The stack content is pushed to the guest phys RAM at address
452  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
453  * 4 bytes.
454  *
455  * Stack Layout: (TOS == Top Of Stack)
456  *  TOS		location of boot arguments page
457  *  TOS - 0x4	size of the content in the boot arguments page
458  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
459  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
460  *  TOS - 0x10	kernel 'end' symbol value
461  *  TOS - 0x14	version of bootarg API
462  *
463  * Parameters:
464  *  bootargsz: size of boot arguments
465  *  end: kernel 'end' symbol value
466  *  bootdev: the optional non-default boot device
467  *  howto: optional boot flags for the kernel
468  *
469  * Return values:
470  *  size of the stack
471  */
static size_t
push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
{
	uint32_t stack[1024];
	uint16_t loc;

	memset(&stack, 0, sizeof(stack));
	loc = 1024;	/* stack grows downward from the top of the page */

	if (bootdev == 0)
		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */

	/* Push entries in reverse of the layout documented above. */
	stack[--loc] = BOOTARGS_PAGE;
	stack[--loc] = bootargsz;
	stack[--loc] = 0; /* biosbasemem */
	stack[--loc] = 0; /* biosextmem */
	stack[--loc] = end;
	stack[--loc] = 0x0e;	/* bootarg API version */
	stack[--loc] = bootdev;
	stack[--loc] = howto;

	write_mem(STACK_PAGE, &stack, PAGE_SIZE);

	/*
	 * NOTE(review): this counts one slot more than was pushed
	 * ((loc - 1) rather than loc), so the guest SP ends up one entry
	 * below 'howto' -- presumably the slot a real bootloader's return
	 * address would occupy; confirm before changing.
	 */
	return (1024 - (loc - 1)) * sizeof(uint32_t);
}
497 
498 /*
499  * mread
500  *
501  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
502  * into the guest address space at paddr 'addr'.
503  *
504  * Parameters:
505  *  fp: file stream of the kernel image file to read from.
506  *  addr: guest paddr_t to load to
507  *  sz: number of bytes to load
508  *
509  * Return values:
510  *  returns 'sz' if successful, or 0 otherwise.
511  */
size_t
mread(FILE *fp, paddr_t addr, size_t sz)
{
	size_t ct;
	size_t i, rd, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;
	osz = sz;	/* remember the requested total for the return value */
	if ((addr & PAGE_MASK) != 0) {
		/* Leading partial page: read at most up to the boundary. */
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Account for the leading chunk (ct is 0 if addr was aligned). */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* Remaining bytes, one page-sized chunk at a time. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;	/* final short chunk */
		else
			ct = PAGE_SIZE;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}
569 
570 /*
571  * marc4random_buf
572  *
573  * load 'sz' bytes of random data into the guest address space at paddr
574  * 'addr'.
575  *
576  * Parameters:
577  *  addr: guest paddr_t to load random bytes into
578  *  sz: number of random bytes to load
579  *
580  * Return values:
581  *  nothing
582  */
583 static void
584 marc4random_buf(paddr_t addr, int sz)
585 {
586 	int i, ct;
587 	char buf[PAGE_SIZE];
588 
589 	/*
590 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
591 	 * write_mem
592 	 */
593 	ct = 0;
594 	if (addr % PAGE_SIZE != 0) {
595 		memset(buf, 0, sizeof(buf));
596 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
597 
598 		arc4random_buf(buf, ct);
599 
600 		if (write_mem(addr, buf, ct))
601 			return;
602 
603 		addr += ct;
604 	}
605 
606 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
607 		memset(buf, 0, sizeof(buf));
608 		if (i + PAGE_SIZE > sz)
609 			ct = sz - i;
610 		else
611 			ct = PAGE_SIZE;
612 
613 		arc4random_buf(buf, ct);
614 
615 		if (write_mem(addr, buf, ct))
616 			return;
617 	}
618 }
619 
620 /*
621  * mbzero
622  *
623  * load 'sz' bytes of zeros into the guest address space at paddr
624  * 'addr'.
625  *
626  * Parameters:
627  *  addr: guest paddr_t to zero
628  *  sz: number of zero bytes to store
629  *
630  * Return values:
631  *  nothing
632  */
633 static void
634 mbzero(paddr_t addr, int sz)
635 {
636 	int i, ct;
637 	char buf[PAGE_SIZE];
638 
639 	/*
640 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
641 	 * write_mem
642 	 */
643 	ct = 0;
644 	memset(buf, 0, sizeof(buf));
645 	if (addr % PAGE_SIZE != 0) {
646 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
647 
648 		if (write_mem(addr, buf, ct))
649 			return;
650 
651 		addr += ct;
652 	}
653 
654 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
655 		if (i + PAGE_SIZE > sz)
656 			ct = sz - i;
657 		else
658 			ct = PAGE_SIZE;
659 
660 		if (write_mem(addr, buf, ct))
661 			return;
662 	}
663 }
664 
665 /*
666  * mbcopy
667  *
668  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
669  *
670  * Parameters:
671  *  src: source buffer to copy from
672  *  dst: destination guest paddr_t to copy to
673  *  sz: number of bytes to copy
674  *
675  * Return values:
676  *  nothing
677  */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	/* Thin wrapper: push 'sz' bytes from 'src' into guest physical RAM. */
	write_mem(dst, src, sz);
}
683 
684 /*
685  * elf64_exec
686  *
687  * Load the kernel indicated by 'fd' into the guest physical memory
688  * space, at the addresses defined in the ELF header.
689  *
690  * This function is used for 64 bit kernels.
691  *
692  * Parameters:
693  *  fp: file stream of the kernel to load
694  *  elf: ELF header of the kernel
695  *  marks: array to store the offsets of various kernel structures
696  *      (start, bss, etc)
697  *  flags: flag value to indicate which section(s) to load (usually
698  *      LOAD_ALL)
699  *
700  * Return values:
701  *  0 if successful
702  *  1 if unsuccessful
703  */
704 static int
705 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
706 {
707 	Elf64_Shdr *shp;
708 	Elf64_Phdr *phdr;
709 	Elf64_Off off;
710 	int i;
711 	size_t sz;
712 	int first;
713 	int havesyms, havelines;
714 	paddr_t minp = ~0, maxp = 0, pos = 0;
715 	paddr_t offset = marks[MARK_START], shpp, elfp;
716 
717 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
718 	phdr = malloc(sz);
719 
720 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
721 		free(phdr);
722 		return 1;
723 	}
724 
725 	if (fread(phdr, 1, sz, fp) != sz) {
726 		free(phdr);
727 		return 1;
728 	}
729 
730 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
731 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
732 			int m;
733 
734 			/* Fill segment if asked for. */
735 			if (flags & LOAD_RANDOM) {
736 				for (pos = 0; pos < phdr[i].p_filesz;
737 				    pos += m) {
738 					m = phdr[i].p_filesz - pos;
739 					marc4random_buf(phdr[i].p_paddr + pos,
740 					    m);
741 				}
742 			}
743 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
744 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
745 				marks[MARK_ERANDOM] =
746 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
747 			}
748 			continue;
749 		}
750 
751 		if (phdr[i].p_type != PT_LOAD ||
752 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
753 			continue;
754 
755 #define IS_TEXT(p)	(p.p_flags & PF_X)
756 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
757 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
758 		/*
759 		 * XXX: Assume first address is lowest
760 		 */
761 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
762 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
763 
764 			/* Read in segment. */
765 			if (fseeko(fp, (off_t)phdr[i].p_offset,
766 			    SEEK_SET) == -1) {
767 				free(phdr);
768 				return 1;
769 			}
770 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
771 			    phdr[i].p_filesz) {
772 				free(phdr);
773 				return 1;
774 			}
775 
776 			first = 0;
777 		}
778 
779 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
780 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
781 			pos = phdr[i].p_paddr;
782 			if (minp > pos)
783 				minp = pos;
784 			pos += phdr[i].p_filesz;
785 			if (maxp < pos)
786 				maxp = pos;
787 		}
788 
789 		/* Zero out BSS. */
790 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
791 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
792 			    phdr[i].p_memsz - phdr[i].p_filesz);
793 		}
794 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
795 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
796 			if (maxp < pos)
797 				maxp = pos;
798 		}
799 	}
800 	free(phdr);
801 
802 	/*
803 	 * Copy the ELF and section headers.
804 	 */
805 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
806 	if (flags & (LOAD_HDR | COUNT_HDR))
807 		maxp += sizeof(Elf64_Ehdr);
808 
809 	if (flags & (LOAD_SYM | COUNT_SYM)) {
810 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
811 			WARN(("lseek section headers"));
812 			return 1;
813 		}
814 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
815 		shp = malloc(sz);
816 
817 		if (fread(shp, 1, sz, fp) != sz) {
818 			free(shp);
819 			return 1;
820 		}
821 
822 		shpp = maxp;
823 		maxp += roundup(sz, sizeof(Elf64_Addr));
824 
825 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
826 		char *shstr = malloc(shstrsz);
827 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
828 		    SEEK_SET) == -1) {
829 			free(shstr);
830 			free(shp);
831 			return 1;
832 		}
833 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
834 			free(shstr);
835 			free(shp);
836 			return 1;
837 		}
838 
839 		/*
840 		 * Now load the symbol sections themselves. Make sure the
841 		 * sections are aligned. Don't bother with string tables if
842 		 * there are no symbol sections.
843 		 */
844 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
845 
846 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
847 			if (shp[i].sh_type == SHT_SYMTAB)
848 				havesyms = 1;
849 
850 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
851 			if (shp[i].sh_type == SHT_SYMTAB ||
852 			    shp[i].sh_type == SHT_STRTAB ||
853 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
854 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
855 				if (havesyms && (flags & LOAD_SYM)) {
856 					if (fseeko(fp, (off_t)shp[i].sh_offset,
857 					    SEEK_SET) == -1) {
858 						free(shstr);
859 						free(shp);
860 						return 1;
861 					}
862 					if (mread(fp, maxp,
863 					    shp[i].sh_size) != shp[i].sh_size) {
864 						free(shstr);
865 						free(shp);
866 						return 1;
867 					}
868 				}
869 				maxp += roundup(shp[i].sh_size,
870 				    sizeof(Elf64_Addr));
871 				shp[i].sh_offset = off;
872 				shp[i].sh_flags |= SHF_ALLOC;
873 				off += roundup(shp[i].sh_size,
874 				    sizeof(Elf64_Addr));
875 				first = 0;
876 			}
877 		}
878 		if (flags & LOAD_SYM) {
879 			mbcopy(shp, shpp, sz);
880 		}
881 		free(shstr);
882 		free(shp);
883 	}
884 
885 	/*
886 	 * Frob the copied ELF header to give information relative
887 	 * to elfp.
888 	 */
889 	if (flags & LOAD_HDR) {
890 		elf->e_phoff = 0;
891 		elf->e_shoff = sizeof(Elf64_Ehdr);
892 		elf->e_phentsize = 0;
893 		elf->e_phnum = 0;
894 		mbcopy(elf, elfp, sizeof(*elf));
895 	}
896 
897 	marks[MARK_START] = LOADADDR(minp);
898 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
899 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
900 	marks[MARK_SYM] = LOADADDR(elfp);
901 	marks[MARK_END] = LOADADDR(maxp);
902 
903 	return 0;
904 }
905 
906 /*
907  * elf32_exec
908  *
909  * Load the kernel indicated by 'fd' into the guest physical memory
910  * space, at the addresses defined in the ELF header.
911  *
912  * This function is used for 32 bit kernels.
913  *
914  * Parameters:
915  *  fp: file stream of the kernel to load
916  *  elf: ELF header of the kernel
917  *  marks: array to store the offsets of various kernel structures
918  *      (start, bss, etc)
919  *  flags: flag value to indicate which section(s) to load (usually
920  *      LOAD_ALL)
921  *
922  * Return values:
923  *  0 if successful
924  *  1 if unsuccessful
925  */
926 static int
927 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
928 {
929 	Elf32_Shdr *shp;
930 	Elf32_Phdr *phdr;
931 	Elf32_Off off;
932 	int i;
933 	size_t sz;
934 	int first;
935 	int havesyms, havelines;
936 	paddr_t minp = ~0, maxp = 0, pos = 0;
937 	paddr_t offset = marks[MARK_START], shpp, elfp;
938 
939 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
940 	phdr = malloc(sz);
941 
942 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
943 		free(phdr);
944 		return 1;
945 	}
946 
947 	if (fread(phdr, 1, sz, fp) != sz) {
948 		free(phdr);
949 		return 1;
950 	}
951 
952 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
953 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
954 			int m;
955 
956 			/* Fill segment if asked for. */
957 			if (flags & LOAD_RANDOM) {
958 				for (pos = 0; pos < phdr[i].p_filesz;
959 				    pos += m) {
960 					m = phdr[i].p_filesz - pos;
961 					marc4random_buf(phdr[i].p_paddr + pos,
962 					    m);
963 				}
964 			}
965 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
966 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
967 				marks[MARK_ERANDOM] =
968 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
969 			}
970 			continue;
971 		}
972 
973 		if (phdr[i].p_type != PT_LOAD ||
974 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
975 			continue;
976 
977 #define IS_TEXT(p)	(p.p_flags & PF_X)
978 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
979 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
980 		/*
981 		 * XXX: Assume first address is lowest
982 		 */
983 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
984 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
985 
986 			/* Read in segment. */
987 			if (fseeko(fp, (off_t)phdr[i].p_offset,
988 			    SEEK_SET) == -1) {
989 				free(phdr);
990 				return 1;
991 			}
992 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
993 			    phdr[i].p_filesz) {
994 				free(phdr);
995 				return 1;
996 			}
997 
998 			first = 0;
999 		}
1000 
1001 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
1002 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
1003 			pos = phdr[i].p_paddr;
1004 			if (minp > pos)
1005 				minp = pos;
1006 			pos += phdr[i].p_filesz;
1007 			if (maxp < pos)
1008 				maxp = pos;
1009 		}
1010 
1011 		/* Zero out BSS. */
1012 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
1013 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
1014 			    phdr[i].p_memsz - phdr[i].p_filesz);
1015 		}
1016 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
1017 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
1018 			if (maxp < pos)
1019 				maxp = pos;
1020 		}
1021 	}
1022 	free(phdr);
1023 
1024 	/*
1025 	 * Copy the ELF and section headers.
1026 	 */
1027 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
1028 	if (flags & (LOAD_HDR | COUNT_HDR))
1029 		maxp += sizeof(Elf32_Ehdr);
1030 
1031 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1032 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
1033 			WARN(("lseek section headers"));
1034 			return 1;
1035 		}
1036 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1037 		shp = malloc(sz);
1038 
1039 		if (fread(shp, 1, sz, fp) != sz) {
1040 			free(shp);
1041 			return 1;
1042 		}
1043 
1044 		shpp = maxp;
1045 		maxp += roundup(sz, sizeof(Elf32_Addr));
1046 
1047 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1048 		char *shstr = malloc(shstrsz);
1049 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1050 		    SEEK_SET) == -1) {
1051 			free(shstr);
1052 			free(shp);
1053 			return 1;
1054 		}
1055 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1056 			free(shstr);
1057 			free(shp);
1058 			return 1;
1059 		}
1060 
1061 		/*
1062 		 * Now load the symbol sections themselves. Make sure the
1063 		 * sections are aligned. Don't bother with string tables if
1064 		 * there are no symbol sections.
1065 		 */
1066 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1067 
1068 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1069 			if (shp[i].sh_type == SHT_SYMTAB)
1070 				havesyms = 1;
1071 
1072 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1073 			if (shp[i].sh_type == SHT_SYMTAB ||
1074 			    shp[i].sh_type == SHT_STRTAB ||
1075 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1076 				if (havesyms && (flags & LOAD_SYM)) {
1077 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1078 					    SEEK_SET) == -1) {
1079 						free(shstr);
1080 						free(shp);
1081 						return 1;
1082 					}
1083 					if (mread(fp, maxp,
1084 					    shp[i].sh_size) != shp[i].sh_size) {
1085 						free(shstr);
1086 						free(shp);
1087 						return 1;
1088 					}
1089 				}
1090 				maxp += roundup(shp[i].sh_size,
1091 				    sizeof(Elf32_Addr));
1092 				shp[i].sh_offset = off;
1093 				shp[i].sh_flags |= SHF_ALLOC;
1094 				off += roundup(shp[i].sh_size,
1095 				    sizeof(Elf32_Addr));
1096 				first = 0;
1097 			}
1098 		}
1099 		if (flags & LOAD_SYM) {
1100 			mbcopy(shp, shpp, sz);
1101 		}
1102 		free(shstr);
1103 		free(shp);
1104 	}
1105 
1106 	/*
1107 	 * Frob the copied ELF header to give information relative
1108 	 * to elfp.
1109 	 */
1110 	if (flags & LOAD_HDR) {
1111 		elf->e_phoff = 0;
1112 		elf->e_shoff = sizeof(Elf32_Ehdr);
1113 		elf->e_phentsize = 0;
1114 		elf->e_phnum = 0;
1115 		mbcopy(elf, elfp, sizeof(*elf));
1116 	}
1117 
1118 	marks[MARK_START] = LOADADDR(minp);
1119 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1120 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1121 	marks[MARK_SYM] = LOADADDR(elfp);
1122 	marks[MARK_END] = LOADADDR(maxp);
1123 
1124 	return 0;
1125 }
1126