xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision c96d1163c738370f447ed64beef8dc40a15ab3d1)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.29 2017/11/29 02:46:10 mlarkin Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/pte.h>
104 
105 #include "loadfile.h"
106 #include "vmd.h"
107 
/*
 * Scratch ELF executable header. loadfile_elf() reads the file's first
 * bytes here, then hands the matching view to elf32_exec()/elf64_exec();
 * both union members alias the same bytes.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
134 
135 /*
136  * setsegment
137  *
138  * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
140  * memory-type segment descriptor support.
141  *
142  * This function was copied from machdep.c
143  *
144  * Parameters:
145  *  sd: Address of the entry to initialize
146  *  base: base of the segment
147  *  limit: limit of the segment
148  *  type: type of the segment
 *  dpl: privilege level of the segment
150  *  def32: default 16/32 bit size of the segment
151  *  gran: granularity of the segment (byte/page)
152  */
#ifdef __i386__
static void
setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#else
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#endif
{
	/* 'limit' and 'base' are split across the descriptor's low/high fields. */
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* mark the segment present */
	sd->sd_hilimit = (int)limit >> 16;
#ifdef __i386__
	sd->sd_xx = 0;			/* unused/reserved bits */
#else
	sd->sd_avl = 0;
	sd->sd_long = 0;		/* not a 64-bit code segment */
#endif
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}
179 
180 /*
181  * push_gdt
182  *
183  * Allocates and populates a page in the guest phys memory space to hold
184  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
185  * create the same GDT that a real bootloader would have created.
186  * This is loaded into the guest phys RAM space at address GDT_PAGE.
187  */
188 static void
189 push_gdt(void)
190 {
191 	uint8_t gdtpage[PAGE_SIZE];
192 #ifdef __i386__
193 	struct segment_descriptor *sd;
194 #else
195 	struct mem_segment_descriptor *sd;
196 #endif
197 
198 	memset(&gdtpage, 0, sizeof(gdtpage));
199 
200 #ifdef __i386__
201 	sd = (struct segment_descriptor *)&gdtpage;
202 #else
203 	sd = (struct mem_segment_descriptor *)&gdtpage;
204 #endif
205 
206 	/*
207 	 * Create three segment descriptors:
208 	 *
209 	 * GDT[0] : null desriptor. "Created" via memset above.
210 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
211 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
212 	 */
213 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
214 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
215 
216 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
217 }
218 
219 /*
220  * push_pt
221  *
222  * Create an identity-mapped page directory hierarchy mapping the first
223  * 1GB of physical memory. This is used during bootstrapping VMs on
224  * CPUs without unrestricted guest capability.
225  */
226 static void
227 push_pt(void)
228 {
229 	uint64_t ptes[NPTE_PG], i;
230 
231 	/* PDPDE0 - first 1GB */
232 	memset(ptes, 0, sizeof(ptes));
233 	ptes[0] = PG_V | PML3_PAGE;
234 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
235 
236 	/* PDE0 - first 1GB */
237 	memset(ptes, 0, sizeof(ptes));
238 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
239 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
240 
241 	/* First 1GB (in 2MB pages) */
242 	memset(ptes, 0, sizeof(ptes));
243 	for (i = 0 ; i < NPTE_PG; i++) {
244 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
245 	}
246 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
247 }
248 
249 /*
250  * loadfile_elf
251  *
 * Loads an ELF kernel to its defined load address in the guest VM.
253  * The kernel is loaded to its defined start point as set in the ELF header.
254  *
255  * Parameters:
256  *  fp: file of a kernel file to load
257  *  vcp: the VM create parameters, holding the exact memory map
258  *  (out) vrs: register state to set on init for this kernel
259  *  bootdev: the optional non-default boot device
260  *  howto: optional boot flags for the kernel
261  *
262  * Return values:
263  *  0 if successful
264  *  various error codes returned from read(2) or loadelf functions
265  */
266 int
267 loadfile_elf(FILE *fp, struct vm_create_params *vcp,
268     struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto)
269 {
270 	int r;
271 	uint32_t bootargsz;
272 	size_t n, stacksize;
273 	u_long marks[MARK_MAX];
274 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
275 
276 	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
277 		return 1;
278 
279 	memset(&marks, 0, sizeof(marks));
280 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
281 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
282 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
283 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
284 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
285 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
286 	} else
287 		errno = ENOEXEC;
288 
289 	if (r)
290 		return (r);
291 
292 	push_gdt();
293 	push_pt();
294 	n = create_bios_memmap(vcp, memmap);
295 	bootargsz = push_bootargs(memmap, n);
296 	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);
297 
298 #ifdef __i386__
299 	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
300 	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
301 #else
302 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
303 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
304 #endif
305 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
306 
307 	log_debug("%s: loaded ELF kernel", __func__);
308 
309 	return (0);
310 }
311 
312 /*
313  * create_bios_memmap
314  *
315  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
316  *
317  * Parameters:
318  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
319  *   memmap (out): the BIOS memory map
320  *
321  * Return values:
322  * Number of bios_memmap_t entries, including the terminating nul-entry.
323  */
324 static size_t
325 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
326 {
327 	size_t i, n = 0, sz;
328 	paddr_t gpa;
329 	struct vm_mem_range *vmr;
330 
331 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
332 		vmr = &vcp->vcp_memranges[i];
333 		gpa = vmr->vmr_gpa;
334 		sz = vmr->vmr_size;
335 
336 		/*
337 		 * Make sure that we do not mark the ROM/video RAM area in the
338 		 * low memory as physcal memory available to the kernel.
339 		 */
340 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
341 			if (gpa >= LOWMEM_KB * 1024)
342 				sz = 0;
343 			else
344 				sz = LOWMEM_KB * 1024 - gpa;
345 		}
346 
347 		if (sz != 0) {
348 			memmap[n].addr = gpa;
349 			memmap[n].size = sz;
350 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
351 			n++;
352 		}
353 	}
354 
355 	/* Null mem map entry to denote the end of the ranges */
356 	memmap[n].addr = 0x0;
357 	memmap[n].size = 0x0;
358 	memmap[n].type = 0x0;
359 	n++;
360 
361 	return (n);
362 }
363 
364 /*
365  * push_bootargs
366  *
367  * Creates the boot arguments page in the guest address space.
368  * Since vmd(8) is acting as the bootloader, we need to create the same boot
369  * arguments page that a real bootloader would have created. This is loaded
370  * into the guest phys RAM space at address BOOTARGS_PAGE.
371  *
372  * Parameters:
373  *  memmap: the BIOS memory map
374  *  n: number of entries in memmap
375  *
376  * Return values:
377  *  The size of the bootargs
378  */
static uint32_t
push_bootargs(bios_memmap_t *memmap, size_t n)
{
	uint32_t memmap_sz, consdev_sz, i;
	bios_consdev_t consdev;
	uint32_t ba[1024];

	/*
	 * Each bootarg record is a 3-int header (type, size, next) followed
	 * by its payload. 'i' below indexes 'ba' in units of sizeof(int).
	 */
	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
	ba[0] = 0x0;    /* memory map */
	ba[1] = memmap_sz;
	ba[2] = memmap_sz;	/* next */
	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
	i = memmap_sz / sizeof(int);	/* start of the next record */

	/* Serial console device, COM1 @ 0x3f8 */
	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
	consdev.conspeed = 9600;
	consdev.consaddr = 0x3f8;
	consdev.consfreq = 0;

	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
	ba[i] = 0x5;   /* consdev */
	ba[i + 1] = consdev_sz;
	ba[i + 2] = consdev_sz;
	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
	i = i + 3 + (sizeof(bios_consdev_t) / 4);

	/*
	 * Terminator record. NOTE(review): its 4 bytes are not counted in
	 * the size returned below — presumably the kernel stops at the end
	 * marker rather than the reported size; confirm against the
	 * kernel's bootarg parser.
	 */
	ba[i] = 0xFFFFFFFF; /* BOOTARG_END */

	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);

	return (memmap_sz + consdev_sz);
}
412 
413 /*
414  * push_stack
415  *
416  * Creates the boot stack page in the guest address space. When using a real
417  * bootloader, the stack will be prepared using the following format before
418  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
419  * layout. The stack content is pushed to the guest phys RAM at address
420  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
421  * 4 bytes.
422  *
423  * Stack Layout: (TOS == Top Of Stack)
424  *  TOS		location of boot arguments page
425  *  TOS - 0x4	size of the content in the boot arguments page
426  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
427  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
428  *  TOS - 0x10	kernel 'end' symbol value
429  *  TOS - 0x14	version of bootarg API
430  *
431  * Parameters:
432  *  bootargsz: size of boot arguments
433  *  end: kernel 'end' symbol value
434  *  bootdev: the optional non-default boot device
435  *  howto: optional boot flags for the kernel
436  *
437  * Return values:
438  *  size of the stack
439  */
static size_t
push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
{
	uint32_t stack[1024];
	uint16_t loc;

	memset(&stack, 0, sizeof(stack));
	loc = 1024;	/* stack grows downward from the top of the page */

	if (bootdev == 0)
		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */

	/* Entries pushed top-down, matching the layout comment above. */
	stack[--loc] = BOOTARGS_PAGE;
	stack[--loc] = bootargsz;
	stack[--loc] = 0; /* biosbasemem */
	stack[--loc] = 0; /* biosextmem */
	stack[--loc] = end;
	stack[--loc] = 0x0e;	/* bootarg API version */
	stack[--loc] = bootdev;
	stack[--loc] = howto;

	write_mem(STACK_PAGE, &stack, PAGE_SIZE);

	/*
	 * NOTE(review): '(loc - 1)' counts one extra zeroed slot below the
	 * last push — presumably where a real bootloader's call instruction
	 * would have left a return address, so the kernel finds its
	 * arguments at the expected %esp offsets. Confirm against locore
	 * before changing.
	 */
	return (1024 - (loc - 1)) * sizeof(uint32_t);
}
465 
466 /*
467  * mread
468  *
 * Reads 'sz' bytes from the file stream provided in 'fp'
470  * into the guest address space at paddr 'addr'.
471  *
472  * Parameters:
 *  fp: file stream of the kernel image file to read from.
474  *  addr: guest paddr_t to load to
475  *  sz: number of bytes to load
476  *
477  * Return values:
478  *  returns 'sz' if successful, or 0 otherwise.
479  */
480 size_t
481 mread(FILE *fp, paddr_t addr, size_t sz)
482 {
483 	size_t ct;
484 	size_t i, rd, osz;
485 	char buf[PAGE_SIZE];
486 
487 	/*
488 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
489 	 * write_mem
490 	 */
491 	ct = 0;
492 	rd = 0;
493 	osz = sz;
494 	if ((addr & PAGE_MASK) != 0) {
495 		memset(buf, 0, sizeof(buf));
496 		if (sz > PAGE_SIZE)
497 			ct = PAGE_SIZE - (addr & PAGE_MASK);
498 		else
499 			ct = sz;
500 
501 		if (fread(buf, 1, ct, fp) != ct) {
502 			log_warn("%s: error %d in mread", __progname, errno);
503 			return (0);
504 		}
505 		rd += ct;
506 
507 		if (write_mem(addr, buf, ct))
508 			return (0);
509 
510 		addr += ct;
511 	}
512 
513 	sz = sz - ct;
514 
515 	if (sz == 0)
516 		return (osz);
517 
518 	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
519 		memset(buf, 0, sizeof(buf));
520 		if (i + PAGE_SIZE > sz)
521 			ct = sz - i;
522 		else
523 			ct = PAGE_SIZE;
524 
525 		if (fread(buf, 1, ct, fp) != ct) {
526 			log_warn("%s: error %d in mread", __progname, errno);
527 			return (0);
528 		}
529 		rd += ct;
530 
531 		if (write_mem(addr, buf, ct))
532 			return (0);
533 	}
534 
535 	return (osz);
536 }
537 
538 /*
539  * marc4random_buf
540  *
541  * load 'sz' bytes of random data into the guest address space at paddr
542  * 'addr'.
543  *
544  * Parameters:
545  *  addr: guest paddr_t to load random bytes into
546  *  sz: number of random bytes to load
547  *
548  * Return values:
549  *  nothing
550  */
551 static void
552 marc4random_buf(paddr_t addr, int sz)
553 {
554 	int i, ct;
555 	char buf[PAGE_SIZE];
556 
557 	/*
558 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
559 	 * write_mem
560 	 */
561 	ct = 0;
562 	if (addr % PAGE_SIZE != 0) {
563 		memset(buf, 0, sizeof(buf));
564 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
565 
566 		arc4random_buf(buf, ct);
567 
568 		if (write_mem(addr, buf, ct))
569 			return;
570 
571 		addr += ct;
572 	}
573 
574 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
575 		memset(buf, 0, sizeof(buf));
576 		if (i + PAGE_SIZE > sz)
577 			ct = sz - i;
578 		else
579 			ct = PAGE_SIZE;
580 
581 		arc4random_buf(buf, ct);
582 
583 		if (write_mem(addr, buf, ct))
584 			return;
585 	}
586 }
587 
588 /*
589  * mbzero
590  *
591  * load 'sz' bytes of zeros into the guest address space at paddr
592  * 'addr'.
593  *
594  * Parameters:
595  *  addr: guest paddr_t to zero
596  *  sz: number of zero bytes to store
597  *
598  * Return values:
599  *  nothing
600  */
601 static void
602 mbzero(paddr_t addr, int sz)
603 {
604 	int i, ct;
605 	char buf[PAGE_SIZE];
606 
607 	/*
608 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
609 	 * write_mem
610 	 */
611 	ct = 0;
612 	memset(buf, 0, sizeof(buf));
613 	if (addr % PAGE_SIZE != 0) {
614 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
615 
616 		if (write_mem(addr, buf, ct))
617 			return;
618 
619 		addr += ct;
620 	}
621 
622 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
623 		if (i + PAGE_SIZE > sz)
624 			ct = sz - i;
625 		else
626 			ct = PAGE_SIZE;
627 
628 		if (write_mem(addr, buf, ct))
629 			return;
630 	}
631 }
632 
633 /*
634  * mbcopy
635  *
636  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
637  *
638  * Parameters:
639  *  src: source buffer to copy from
640  *  dst: destination guest paddr_t to copy to
641  *  sz: number of bytes to copy
642  *
643  * Return values:
644  *  nothing
645  */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	/* Thin wrapper; write_mem()'s error return is discarded (void API). */
	write_mem(dst, src, sz);
}
651 
652 /*
653  * elf64_exec
654  *
655  * Load the kernel indicated by 'fd' into the guest physical memory
656  * space, at the addresses defined in the ELF header.
657  *
658  * This function is used for 64 bit kernels.
659  *
660  * Parameters:
 *  fp: file stream of the kernel to load
662  *  elf: ELF header of the kernel
663  *  marks: array to store the offsets of various kernel structures
664  *      (start, bss, etc)
665  *  flags: flag value to indicate which section(s) to load (usually
666  *      LOAD_ALL)
667  *
668  * Return values:
669  *  0 if successful
670  *  1 if unsuccessful
671  */
672 static int
673 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
674 {
675 	Elf64_Shdr *shp;
676 	Elf64_Phdr *phdr;
677 	Elf64_Off off;
678 	int i;
679 	size_t sz;
680 	int first;
681 	int havesyms, havelines;
682 	paddr_t minp = ~0, maxp = 0, pos = 0;
683 	paddr_t offset = marks[MARK_START], shpp, elfp;
684 
685 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
686 	phdr = malloc(sz);
687 
688 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
689 		free(phdr);
690 		return 1;
691 	}
692 
693 	if (fread(phdr, 1, sz, fp) != sz) {
694 		free(phdr);
695 		return 1;
696 	}
697 
698 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
699 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
700 			int m;
701 
702 			/* Fill segment if asked for. */
703 			if (flags & LOAD_RANDOM) {
704 				for (pos = 0; pos < phdr[i].p_filesz;
705 				    pos += m) {
706 					m = phdr[i].p_filesz - pos;
707 					marc4random_buf(phdr[i].p_paddr + pos,
708 					    m);
709 				}
710 			}
711 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
712 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
713 				marks[MARK_ERANDOM] =
714 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
715 			}
716 			continue;
717 		}
718 
719 		if (phdr[i].p_type != PT_LOAD ||
720 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
721 			continue;
722 
723 #define IS_TEXT(p)	(p.p_flags & PF_X)
724 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
725 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
726 		/*
727 		 * XXX: Assume first address is lowest
728 		 */
729 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
730 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
731 
732 			/* Read in segment. */
733 			if (fseeko(fp, (off_t)phdr[i].p_offset,
734 			    SEEK_SET) == -1) {
735 				free(phdr);
736 				return 1;
737 			}
738 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
739 			    phdr[i].p_filesz) {
740 				free(phdr);
741 				return 1;
742 			}
743 
744 			first = 0;
745 		}
746 
747 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
748 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
749 			pos = phdr[i].p_paddr;
750 			if (minp > pos)
751 				minp = pos;
752 			pos += phdr[i].p_filesz;
753 			if (maxp < pos)
754 				maxp = pos;
755 		}
756 
757 		/* Zero out BSS. */
758 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
759 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
760 			    phdr[i].p_memsz - phdr[i].p_filesz);
761 		}
762 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
763 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
764 			if (maxp < pos)
765 				maxp = pos;
766 		}
767 	}
768 	free(phdr);
769 
770 	/*
771 	 * Copy the ELF and section headers.
772 	 */
773 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
774 	if (flags & (LOAD_HDR | COUNT_HDR))
775 		maxp += sizeof(Elf64_Ehdr);
776 
777 	if (flags & (LOAD_SYM | COUNT_SYM)) {
778 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
779 			WARN(("lseek section headers"));
780 			return 1;
781 		}
782 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
783 		shp = malloc(sz);
784 
785 		if (fread(shp, 1, sz, fp) != sz) {
786 			free(shp);
787 			return 1;
788 		}
789 
790 		shpp = maxp;
791 		maxp += roundup(sz, sizeof(Elf64_Addr));
792 
793 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
794 		char *shstr = malloc(shstrsz);
795 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
796 		    SEEK_SET) == -1) {
797 			free(shstr);
798 			free(shp);
799 			return 1;
800 		}
801 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
802 			free(shstr);
803 			free(shp);
804 			return 1;
805 		}
806 
807 		/*
808 		 * Now load the symbol sections themselves. Make sure the
809 		 * sections are aligned. Don't bother with string tables if
810 		 * there are no symbol sections.
811 		 */
812 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
813 
814 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
815 			if (shp[i].sh_type == SHT_SYMTAB)
816 				havesyms = 1;
817 
818 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
819 			if (shp[i].sh_type == SHT_SYMTAB ||
820 			    shp[i].sh_type == SHT_STRTAB ||
821 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
822 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
823 				if (havesyms && (flags & LOAD_SYM)) {
824 					if (fseeko(fp, (off_t)shp[i].sh_offset,
825 					    SEEK_SET) == -1) {
826 						free(shstr);
827 						free(shp);
828 						return 1;
829 					}
830 					if (mread(fp, maxp,
831 					    shp[i].sh_size) != shp[i].sh_size) {
832 						free(shstr);
833 						free(shp);
834 						return 1;
835 					}
836 				}
837 				maxp += roundup(shp[i].sh_size,
838 				    sizeof(Elf64_Addr));
839 				shp[i].sh_offset = off;
840 				shp[i].sh_flags |= SHF_ALLOC;
841 				off += roundup(shp[i].sh_size,
842 				    sizeof(Elf64_Addr));
843 				first = 0;
844 			}
845 		}
846 		if (flags & LOAD_SYM) {
847 			mbcopy(shp, shpp, sz);
848 		}
849 		free(shstr);
850 		free(shp);
851 	}
852 
853 	/*
854 	 * Frob the copied ELF header to give information relative
855 	 * to elfp.
856 	 */
857 	if (flags & LOAD_HDR) {
858 		elf->e_phoff = 0;
859 		elf->e_shoff = sizeof(Elf64_Ehdr);
860 		elf->e_phentsize = 0;
861 		elf->e_phnum = 0;
862 		mbcopy(elf, elfp, sizeof(*elf));
863 	}
864 
865 	marks[MARK_START] = LOADADDR(minp);
866 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
867 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
868 	marks[MARK_SYM] = LOADADDR(elfp);
869 	marks[MARK_END] = LOADADDR(maxp);
870 
871 	return 0;
872 }
873 
874 /*
875  * elf32_exec
876  *
877  * Load the kernel indicated by 'fd' into the guest physical memory
878  * space, at the addresses defined in the ELF header.
879  *
880  * This function is used for 32 bit kernels.
881  *
882  * Parameters:
 *  fp: file stream of the kernel to load
884  *  elf: ELF header of the kernel
885  *  marks: array to store the offsets of various kernel structures
886  *      (start, bss, etc)
887  *  flags: flag value to indicate which section(s) to load (usually
888  *      LOAD_ALL)
889  *
890  * Return values:
891  *  0 if successful
892  *  1 if unsuccessful
893  */
894 static int
895 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
896 {
897 	Elf32_Shdr *shp;
898 	Elf32_Phdr *phdr;
899 	Elf32_Off off;
900 	int i;
901 	size_t sz;
902 	int first;
903 	int havesyms, havelines;
904 	paddr_t minp = ~0, maxp = 0, pos = 0;
905 	paddr_t offset = marks[MARK_START], shpp, elfp;
906 
907 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
908 	phdr = malloc(sz);
909 
910 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
911 		free(phdr);
912 		return 1;
913 	}
914 
915 	if (fread(phdr, 1, sz, fp) != sz) {
916 		free(phdr);
917 		return 1;
918 	}
919 
920 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
921 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
922 			int m;
923 
924 			/* Fill segment if asked for. */
925 			if (flags & LOAD_RANDOM) {
926 				for (pos = 0; pos < phdr[i].p_filesz;
927 				    pos += m) {
928 					m = phdr[i].p_filesz - pos;
929 					marc4random_buf(phdr[i].p_paddr + pos,
930 					    m);
931 				}
932 			}
933 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
934 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
935 				marks[MARK_ERANDOM] =
936 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
937 			}
938 			continue;
939 		}
940 
941 		if (phdr[i].p_type != PT_LOAD ||
942 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
943 			continue;
944 
945 #define IS_TEXT(p)	(p.p_flags & PF_X)
946 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
947 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
948 		/*
949 		 * XXX: Assume first address is lowest
950 		 */
951 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
952 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
953 
954 			/* Read in segment. */
955 			if (fseeko(fp, (off_t)phdr[i].p_offset,
956 			    SEEK_SET) == -1) {
957 				free(phdr);
958 				return 1;
959 			}
960 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
961 			    phdr[i].p_filesz) {
962 				free(phdr);
963 				return 1;
964 			}
965 
966 			first = 0;
967 		}
968 
969 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
970 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
971 			pos = phdr[i].p_paddr;
972 			if (minp > pos)
973 				minp = pos;
974 			pos += phdr[i].p_filesz;
975 			if (maxp < pos)
976 				maxp = pos;
977 		}
978 
979 		/* Zero out BSS. */
980 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
981 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
982 			    phdr[i].p_memsz - phdr[i].p_filesz);
983 		}
984 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
985 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
986 			if (maxp < pos)
987 				maxp = pos;
988 		}
989 	}
990 	free(phdr);
991 
992 	/*
993 	 * Copy the ELF and section headers.
994 	 */
995 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
996 	if (flags & (LOAD_HDR | COUNT_HDR))
997 		maxp += sizeof(Elf32_Ehdr);
998 
999 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1000 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
1001 			WARN(("lseek section headers"));
1002 			return 1;
1003 		}
1004 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1005 		shp = malloc(sz);
1006 
1007 		if (fread(shp, 1, sz, fp) != sz) {
1008 			free(shp);
1009 			return 1;
1010 		}
1011 
1012 		shpp = maxp;
1013 		maxp += roundup(sz, sizeof(Elf32_Addr));
1014 
1015 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1016 		char *shstr = malloc(shstrsz);
1017 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1018 		    SEEK_SET) == -1) {
1019 			free(shstr);
1020 			free(shp);
1021 			return 1;
1022 		}
1023 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1024 			free(shstr);
1025 			free(shp);
1026 			return 1;
1027 		}
1028 
1029 		/*
1030 		 * Now load the symbol sections themselves. Make sure the
1031 		 * sections are aligned. Don't bother with string tables if
1032 		 * there are no symbol sections.
1033 		 */
1034 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1035 
1036 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1037 			if (shp[i].sh_type == SHT_SYMTAB)
1038 				havesyms = 1;
1039 
1040 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1041 			if (shp[i].sh_type == SHT_SYMTAB ||
1042 			    shp[i].sh_type == SHT_STRTAB ||
1043 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1044 				if (havesyms && (flags & LOAD_SYM)) {
1045 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1046 					    SEEK_SET) == -1) {
1047 						free(shstr);
1048 						free(shp);
1049 						return 1;
1050 					}
1051 					if (mread(fp, maxp,
1052 					    shp[i].sh_size) != shp[i].sh_size) {
1053 						free(shstr);
1054 						free(shp);
1055 						return 1;
1056 					}
1057 				}
1058 				maxp += roundup(shp[i].sh_size,
1059 				    sizeof(Elf32_Addr));
1060 				shp[i].sh_offset = off;
1061 				shp[i].sh_flags |= SHF_ALLOC;
1062 				off += roundup(shp[i].sh_size,
1063 				    sizeof(Elf32_Addr));
1064 				first = 0;
1065 			}
1066 		}
1067 		if (flags & LOAD_SYM) {
1068 			mbcopy(shp, shpp, sz);
1069 		}
1070 		free(shstr);
1071 		free(shp);
1072 	}
1073 
1074 	/*
1075 	 * Frob the copied ELF header to give information relative
1076 	 * to elfp.
1077 	 */
1078 	if (flags & LOAD_HDR) {
1079 		elf->e_phoff = 0;
1080 		elf->e_shoff = sizeof(Elf32_Ehdr);
1081 		elf->e_phentsize = 0;
1082 		elf->e_phnum = 0;
1083 		mbcopy(elf, elfp, sizeof(*elf));
1084 	}
1085 
1086 	marks[MARK_START] = LOADADDR(minp);
1087 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1088 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1089 	marks[MARK_SYM] = LOADADDR(elfp);
1090 	marks[MARK_END] = LOADADDR(maxp);
1091 
1092 	return 0;
1093 }
1094