xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision f9eeb76b8f9a5d12163f26a04d6f328681b924e0)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.26 2017/03/27 00:28:04 deraadt Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 #include <sys/exec_elf.h>
89 
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/pte.h>
104 
105 #include "loadfile.h"
106 #include "vmd.h"
107 
/*
 * Buffer holding the kernel image's ELF header; the EI_CLASS byte is
 * probed in loadfile_elf() to select the 32 or 64 bit load path.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
134 
/*
 * setsegment
 *
 * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
 * memory-type segment descriptor support.
 *
 * This function was copied from machdep.c
 *
 * Parameters:
 *  sd: Address of the entry to initialize
 *  base: base of the segment
 *  limit: limit of the segment
 *  type: type of the segment
 *  dpl: privilege level of the segment
 *  def32: default 16/32 bit size of the segment
 *  gran: granularity of the segment (byte/page)
 */
#ifdef __i386__
static void
setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#else
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#endif
{
	/* The limit and base are split across low/high descriptor fields. */
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;		/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
#ifdef __i386__
	sd->sd_xx = 0;
#else
	sd->sd_avl = 0;
	sd->sd_long = 0;	/* not a 64 bit code segment */
#endif
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	sd->sd_hibase = (int)base >> 24;
}
179 
180 /*
181  * push_gdt
182  *
183  * Allocates and populates a page in the guest phys memory space to hold
184  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
185  * create the same GDT that a real bootloader would have created.
186  * This is loaded into the guest phys RAM space at address GDT_PAGE.
187  */
188 static void
189 push_gdt(void)
190 {
191 	uint8_t gdtpage[PAGE_SIZE];
192 #ifdef __i386__
193 	struct segment_descriptor *sd;
194 #else
195 	struct mem_segment_descriptor *sd;
196 #endif
197 
198 	memset(&gdtpage, 0, sizeof(gdtpage));
199 
200 #ifdef __i386__
201 	sd = (struct segment_descriptor *)&gdtpage;
202 #else
203 	sd = (struct mem_segment_descriptor *)&gdtpage;
204 #endif
205 
206 	/*
207 	 * Create three segment descriptors:
208 	 *
209 	 * GDT[0] : null desriptor. "Created" via memset above.
210 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
211 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
212 	 */
213 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
214 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
215 
216 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
217 }
218 
/*
 * push_pt
 *
 * Create an identity-mapped page directory hierarchy mapping the first
 * 1GB of physical memory. This is used during bootstrapping VMs on
 * CPUs without unrestricted guest capability.
 */
static void
push_pt(void)
{
	pt_entry_t ptes[NPTE_PG];
	uint64_t i;

#ifdef __i386__
	/* Single level: one directory of PG_PS superpages, identity mapped. */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < NPTE_PG; i++) {
		ptes[i] = PG_V | PG_PS | (NBPD * i);
	}
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
#else
	/* PML4 [0] - points at the PML3 page */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PML3_PAGE;
	write_mem(PML4_PAGE, ptes, PAGE_SIZE);

	/* PML3 [0] - first 1GB, points at the PML2 page */
	memset(ptes, 0, sizeof(ptes));
	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
	write_mem(PML3_PAGE, ptes, PAGE_SIZE);

	/* PML2 [0..511] - first 1GB identity mapped (in 2MB pages) */
	memset(ptes, 0, sizeof(ptes));
	for (i = 0 ; i < NPTE_PG; i++) {
		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | (NBPD_L2 * i);
	}
	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
#endif
}
257 
258 /*
259  * loadfile_elf
260  *
261  * Loads an ELF kernel to it's defined load address in the guest VM.
262  * The kernel is loaded to its defined start point as set in the ELF header.
263  *
264  * Parameters:
265  *  fp: file of a kernel file to load
266  *  vcp: the VM create parameters, holding the exact memory map
267  *  (out) vrs: register state to set on init for this kernel
268  *  bootdev: the optional non-default boot device
269  *  howto: optionel boot flags for the kernel
270  *
271  * Return values:
272  *  0 if successful
273  *  various error codes returned from read(2) or loadelf functions
274  */
275 int
276 loadfile_elf(FILE *fp, struct vm_create_params *vcp,
277     struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto)
278 {
279 	int r;
280 	uint32_t bootargsz;
281 	size_t n, stacksize;
282 	u_long marks[MARK_MAX];
283 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
284 
285 	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
286 		return 1;
287 
288 	memset(&marks, 0, sizeof(marks));
289 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
290 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
291 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
292 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
293 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
294 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
295 	} else
296 		errno = ENOEXEC;
297 
298 	if (r)
299 		return (r);
300 
301 	push_gdt();
302 	push_pt();
303 	n = create_bios_memmap(vcp, memmap);
304 	bootargsz = push_bootargs(memmap, n);
305 	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);
306 
307 #ifdef __i386__
308 	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
309 	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
310 #else
311 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
312 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
313 #endif
314 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
315 
316 	log_debug("%s: loaded ELF kernel", __func__);
317 
318 	return (0);
319 }
320 
321 /*
322  * create_bios_memmap
323  *
324  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
325  *
326  * Parameters:
327  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
328  *   memmap (out): the BIOS memory map
329  *
330  * Return values:
331  * Number of bios_memmap_t entries, including the terminating nul-entry.
332  */
333 static size_t
334 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
335 {
336 	size_t i, n = 0, sz;
337 	paddr_t gpa;
338 	struct vm_mem_range *vmr;
339 
340 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
341 		vmr = &vcp->vcp_memranges[i];
342 		gpa = vmr->vmr_gpa;
343 		sz = vmr->vmr_size;
344 
345 		/*
346 		 * Make sure that we do not mark the ROM/video RAM area in the
347 		 * low memory as physcal memory available to the kernel.
348 		 */
349 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
350 			if (gpa >= LOWMEM_KB * 1024)
351 				sz = 0;
352 			else
353 				sz = LOWMEM_KB * 1024 - gpa;
354 		}
355 
356 		if (sz != 0) {
357 			memmap[n].addr = gpa;
358 			memmap[n].size = sz;
359 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
360 			n++;
361 		}
362 	}
363 
364 	/* Null mem map entry to denote the end of the ranges */
365 	memmap[n].addr = 0x0;
366 	memmap[n].size = 0x0;
367 	memmap[n].type = 0x0;
368 	n++;
369 
370 	return (n);
371 }
372 
373 /*
374  * push_bootargs
375  *
376  * Creates the boot arguments page in the guest address space.
377  * Since vmd(8) is acting as the bootloader, we need to create the same boot
378  * arguments page that a real bootloader would have created. This is loaded
379  * into the guest phys RAM space at address BOOTARGS_PAGE.
380  *
381  * Parameters:
382  *  memmap: the BIOS memory map
383  *  n: number of entries in memmap
384  *
385  * Return values:
386  *  The size of the bootargs
387  */
388 static uint32_t
389 push_bootargs(bios_memmap_t *memmap, size_t n)
390 {
391 	uint32_t memmap_sz, consdev_sz, i;
392 	bios_consdev_t consdev;
393 	uint32_t ba[1024];
394 
395 	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
396 	ba[0] = 0x0;    /* memory map */
397 	ba[1] = memmap_sz;
398 	ba[2] = memmap_sz;	/* next */
399 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
400 	i = memmap_sz / sizeof(int);
401 
402 	/* Serial console device, COM1 @ 0x3f8 */
403 	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
404 	consdev.conspeed = 9600;
405 	consdev.consaddr = 0x3f8;
406 	consdev.consfreq = 0;
407 
408 	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
409 	ba[i] = 0x5;   /* consdev */
410 	ba[i + 1] = consdev_sz;
411 	ba[i + 2] = consdev_sz;
412 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
413 	i = i + 3 + (sizeof(bios_consdev_t) / 4);
414 
415 	ba[i] = 0xFFFFFFFF; /* BOOTARG_END */
416 
417 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
418 
419 	return (memmap_sz + consdev_sz);
420 }
421 
/*
 * push_stack
 *
 * Creates the boot stack page in the guest address space. When using a real
 * bootloader, the stack will be prepared using the following format before
 * transitioning to kernel start, so vmd(8) needs to mimic the same stack
 * layout. The stack content is pushed to the guest phys RAM at address
 * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
 * 4 bytes.
 *
 * Stack Layout: (TOS == Top Of Stack)
 *  TOS		location of boot arguments page
 *  TOS - 0x4	size of the content in the boot arguments page
 *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
 *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
 *  TOS - 0x10	kernel 'end' symbol value
 *  TOS - 0x14	version of bootarg API
 *
 * Parameters:
 *  bootargsz: size of boot arguments
 *  end: kernel 'end' symbol value
 *  bootdev: the optional non-default boot device
 *  howto: optional boot flags for the kernel
 *
 * Return values:
 *  size of the stack
 */
static size_t
push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
{
	uint32_t stack[1024];
	uint16_t loc;

	/* Start from the top of a zeroed page and push downwards. */
	memset(&stack, 0, sizeof(stack));
	loc = 1024;

	if (bootdev == 0)
		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */

	stack[--loc] = BOOTARGS_PAGE;
	stack[--loc] = bootargsz;
	stack[--loc] = 0; /* biosbasemem */
	stack[--loc] = 0; /* biosextmem */
	stack[--loc] = end;
	stack[--loc] = 0x0e;	/* version of bootarg API (see layout above) */
	stack[--loc] = bootdev;
	stack[--loc] = howto;

	write_mem(STACK_PAGE, &stack, PAGE_SIZE);

	/*
	 * NOTE(review): the (loc - 1) makes the reported stack size one
	 * uint32_t larger than the eight words pushed, so the guest stack
	 * pointer lands on the zeroed slot just below 'howto' — presumably
	 * standing in for the return address a real bootloader's call would
	 * have pushed. Confirm against the kernel locore entry expectations.
	 */
	return (1024 - (loc - 1)) * sizeof(uint32_t);
}
474 
475 /*
476  * mread
477  *
478  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
479  * into the guest address space at paddr 'addr'.
480  *
481  * Parameters:
482  *  fd: file descriptor of the kernel image file to read from.
483  *  addr: guest paddr_t to load to
484  *  sz: number of bytes to load
485  *
486  * Return values:
487  *  returns 'sz' if successful, or 0 otherwise.
488  */
489 size_t
490 mread(FILE *fp, paddr_t addr, size_t sz)
491 {
492 	size_t ct;
493 	size_t i, rd, osz;
494 	char buf[PAGE_SIZE];
495 
496 	/*
497 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
498 	 * write_mem
499 	 */
500 	ct = 0;
501 	rd = 0;
502 	osz = sz;
503 	if ((addr & PAGE_MASK) != 0) {
504 		memset(buf, 0, sizeof(buf));
505 		if (sz > PAGE_SIZE)
506 			ct = PAGE_SIZE - (addr & PAGE_MASK);
507 		else
508 			ct = sz;
509 
510 		if (fread(buf, 1, ct, fp) != ct) {
511 			log_warn("%s: error %d in mread", __progname, errno);
512 			return (0);
513 		}
514 		rd += ct;
515 
516 		if (write_mem(addr, buf, ct))
517 			return (0);
518 
519 		addr += ct;
520 	}
521 
522 	sz = sz - ct;
523 
524 	if (sz == 0)
525 		return (osz);
526 
527 	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
528 		memset(buf, 0, sizeof(buf));
529 		if (i + PAGE_SIZE > sz)
530 			ct = sz - i;
531 		else
532 			ct = PAGE_SIZE;
533 
534 		if (fread(buf, 1, ct, fp) != ct) {
535 			log_warn("%s: error %d in mread", __progname, errno);
536 			return (0);
537 		}
538 		rd += ct;
539 
540 		if (write_mem(addr, buf, ct))
541 			return (0);
542 	}
543 
544 	return (osz);
545 }
546 
547 /*
548  * marc4random_buf
549  *
550  * load 'sz' bytes of random data into the guest address space at paddr
551  * 'addr'.
552  *
553  * Parameters:
554  *  addr: guest paddr_t to load random bytes into
555  *  sz: number of random bytes to load
556  *
557  * Return values:
558  *  nothing
559  */
560 static void
561 marc4random_buf(paddr_t addr, int sz)
562 {
563 	int i, ct;
564 	char buf[PAGE_SIZE];
565 
566 	/*
567 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
568 	 * write_mem
569 	 */
570 	ct = 0;
571 	if (addr % PAGE_SIZE != 0) {
572 		memset(buf, 0, sizeof(buf));
573 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
574 
575 		arc4random_buf(buf, ct);
576 
577 		if (write_mem(addr, buf, ct))
578 			return;
579 
580 		addr += ct;
581 	}
582 
583 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
584 		memset(buf, 0, sizeof(buf));
585 		if (i + PAGE_SIZE > sz)
586 			ct = sz - i;
587 		else
588 			ct = PAGE_SIZE;
589 
590 		arc4random_buf(buf, ct);
591 
592 		if (write_mem(addr, buf, ct))
593 			return;
594 	}
595 }
596 
597 /*
598  * mbzero
599  *
600  * load 'sz' bytes of zeros into the guest address space at paddr
601  * 'addr'.
602  *
603  * Parameters:
604  *  addr: guest paddr_t to zero
605  *  sz: number of zero bytes to store
606  *
607  * Return values:
608  *  nothing
609  */
610 static void
611 mbzero(paddr_t addr, int sz)
612 {
613 	int i, ct;
614 	char buf[PAGE_SIZE];
615 
616 	/*
617 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
618 	 * write_mem
619 	 */
620 	ct = 0;
621 	memset(buf, 0, sizeof(buf));
622 	if (addr % PAGE_SIZE != 0) {
623 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
624 
625 		if (write_mem(addr, buf, ct))
626 			return;
627 
628 		addr += ct;
629 	}
630 
631 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
632 		if (i + PAGE_SIZE > sz)
633 			ct = sz - i;
634 		else
635 			ct = PAGE_SIZE;
636 
637 		if (write_mem(addr, buf, ct))
638 			return;
639 	}
640 }
641 
/*
 * mbcopy
 *
 * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
 *
 * Parameters:
 *  src: source buffer to copy from
 *  dst: destination guest paddr_t to copy to
 *  sz: number of bytes to copy
 *
 * Return values:
 *  nothing (any failure of write_mem is silently ignored)
 */
static void
mbcopy(void *src, paddr_t dst, int sz)
{
	write_mem(dst, src, sz);
}
660 
661 /*
662  * elf64_exec
663  *
664  * Load the kernel indicated by 'fd' into the guest physical memory
665  * space, at the addresses defined in the ELF header.
666  *
667  * This function is used for 64 bit kernels.
668  *
669  * Parameters:
670  *  fd: file descriptor of the kernel to load
671  *  elf: ELF header of the kernel
672  *  marks: array to store the offsets of various kernel structures
673  *      (start, bss, etc)
674  *  flags: flag value to indicate which section(s) to load (usually
675  *      LOAD_ALL)
676  *
677  * Return values:
678  *  0 if successful
679  *  1 if unsuccessful
680  */
681 static int
682 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
683 {
684 	Elf64_Shdr *shp;
685 	Elf64_Phdr *phdr;
686 	Elf64_Off off;
687 	int i;
688 	size_t sz;
689 	int first;
690 	int havesyms, havelines;
691 	paddr_t minp = ~0, maxp = 0, pos = 0;
692 	paddr_t offset = marks[MARK_START], shpp, elfp;
693 
694 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
695 	phdr = malloc(sz);
696 
697 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
698 		free(phdr);
699 		return 1;
700 	}
701 
702 	if (fread(phdr, 1, sz, fp) != sz) {
703 		free(phdr);
704 		return 1;
705 	}
706 
707 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
708 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
709 			int m;
710 
711 			/* Fill segment if asked for. */
712 			if (flags & LOAD_RANDOM) {
713 				for (pos = 0; pos < phdr[i].p_filesz;
714 				    pos += m) {
715 					m = phdr[i].p_filesz - pos;
716 					marc4random_buf(phdr[i].p_paddr + pos,
717 					    m);
718 				}
719 			}
720 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
721 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
722 				marks[MARK_ERANDOM] =
723 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
724 			}
725 			continue;
726 		}
727 
728 		if (phdr[i].p_type != PT_LOAD ||
729 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
730 			continue;
731 
732 #define IS_TEXT(p)	(p.p_flags & PF_X)
733 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
734 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
735 		/*
736 		 * XXX: Assume first address is lowest
737 		 */
738 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
739 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
740 
741 			/* Read in segment. */
742 			if (fseeko(fp, (off_t)phdr[i].p_offset,
743 			    SEEK_SET) == -1) {
744 				free(phdr);
745 				return 1;
746 			}
747 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
748 			    phdr[i].p_filesz) {
749 				free(phdr);
750 				return 1;
751 			}
752 
753 			first = 0;
754 		}
755 
756 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
757 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
758 			pos = phdr[i].p_paddr;
759 			if (minp > pos)
760 				minp = pos;
761 			pos += phdr[i].p_filesz;
762 			if (maxp < pos)
763 				maxp = pos;
764 		}
765 
766 		/* Zero out BSS. */
767 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
768 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
769 			    phdr[i].p_memsz - phdr[i].p_filesz);
770 		}
771 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
772 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
773 			if (maxp < pos)
774 				maxp = pos;
775 		}
776 	}
777 	free(phdr);
778 
779 	/*
780 	 * Copy the ELF and section headers.
781 	 */
782 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
783 	if (flags & (LOAD_HDR | COUNT_HDR))
784 		maxp += sizeof(Elf64_Ehdr);
785 
786 	if (flags & (LOAD_SYM | COUNT_SYM)) {
787 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
788 			WARN(("lseek section headers"));
789 			return 1;
790 		}
791 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
792 		shp = malloc(sz);
793 
794 		if (fread(shp, 1, sz, fp) != sz) {
795 			free(shp);
796 			return 1;
797 		}
798 
799 		shpp = maxp;
800 		maxp += roundup(sz, sizeof(Elf64_Addr));
801 
802 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
803 		char *shstr = malloc(shstrsz);
804 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
805 		    SEEK_SET) == -1) {
806 			free(shstr);
807 			free(shp);
808 			return 1;
809 		}
810 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
811 			free(shstr);
812 			free(shp);
813 			return 1;
814 		}
815 
816 		/*
817 		 * Now load the symbol sections themselves. Make sure the
818 		 * sections are aligned. Don't bother with string tables if
819 		 * there are no symbol sections.
820 		 */
821 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
822 
823 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
824 			if (shp[i].sh_type == SHT_SYMTAB)
825 				havesyms = 1;
826 
827 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
828 			if (shp[i].sh_type == SHT_SYMTAB ||
829 			    shp[i].sh_type == SHT_STRTAB ||
830 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
831 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
832 				if (havesyms && (flags & LOAD_SYM)) {
833 					if (fseeko(fp, (off_t)shp[i].sh_offset,
834 					    SEEK_SET) == -1) {
835 						free(shstr);
836 						free(shp);
837 						return 1;
838 					}
839 					if (mread(fp, maxp,
840 					    shp[i].sh_size) != shp[i].sh_size) {
841 						free(shstr);
842 						free(shp);
843 						return 1;
844 					}
845 				}
846 				maxp += roundup(shp[i].sh_size,
847 				    sizeof(Elf64_Addr));
848 				shp[i].sh_offset = off;
849 				shp[i].sh_flags |= SHF_ALLOC;
850 				off += roundup(shp[i].sh_size,
851 				    sizeof(Elf64_Addr));
852 				first = 0;
853 			}
854 		}
855 		if (flags & LOAD_SYM) {
856 			mbcopy(shp, shpp, sz);
857 		}
858 		free(shstr);
859 		free(shp);
860 	}
861 
862 	/*
863 	 * Frob the copied ELF header to give information relative
864 	 * to elfp.
865 	 */
866 	if (flags & LOAD_HDR) {
867 		elf->e_phoff = 0;
868 		elf->e_shoff = sizeof(Elf64_Ehdr);
869 		elf->e_phentsize = 0;
870 		elf->e_phnum = 0;
871 		mbcopy(elf, elfp, sizeof(*elf));
872 	}
873 
874 	marks[MARK_START] = LOADADDR(minp);
875 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
876 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
877 	marks[MARK_SYM] = LOADADDR(elfp);
878 	marks[MARK_END] = LOADADDR(maxp);
879 
880 	return 0;
881 }
882 
883 /*
884  * elf32_exec
885  *
886  * Load the kernel indicated by 'fd' into the guest physical memory
887  * space, at the addresses defined in the ELF header.
888  *
889  * This function is used for 32 bit kernels.
890  *
891  * Parameters:
892  *  fd: file descriptor of the kernel to load
893  *  elf: ELF header of the kernel
894  *  marks: array to store the offsets of various kernel structures
895  *      (start, bss, etc)
896  *  flags: flag value to indicate which section(s) to load (usually
897  *      LOAD_ALL)
898  *
899  * Return values:
900  *  0 if successful
901  *  1 if unsuccessful
902  */
903 static int
904 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
905 {
906 	Elf32_Shdr *shp;
907 	Elf32_Phdr *phdr;
908 	Elf32_Off off;
909 	int i;
910 	size_t sz;
911 	int first;
912 	int havesyms, havelines;
913 	paddr_t minp = ~0, maxp = 0, pos = 0;
914 	paddr_t offset = marks[MARK_START], shpp, elfp;
915 
916 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
917 	phdr = malloc(sz);
918 
919 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
920 		free(phdr);
921 		return 1;
922 	}
923 
924 	if (fread(phdr, 1, sz, fp) != sz) {
925 		free(phdr);
926 		return 1;
927 	}
928 
929 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
930 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
931 			int m;
932 
933 			/* Fill segment if asked for. */
934 			if (flags & LOAD_RANDOM) {
935 				for (pos = 0; pos < phdr[i].p_filesz;
936 				    pos += m) {
937 					m = phdr[i].p_filesz - pos;
938 					marc4random_buf(phdr[i].p_paddr + pos,
939 					    m);
940 				}
941 			}
942 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
943 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
944 				marks[MARK_ERANDOM] =
945 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
946 			}
947 			continue;
948 		}
949 
950 		if (phdr[i].p_type != PT_LOAD ||
951 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
952 			continue;
953 
954 #define IS_TEXT(p)	(p.p_flags & PF_X)
955 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
956 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
957 		/*
958 		 * XXX: Assume first address is lowest
959 		 */
960 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
961 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
962 
963 			/* Read in segment. */
964 			if (fseeko(fp, (off_t)phdr[i].p_offset,
965 			    SEEK_SET) == -1) {
966 				free(phdr);
967 				return 1;
968 			}
969 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
970 			    phdr[i].p_filesz) {
971 				free(phdr);
972 				return 1;
973 			}
974 
975 			first = 0;
976 		}
977 
978 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
979 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
980 			pos = phdr[i].p_paddr;
981 			if (minp > pos)
982 				minp = pos;
983 			pos += phdr[i].p_filesz;
984 			if (maxp < pos)
985 				maxp = pos;
986 		}
987 
988 		/* Zero out BSS. */
989 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
990 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
991 			    phdr[i].p_memsz - phdr[i].p_filesz);
992 		}
993 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
994 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
995 			if (maxp < pos)
996 				maxp = pos;
997 		}
998 	}
999 	free(phdr);
1000 
1001 	/*
1002 	 * Copy the ELF and section headers.
1003 	 */
1004 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
1005 	if (flags & (LOAD_HDR | COUNT_HDR))
1006 		maxp += sizeof(Elf32_Ehdr);
1007 
1008 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1009 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
1010 			WARN(("lseek section headers"));
1011 			return 1;
1012 		}
1013 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1014 		shp = malloc(sz);
1015 
1016 		if (fread(shp, 1, sz, fp) != sz) {
1017 			free(shp);
1018 			return 1;
1019 		}
1020 
1021 		shpp = maxp;
1022 		maxp += roundup(sz, sizeof(Elf32_Addr));
1023 
1024 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1025 		char *shstr = malloc(shstrsz);
1026 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1027 		    SEEK_SET) == -1) {
1028 			free(shstr);
1029 			free(shp);
1030 			return 1;
1031 		}
1032 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1033 			free(shstr);
1034 			free(shp);
1035 			return 1;
1036 		}
1037 
1038 		/*
1039 		 * Now load the symbol sections themselves. Make sure the
1040 		 * sections are aligned. Don't bother with string tables if
1041 		 * there are no symbol sections.
1042 		 */
1043 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1044 
1045 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1046 			if (shp[i].sh_type == SHT_SYMTAB)
1047 				havesyms = 1;
1048 
1049 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1050 			if (shp[i].sh_type == SHT_SYMTAB ||
1051 			    shp[i].sh_type == SHT_STRTAB ||
1052 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1053 				if (havesyms && (flags & LOAD_SYM)) {
1054 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1055 					    SEEK_SET) == -1) {
1056 						free(shstr);
1057 						free(shp);
1058 						return 1;
1059 					}
1060 					if (mread(fp, maxp,
1061 					    shp[i].sh_size) != shp[i].sh_size) {
1062 						free(shstr);
1063 						free(shp);
1064 						return 1;
1065 					}
1066 				}
1067 				maxp += roundup(shp[i].sh_size,
1068 				    sizeof(Elf32_Addr));
1069 				shp[i].sh_offset = off;
1070 				shp[i].sh_flags |= SHF_ALLOC;
1071 				off += roundup(shp[i].sh_size,
1072 				    sizeof(Elf32_Addr));
1073 				first = 0;
1074 			}
1075 		}
1076 		if (flags & LOAD_SYM) {
1077 			mbcopy(shp, shpp, sz);
1078 		}
1079 		free(shstr);
1080 		free(shp);
1081 	}
1082 
1083 	/*
1084 	 * Frob the copied ELF header to give information relative
1085 	 * to elfp.
1086 	 */
1087 	if (flags & LOAD_HDR) {
1088 		elf->e_phoff = 0;
1089 		elf->e_shoff = sizeof(Elf32_Ehdr);
1090 		elf->e_phentsize = 0;
1091 		elf->e_phnum = 0;
1092 		mbcopy(elf, elfp, sizeof(*elf));
1093 	}
1094 
1095 	marks[MARK_START] = LOADADDR(minp);
1096 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1097 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1098 	marks[MARK_SYM] = LOADADDR(elfp);
1099 	marks[MARK_END] = LOADADDR(maxp);
1100 
1101 	return 0;
1102 }
1103