xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.33 2018/12/12 21:20:57 claudio Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/specialreg.h>
104 #include <machine/pte.h>
105 
106 #include "loadfile.h"
107 #include "vmd.h"
108 
/*
 * ELF executable header read from the kernel image.  The union lets the
 * 32-bit and 64-bit load paths inspect the same buffer.
 */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

#ifdef __i386__
typedef uint32_t pt_entry_t;
static void setsegment(struct segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#else
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
#endif
/* Loaders for the two supported kernel ELF classes. */
static int elf32_exec(FILE *, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(FILE *, Elf64_Ehdr *, u_long *, int);
/* Boot-environment construction: BIOS memmap, bootargs, stack, page tables. */
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t, uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
/* Helpers that write into guest physical memory via write_mem(). */
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
136 
137 /*
138  * setsegment
139  *
140  * Initializes a segment selector entry with the provided descriptor.
141  * For the purposes of the bootloader mimiced by vmd(8), we only need
142  * memory-type segment descriptor support.
143  *
144  * This function was copied from machdep.c
145  *
146  * Parameters:
147  *  sd: Address of the entry to initialize
148  *  base: base of the segment
149  *  limit: limit of the segment
150  *  type: type of the segment
151  *  dpl: privilege level of the egment
152  *  def32: default 16/32 bit size of the segment
153  *  gran: granularity of the segment (byte/page)
154  */
#ifdef __i386__
static void
setsegment(struct segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#else
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
    int type, int dpl, int def32, int gran)
#endif
{
	/* Split the limit across the low 16-bit and high 4-bit fields. */
	sd->sd_lolimit = (int)limit;
	sd->sd_lobase = (int)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;		/* segment present */
	sd->sd_hilimit = (int)limit >> 16;
#ifdef __i386__
	sd->sd_xx = 0;		/* unused/reserved bits */
#else
	sd->sd_avl = 0;		/* available-to-software bit unused */
	sd->sd_long = 0;	/* not a 64-bit code segment */
#endif
	sd->sd_def32 = def32;
	sd->sd_gran = gran;
	/* Split the base across the low 24-bit and high 8-bit fields. */
	sd->sd_hibase = (int)base >> 24;
}
181 
182 /*
183  * push_gdt
184  *
185  * Allocates and populates a page in the guest phys memory space to hold
186  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
187  * create the same GDT that a real bootloader would have created.
188  * This is loaded into the guest phys RAM space at address GDT_PAGE.
189  */
190 static void
191 push_gdt(void)
192 {
193 	uint8_t gdtpage[PAGE_SIZE];
194 #ifdef __i386__
195 	struct segment_descriptor *sd;
196 #else
197 	struct mem_segment_descriptor *sd;
198 #endif
199 
200 	memset(&gdtpage, 0, sizeof(gdtpage));
201 
202 #ifdef __i386__
203 	sd = (struct segment_descriptor *)&gdtpage;
204 #else
205 	sd = (struct mem_segment_descriptor *)&gdtpage;
206 #endif
207 
208 	/*
209 	 * Create three segment descriptors:
210 	 *
211 	 * GDT[0] : null desriptor. "Created" via memset above.
212 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
213 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
214 	 */
215 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
216 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
217 
218 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
219 }
220 
221 /*
222  * push_pt_32
223  *
224  * Create an identity-mapped page directory hierarchy mapping the first
225  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
226  * CPUs without unrestricted guest capability.
227  */
228 static void
229 push_pt_32(void)
230 {
231 	uint32_t ptes[1024], i;
232 
233 	memset(ptes, 0, sizeof(ptes));
234 	for (i = 0 ; i < 1024; i++) {
235 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
236 	}
237 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
238 }
239 
240 /*
241  * push_pt_64
242  *
243  * Create an identity-mapped page directory hierarchy mapping the first
244  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
245  * CPUs without unrestricted guest capability.
246  */
247 static void
248 push_pt_64(void)
249 {
250 	uint64_t ptes[512], i;
251 
252 	/* PDPDE0 - first 1GB */
253 	memset(ptes, 0, sizeof(ptes));
254 	ptes[0] = PG_V | PML3_PAGE;
255 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
256 
257 	/* PDE0 - first 1GB */
258 	memset(ptes, 0, sizeof(ptes));
259 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
260 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
261 
262 	/* First 1GB (in 2MB pages) */
263 	memset(ptes, 0, sizeof(ptes));
264 	for (i = 0 ; i < 512; i++) {
265 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
266 	}
267 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
268 }
269 
270 /*
271  * loadfile_elf
272  *
273  * Loads an ELF kernel to it's defined load address in the guest VM.
274  * The kernel is loaded to its defined start point as set in the ELF header.
275  *
276  * Parameters:
277  *  fp: file of a kernel file to load
278  *  vcp: the VM create parameters, holding the exact memory map
279  *  (out) vrs: register state to set on init for this kernel
280  *  bootdev: the optional non-default boot device
281  *  howto: optional boot flags for the kernel
282  *
283  * Return values:
284  *  0 if successful
285  *  various error codes returned from read(2) or loadelf functions
286  */
int
loadfile_elf(FILE *fp, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs, uint32_t bootdev, uint32_t howto,
    unsigned int bootdevice)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
	bios_bootmac_t bm, *bootmac = NULL;

	/* Read the ELF header; both class variants share the 'hdr' buffer. */
	if ((r = fread(&hdr, 1, sizeof(hdr), fp)) != sizeof(hdr))
		return 1;

	/* Dispatch on the ELF class found in e_ident. */
	memset(&marks, 0, sizeof(marks));
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		errno = ENOEXEC;
	/*
	 * NOTE(review): in the ENOEXEC case 'r' still holds the nonzero
	 * fread() byte count, so the error return below is (subtly) taken.
	 */

	if (r)
		return (r);

	/* Boot-time GDT that a real bootloader would have installed. */
	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else
		push_pt_64();

	/* Netbooting guests also get the boot interface MAC in bootargs. */
	if (bootdevice & VMBOOTDEV_NET) {
		bootmac = &bm;
		memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN);
	}
	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n, bootmac);
	stacksize = push_stack(bootargsz, marks[MARK_END], bootdev, howto);

	/* Point the instruction and stack pointers at the loaded kernel. */
#ifdef __i386__
	vrs->vrs_gprs[VCPU_REGS_EIP] = (uint32_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_ESP] = (uint32_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
#else
	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
#endif
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}
349 
350 /*
351  * create_bios_memmap
352  *
353  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
354  *
355  * Parameters:
356  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
357  *   memmap (out): the BIOS memory map
358  *
359  * Return values:
360  * Number of bios_memmap_t entries, including the terminating nul-entry.
361  */
362 static size_t
363 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
364 {
365 	size_t i, n = 0, sz;
366 	paddr_t gpa;
367 	struct vm_mem_range *vmr;
368 
369 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
370 		vmr = &vcp->vcp_memranges[i];
371 		gpa = vmr->vmr_gpa;
372 		sz = vmr->vmr_size;
373 
374 		/*
375 		 * Make sure that we do not mark the ROM/video RAM area in the
376 		 * low memory as physcal memory available to the kernel.
377 		 */
378 		if (gpa < 0x100000 && gpa + sz > LOWMEM_KB * 1024) {
379 			if (gpa >= LOWMEM_KB * 1024)
380 				sz = 0;
381 			else
382 				sz = LOWMEM_KB * 1024 - gpa;
383 		}
384 
385 		if (sz != 0) {
386 			memmap[n].addr = gpa;
387 			memmap[n].size = sz;
388 			memmap[n].type = 0x1;	/* Type 1 : Normal memory */
389 			n++;
390 		}
391 	}
392 
393 	/* Null mem map entry to denote the end of the ranges */
394 	memmap[n].addr = 0x0;
395 	memmap[n].size = 0x0;
396 	memmap[n].type = 0x0;
397 	n++;
398 
399 	return (n);
400 }
401 
402 /*
403  * push_bootargs
404  *
405  * Creates the boot arguments page in the guest address space.
406  * Since vmd(8) is acting as the bootloader, we need to create the same boot
407  * arguments page that a real bootloader would have created. This is loaded
408  * into the guest phys RAM space at address BOOTARGS_PAGE.
409  *
410  * Parameters:
411  *  memmap: the BIOS memory map
412  *  n: number of entries in memmap
413  *
414  * Return values:
415  *  The size of the bootargs
416  */
417 static uint32_t
418 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
419 {
420 	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
421 	bios_consdev_t consdev;
422 	uint32_t ba[1024];
423 
424 	memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
425 	ba[0] = 0x0;    /* memory map */
426 	ba[1] = memmap_sz;
427 	ba[2] = memmap_sz;	/* next */
428 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
429 	i = memmap_sz / sizeof(int);
430 
431 	/* Serial console device, COM1 @ 0x3f8 */
432 	consdev.consdev = makedev(8, 0);	/* com1 @ 0x3f8 */
433 	consdev.conspeed = 115200;
434 	consdev.consaddr = 0x3f8;
435 	consdev.consfreq = 0;
436 
437 	consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
438 	ba[i] = 0x5;   /* consdev */
439 	ba[i + 1] = consdev_sz;
440 	ba[i + 2] = consdev_sz;
441 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
442 	i += consdev_sz / sizeof(int);
443 
444 	if (bootmac) {
445 		bootmac_sz = 3 * sizeof(int) + (sizeof(bios_bootmac_t) + 3) & ~3;
446 		ba[i] = 0x7;   /* bootmac */
447 		ba[i + 1] = bootmac_sz;
448 		ba[i + 2] = bootmac_sz;
449 		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
450 		i += bootmac_sz / sizeof(int);
451 	}
452 
453 	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
454 
455 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
456 
457 	return (i * sizeof(int));
458 }
459 
460 /*
461  * push_stack
462  *
463  * Creates the boot stack page in the guest address space. When using a real
464  * bootloader, the stack will be prepared using the following format before
465  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
466  * layout. The stack content is pushed to the guest phys RAM at address
467  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
468  * 4 bytes.
469  *
470  * Stack Layout: (TOS == Top Of Stack)
471  *  TOS		location of boot arguments page
472  *  TOS - 0x4	size of the content in the boot arguments page
473  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
474  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
475  *  TOS - 0x10	kernel 'end' symbol value
476  *  TOS - 0x14	version of bootarg API
477  *
478  * Parameters:
479  *  bootargsz: size of boot arguments
480  *  end: kernel 'end' symbol value
481  *  bootdev: the optional non-default boot device
482  *  howto: optional boot flags for the kernel
483  *
484  * Return values:
485  *  size of the stack
486  */
487 static size_t
488 push_stack(uint32_t bootargsz, uint32_t end, uint32_t bootdev, uint32_t howto)
489 {
490 	uint32_t stack[1024];
491 	uint16_t loc;
492 
493 	memset(&stack, 0, sizeof(stack));
494 	loc = 1024;
495 
496 	if (bootdev == 0)
497 		bootdev = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
498 
499 	stack[--loc] = BOOTARGS_PAGE;
500 	stack[--loc] = bootargsz;
501 	stack[--loc] = 0; /* biosbasemem */
502 	stack[--loc] = 0; /* biosextmem */
503 	stack[--loc] = end;
504 	stack[--loc] = 0x0e;
505 	stack[--loc] = bootdev;
506 	stack[--loc] = howto;
507 
508 	write_mem(STACK_PAGE, &stack, PAGE_SIZE);
509 
510 	return (1024 - (loc - 1)) * sizeof(uint32_t);
511 }
512 
513 /*
514  * mread
515  *
516  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
517  * into the guest address space at paddr 'addr'.
518  *
519  * Parameters:
520  *  fd: file descriptor of the kernel image file to read from.
521  *  addr: guest paddr_t to load to
522  *  sz: number of bytes to load
523  *
524  * Return values:
525  *  returns 'sz' if successful, or 0 otherwise.
526  */
size_t
mread(FILE *fp, paddr_t addr, size_t sz)
{
	size_t ct;
	size_t i, rd, osz;	/* rd: running byte count (currently write-only) */
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	rd = 0;
	osz = sz;
	/* Head chunk: bring 'addr' up to the next page boundary first. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;
		/*
		 * NOTE(review): when sz <= PAGE_SIZE the head chunk is the
		 * whole request and may still straddle a page boundary;
		 * presumably write_mem() copes with multi-page spans —
		 * confirm against vmm.c.
		 */

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Remaining bytes after the (possibly empty) head chunk. */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* Body: page-sized chunks, with a short tail chunk at the end. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if (fread(buf, 1, ct, fp) != ct) {
			log_warn("%s: error %d in mread", __progname, errno);
			return (0);
		}
		rd += ct;

		if (write_mem(addr, buf, ct))
			return (0);
	}

	/* Success returns the originally requested size. */
	return (osz);
}
584 
585 /*
586  * marc4random_buf
587  *
588  * load 'sz' bytes of random data into the guest address space at paddr
589  * 'addr'.
590  *
591  * Parameters:
592  *  addr: guest paddr_t to load random bytes into
593  *  sz: number of random bytes to load
594  *
595  * Return values:
596  *  nothing
597  */
598 static void
599 marc4random_buf(paddr_t addr, int sz)
600 {
601 	int i, ct;
602 	char buf[PAGE_SIZE];
603 
604 	/*
605 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
606 	 * write_mem
607 	 */
608 	ct = 0;
609 	if (addr % PAGE_SIZE != 0) {
610 		memset(buf, 0, sizeof(buf));
611 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
612 
613 		arc4random_buf(buf, ct);
614 
615 		if (write_mem(addr, buf, ct))
616 			return;
617 
618 		addr += ct;
619 	}
620 
621 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
622 		memset(buf, 0, sizeof(buf));
623 		if (i + PAGE_SIZE > sz)
624 			ct = sz - i;
625 		else
626 			ct = PAGE_SIZE;
627 
628 		arc4random_buf(buf, ct);
629 
630 		if (write_mem(addr, buf, ct))
631 			return;
632 	}
633 }
634 
635 /*
636  * mbzero
637  *
638  * load 'sz' bytes of zeros into the guest address space at paddr
639  * 'addr'.
640  *
641  * Parameters:
642  *  addr: guest paddr_t to zero
643  *  sz: number of zero bytes to store
644  *
645  * Return values:
646  *  nothing
647  */
648 static void
649 mbzero(paddr_t addr, int sz)
650 {
651 	if (write_mem(addr, NULL, sz))
652 		return;
653 }
654 
655 /*
656  * mbcopy
657  *
658  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
659  *
660  * Parameters:
661  *  src: source buffer to copy from
662  *  dst: destination guest paddr_t to copy to
663  *  sz: number of bytes to copy
664  *
665  * Return values:
666  *  nothing
667  */
668 static void
669 mbcopy(void *src, paddr_t dst, int sz)
670 {
671 	write_mem(dst, src, sz);
672 }
673 
674 /*
675  * elf64_exec
676  *
677  * Load the kernel indicated by 'fd' into the guest physical memory
678  * space, at the addresses defined in the ELF header.
679  *
680  * This function is used for 64 bit kernels.
681  *
682  * Parameters:
683  *  fd: file descriptor of the kernel to load
684  *  elf: ELF header of the kernel
685  *  marks: array to store the offsets of various kernel structures
686  *      (start, bss, etc)
687  *  flags: flag value to indicate which section(s) to load (usually
688  *      LOAD_ALL)
689  *
690  * Return values:
691  *  0 if successful
692  *  1 if unsuccessful
693  */
694 static int
695 elf64_exec(FILE *fp, Elf64_Ehdr *elf, u_long *marks, int flags)
696 {
697 	Elf64_Shdr *shp;
698 	Elf64_Phdr *phdr;
699 	Elf64_Off off;
700 	int i;
701 	size_t sz;
702 	int first;
703 	int havesyms, havelines;
704 	paddr_t minp = ~0, maxp = 0, pos = 0;
705 	paddr_t offset = marks[MARK_START], shpp, elfp;
706 
707 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
708 	phdr = malloc(sz);
709 
710 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
711 		free(phdr);
712 		return 1;
713 	}
714 
715 	if (fread(phdr, 1, sz, fp) != sz) {
716 		free(phdr);
717 		return 1;
718 	}
719 
720 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
721 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
722 			int m;
723 
724 			/* Fill segment if asked for. */
725 			if (flags & LOAD_RANDOM) {
726 				for (pos = 0; pos < phdr[i].p_filesz;
727 				    pos += m) {
728 					m = phdr[i].p_filesz - pos;
729 					marc4random_buf(phdr[i].p_paddr + pos,
730 					    m);
731 				}
732 			}
733 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
734 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
735 				marks[MARK_ERANDOM] =
736 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
737 			}
738 			continue;
739 		}
740 
741 		if (phdr[i].p_type != PT_LOAD ||
742 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
743 			continue;
744 
745 #define IS_TEXT(p)	(p.p_flags & PF_X)
746 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
747 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
748 		/*
749 		 * XXX: Assume first address is lowest
750 		 */
751 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
752 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
753 
754 			/* Read in segment. */
755 			if (fseeko(fp, (off_t)phdr[i].p_offset,
756 			    SEEK_SET) == -1) {
757 				free(phdr);
758 				return 1;
759 			}
760 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
761 			    phdr[i].p_filesz) {
762 				free(phdr);
763 				return 1;
764 			}
765 
766 			first = 0;
767 		}
768 
769 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
770 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
771 			pos = phdr[i].p_paddr;
772 			if (minp > pos)
773 				minp = pos;
774 			pos += phdr[i].p_filesz;
775 			if (maxp < pos)
776 				maxp = pos;
777 		}
778 
779 		/* Zero out BSS. */
780 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
781 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
782 			    phdr[i].p_memsz - phdr[i].p_filesz);
783 		}
784 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
785 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
786 			if (maxp < pos)
787 				maxp = pos;
788 		}
789 	}
790 	free(phdr);
791 
792 	/*
793 	 * Copy the ELF and section headers.
794 	 */
795 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
796 	if (flags & (LOAD_HDR | COUNT_HDR))
797 		maxp += sizeof(Elf64_Ehdr);
798 
799 	if (flags & (LOAD_SYM | COUNT_SYM)) {
800 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
801 			WARN(("lseek section headers"));
802 			return 1;
803 		}
804 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
805 		shp = malloc(sz);
806 
807 		if (fread(shp, 1, sz, fp) != sz) {
808 			free(shp);
809 			return 1;
810 		}
811 
812 		shpp = maxp;
813 		maxp += roundup(sz, sizeof(Elf64_Addr));
814 
815 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
816 		char *shstr = malloc(shstrsz);
817 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
818 		    SEEK_SET) == -1) {
819 			free(shstr);
820 			free(shp);
821 			return 1;
822 		}
823 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
824 			free(shstr);
825 			free(shp);
826 			return 1;
827 		}
828 
829 		/*
830 		 * Now load the symbol sections themselves. Make sure the
831 		 * sections are aligned. Don't bother with string tables if
832 		 * there are no symbol sections.
833 		 */
834 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
835 
836 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
837 			if (shp[i].sh_type == SHT_SYMTAB)
838 				havesyms = 1;
839 
840 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
841 			if (shp[i].sh_type == SHT_SYMTAB ||
842 			    shp[i].sh_type == SHT_STRTAB ||
843 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
844 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
845 				if (havesyms && (flags & LOAD_SYM)) {
846 					if (fseeko(fp, (off_t)shp[i].sh_offset,
847 					    SEEK_SET) == -1) {
848 						free(shstr);
849 						free(shp);
850 						return 1;
851 					}
852 					if (mread(fp, maxp,
853 					    shp[i].sh_size) != shp[i].sh_size) {
854 						free(shstr);
855 						free(shp);
856 						return 1;
857 					}
858 				}
859 				maxp += roundup(shp[i].sh_size,
860 				    sizeof(Elf64_Addr));
861 				shp[i].sh_offset = off;
862 				shp[i].sh_flags |= SHF_ALLOC;
863 				off += roundup(shp[i].sh_size,
864 				    sizeof(Elf64_Addr));
865 				first = 0;
866 			}
867 		}
868 		if (flags & LOAD_SYM) {
869 			mbcopy(shp, shpp, sz);
870 		}
871 		free(shstr);
872 		free(shp);
873 	}
874 
875 	/*
876 	 * Frob the copied ELF header to give information relative
877 	 * to elfp.
878 	 */
879 	if (flags & LOAD_HDR) {
880 		elf->e_phoff = 0;
881 		elf->e_shoff = sizeof(Elf64_Ehdr);
882 		elf->e_phentsize = 0;
883 		elf->e_phnum = 0;
884 		mbcopy(elf, elfp, sizeof(*elf));
885 	}
886 
887 	marks[MARK_START] = LOADADDR(minp);
888 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
889 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
890 	marks[MARK_SYM] = LOADADDR(elfp);
891 	marks[MARK_END] = LOADADDR(maxp);
892 
893 	return 0;
894 }
895 
896 /*
897  * elf32_exec
898  *
899  * Load the kernel indicated by 'fd' into the guest physical memory
900  * space, at the addresses defined in the ELF header.
901  *
902  * This function is used for 32 bit kernels.
903  *
904  * Parameters:
905  *  fd: file descriptor of the kernel to load
906  *  elf: ELF header of the kernel
907  *  marks: array to store the offsets of various kernel structures
908  *      (start, bss, etc)
909  *  flags: flag value to indicate which section(s) to load (usually
910  *      LOAD_ALL)
911  *
912  * Return values:
913  *  0 if successful
914  *  1 if unsuccessful
915  */
916 static int
917 elf32_exec(FILE *fp, Elf32_Ehdr *elf, u_long *marks, int flags)
918 {
919 	Elf32_Shdr *shp;
920 	Elf32_Phdr *phdr;
921 	Elf32_Off off;
922 	int i;
923 	size_t sz;
924 	int first;
925 	int havesyms, havelines;
926 	paddr_t minp = ~0, maxp = 0, pos = 0;
927 	paddr_t offset = marks[MARK_START], shpp, elfp;
928 
929 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
930 	phdr = malloc(sz);
931 
932 	if (fseeko(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
933 		free(phdr);
934 		return 1;
935 	}
936 
937 	if (fread(phdr, 1, sz, fp) != sz) {
938 		free(phdr);
939 		return 1;
940 	}
941 
942 	for (first = 1, i = 0; i < elf->e_phnum; i++) {
943 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
944 			int m;
945 
946 			/* Fill segment if asked for. */
947 			if (flags & LOAD_RANDOM) {
948 				for (pos = 0; pos < phdr[i].p_filesz;
949 				    pos += m) {
950 					m = phdr[i].p_filesz - pos;
951 					marc4random_buf(phdr[i].p_paddr + pos,
952 					    m);
953 				}
954 			}
955 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
956 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
957 				marks[MARK_ERANDOM] =
958 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
959 			}
960 			continue;
961 		}
962 
963 		if (phdr[i].p_type != PT_LOAD ||
964 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
965 			continue;
966 
967 #define IS_TEXT(p)	(p.p_flags & PF_X)
968 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
969 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
970 		/*
971 		 * XXX: Assume first address is lowest
972 		 */
973 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
974 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
975 
976 			/* Read in segment. */
977 			if (fseeko(fp, (off_t)phdr[i].p_offset,
978 			    SEEK_SET) == -1) {
979 				free(phdr);
980 				return 1;
981 			}
982 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
983 			    phdr[i].p_filesz) {
984 				free(phdr);
985 				return 1;
986 			}
987 
988 			first = 0;
989 		}
990 
991 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
992 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
993 			pos = phdr[i].p_paddr;
994 			if (minp > pos)
995 				minp = pos;
996 			pos += phdr[i].p_filesz;
997 			if (maxp < pos)
998 				maxp = pos;
999 		}
1000 
1001 		/* Zero out BSS. */
1002 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
1003 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
1004 			    phdr[i].p_memsz - phdr[i].p_filesz);
1005 		}
1006 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
1007 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
1008 			if (maxp < pos)
1009 				maxp = pos;
1010 		}
1011 	}
1012 	free(phdr);
1013 
1014 	/*
1015 	 * Copy the ELF and section headers.
1016 	 */
1017 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
1018 	if (flags & (LOAD_HDR | COUNT_HDR))
1019 		maxp += sizeof(Elf32_Ehdr);
1020 
1021 	if (flags & (LOAD_SYM | COUNT_SYM)) {
1022 		if (fseeko(fp, (off_t)elf->e_shoff, SEEK_SET) == -1)  {
1023 			WARN(("lseek section headers"));
1024 			return 1;
1025 		}
1026 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
1027 		shp = malloc(sz);
1028 
1029 		if (fread(shp, 1, sz, fp) != sz) {
1030 			free(shp);
1031 			return 1;
1032 		}
1033 
1034 		shpp = maxp;
1035 		maxp += roundup(sz, sizeof(Elf32_Addr));
1036 
1037 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
1038 		char *shstr = malloc(shstrsz);
1039 		if (fseeko(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
1040 		    SEEK_SET) == -1) {
1041 			free(shstr);
1042 			free(shp);
1043 			return 1;
1044 		}
1045 		if (fread(shstr, 1, shstrsz, fp) != shstrsz) {
1046 			free(shstr);
1047 			free(shp);
1048 			return 1;
1049 		}
1050 
1051 		/*
1052 		 * Now load the symbol sections themselves. Make sure the
1053 		 * sections are aligned. Don't bother with string tables if
1054 		 * there are no symbol sections.
1055 		 */
1056 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1057 
1058 		for (havesyms = havelines = i = 0; i < elf->e_shnum; i++)
1059 			if (shp[i].sh_type == SHT_SYMTAB)
1060 				havesyms = 1;
1061 
1062 		for (first = 1, i = 0; i < elf->e_shnum; i++) {
1063 			if (shp[i].sh_type == SHT_SYMTAB ||
1064 			    shp[i].sh_type == SHT_STRTAB ||
1065 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1066 				if (havesyms && (flags & LOAD_SYM)) {
1067 					if (fseeko(fp, (off_t)shp[i].sh_offset,
1068 					    SEEK_SET) == -1) {
1069 						free(shstr);
1070 						free(shp);
1071 						return 1;
1072 					}
1073 					if (mread(fp, maxp,
1074 					    shp[i].sh_size) != shp[i].sh_size) {
1075 						free(shstr);
1076 						free(shp);
1077 						return 1;
1078 					}
1079 				}
1080 				maxp += roundup(shp[i].sh_size,
1081 				    sizeof(Elf32_Addr));
1082 				shp[i].sh_offset = off;
1083 				shp[i].sh_flags |= SHF_ALLOC;
1084 				off += roundup(shp[i].sh_size,
1085 				    sizeof(Elf32_Addr));
1086 				first = 0;
1087 			}
1088 		}
1089 		if (flags & LOAD_SYM) {
1090 			mbcopy(shp, shpp, sz);
1091 		}
1092 		free(shstr);
1093 		free(shp);
1094 	}
1095 
1096 	/*
1097 	 * Frob the copied ELF header to give information relative
1098 	 * to elfp.
1099 	 */
1100 	if (flags & LOAD_HDR) {
1101 		elf->e_phoff = 0;
1102 		elf->e_shoff = sizeof(Elf32_Ehdr);
1103 		elf->e_phentsize = 0;
1104 		elf->e_phnum = 0;
1105 		mbcopy(elf, elfp, sizeof(*elf));
1106 	}
1107 
1108 	marks[MARK_START] = LOADADDR(minp);
1109 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1110 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1111 	marks[MARK_SYM] = LOADADDR(elfp);
1112 	marks[MARK_END] = LOADADDR(maxp);
1113 
1114 	return 0;
1115 }
1116