xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision 6bae335dd015f9e023db26fea05e06c52cf1d0d7)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.46 2023/04/19 12:58:16 jsg Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <stddef.h>
98 
99 #include <machine/vmmvar.h>
100 #include <machine/biosvar.h>
101 #include <machine/segments.h>
102 #include <machine/specialreg.h>
103 #include <machine/pte.h>
104 
105 #include "loadfile.h"
106 #include "vmd.h"
107 
/*
 * LOADADDR() masks a kernel physical address into the guest's low
 * physical memory window (low 256MB).  'offset' is a local variable in
 * the elf{32,64}_exec() callers (marks[MARK_START], zeroed by
 * loadfile_elf() before use).
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* ELF executable header of the kernel being loaded (32 or 64 bit form). */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

/* Boot-time GDT construction helper. */
static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
/* Per-ELF-class kernel loaders. */
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
/* Emulation of the BIOS/bootloader environment handed to the kernel. */
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
/* Guest physical memory store helpers (all go through write_mem()). */
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;	/* for log messages */
extern int vm_id;
131 
132 /*
133  * setsegment
134  *
135  * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
137  * memory-type segment descriptor support.
138  *
139  * This function was copied from machdep.c
140  *
141  * Parameters:
142  *  sd: Address of the entry to initialize
143  *  base: base of the segment
144  *  limit: limit of the segment
145  *  type: type of the segment
 *  dpl: privilege level of the segment
147  *  def32: default 16/32 bit size of the segment
148  *  gran: granularity of the segment (byte/page)
149  */
150 static void
151 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
152     int type, int dpl, int def32, int gran)
153 {
154 	sd->sd_lolimit = (int)limit;
155 	sd->sd_lobase = (int)base;
156 	sd->sd_type = type;
157 	sd->sd_dpl = dpl;
158 	sd->sd_p = 1;
159 	sd->sd_hilimit = (int)limit >> 16;
160 	sd->sd_avl = 0;
161 	sd->sd_long = 0;
162 	sd->sd_def32 = def32;
163 	sd->sd_gran = gran;
164 	sd->sd_hibase = (int)base >> 24;
165 }
166 
167 /*
168  * push_gdt
169  *
170  * Allocates and populates a page in the guest phys memory space to hold
171  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
172  * create the same GDT that a real bootloader would have created.
173  * This is loaded into the guest phys RAM space at address GDT_PAGE.
174  */
175 static void
176 push_gdt(void)
177 {
178 	uint8_t gdtpage[PAGE_SIZE];
179 	struct mem_segment_descriptor *sd;
180 
181 	memset(&gdtpage, 0, sizeof(gdtpage));
182 
183 	sd = (struct mem_segment_descriptor *)&gdtpage;
184 
185 	/*
186 	 * Create three segment descriptors:
187 	 *
188 	 * GDT[0] : null descriptor. "Created" via memset above.
189 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
190 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
191 	 */
192 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
193 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
194 
195 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
196 }
197 
198 /*
199  * push_pt_32
200  *
201  * Create an identity-mapped page directory hierarchy mapping the first
202  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
203  * CPUs without unrestricted guest capability.
204  */
205 static void
206 push_pt_32(void)
207 {
208 	uint32_t ptes[1024], i;
209 
210 	memset(ptes, 0, sizeof(ptes));
211 	for (i = 0 ; i < 1024; i++) {
212 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
213 	}
214 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
215 }
216 
217 /*
218  * push_pt_64
219  *
220  * Create an identity-mapped page directory hierarchy mapping the first
221  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
222  * CPUs without unrestricted guest capability.
223  */
224 static void
225 push_pt_64(void)
226 {
227 	uint64_t ptes[512], i;
228 
229 	/* PDPDE0 - first 1GB */
230 	memset(ptes, 0, sizeof(ptes));
231 	ptes[0] = PG_V | PML3_PAGE;
232 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
233 
234 	/* PDE0 - first 1GB */
235 	memset(ptes, 0, sizeof(ptes));
236 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
237 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
238 
239 	/* First 1GB (in 2MB pages) */
240 	memset(ptes, 0, sizeof(ptes));
241 	for (i = 0 ; i < 512; i++) {
242 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
243 	}
244 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
245 }
246 
247 /*
248  * loadfile_elf
249  *
250  * Loads an ELF kernel to its defined load address in the guest VM.
251  * The kernel is loaded to its defined start point as set in the ELF header.
252  *
253  * Parameters:
254  *  fp: file of a kernel file to load
255  *  vcp: the VM create parameters, holding the exact memory map
256  *  (out) vrs: register state to set on init for this kernel
 *  bootdevice: kind of boot device (VMBOOTDEV_*); VMBOOTDEV_NET
 *      additionally passes the first interface's MAC as a boot argument
259  *
260  * Return values:
261  *  0 if successful
262  *  various error codes returned from gzread(3) or loadelf functions
263  */
int
loadfile_elf(gzFile fp, struct vm_create_params *vcp,
    struct vcpu_reg_state *vrs, unsigned int bootdevice)
{
	int r, is_i386 = 0;
	uint32_t bootargsz;
	size_t n, stacksize;
	u_long marks[MARK_MAX];
	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
	bios_bootmac_t bm, *bootmac = NULL;

	/* Read the ELF header; the union is large enough for either class. */
	if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr))
		return 1;

	memset(&marks, 0, sizeof(marks));
	/* Dispatch on the ELF class found in e_ident. */
	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
		is_i386 = 1;
	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
	} else
		errno = ENOEXEC;
	/*
	 * In the ENOEXEC case 'r' still holds the byte count from the
	 * successful gzread() above (nonzero), so the check below also
	 * fails the load for unrecognized images.
	 */

	if (r)
		return (r);

	/* vmd acts as the bootloader: build the boot-time GDT. */
	push_gdt();

	if (is_i386) {
		push_pt_32();
		/* Reconfigure the default flat-64 register set for 32 bit */
		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
	}
	else
		push_pt_64();

	/* Network boot also passes the first interface's MAC as a bootarg. */
	if (bootdevice == VMBOOTDEV_NET) {
		bootmac = &bm;
		memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN);
	}
	/* Build the BIOS memory map, boot arguments and the boot stack. */
	n = create_bios_memmap(vcp, memmap);
	bootargsz = push_bootargs(memmap, n, bootmac);
	stacksize = push_stack(bootargsz, marks[MARK_END]);

	/* Start at the kernel's ELF entry point with the prepared stack. */
	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
	vrs->vrs_gdtr.vsi_base = GDT_PAGE;

	log_debug("%s: loaded ELF kernel", __func__);

	return (0);
}
320 
321 /*
322  * create_bios_memmap
323  *
324  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
325  *
326  * Parameters:
327  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
328  *   memmap (out): the BIOS memory map
329  *
330  * Return values:
331  * Number of bios_memmap_t entries, including the terminating nul-entry.
332  */
333 static size_t
334 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
335 {
336 	size_t i, n = 0;
337 	struct vm_mem_range *vmr;
338 
339 	for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
340 		vmr = &vcp->vcp_memranges[i];
341 		memmap[n].addr = vmr->vmr_gpa;
342 		memmap[n].size = vmr->vmr_size;
343 		if (vmr->vmr_type == VM_MEM_RAM)
344 			memmap[n].type = BIOS_MAP_FREE;
345 		else
346 			memmap[n].type = BIOS_MAP_RES;
347 	}
348 
349 	/* Null mem map entry to denote the end of the ranges */
350 	memmap[n].addr = 0x0;
351 	memmap[n].size = 0x0;
352 	memmap[n].type = BIOS_MAP_END;
353 	n++;
354 
355 	return (n);
356 }
357 
358 /*
359  * push_bootargs
360  *
361  * Creates the boot arguments page in the guest address space.
362  * Since vmd(8) is acting as the bootloader, we need to create the same boot
363  * arguments page that a real bootloader would have created. This is loaded
364  * into the guest phys RAM space at address BOOTARGS_PAGE.
365  *
366  * Parameters:
367  *  memmap: the BIOS memory map
368  *  n: number of entries in memmap
369  *  bootmac: optional PXE boot MAC address
370  *
371  * Return values:
372  *  The size of the bootargs in bytes
373  */
374 static uint32_t
375 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
376 {
377 	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
378 	bios_consdev_t consdev;
379 	uint32_t ba[1024];
380 
381 	memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
382 	ba[0] = BOOTARG_MEMMAP;
383 	ba[1] = memmap_sz;
384 	ba[2] = memmap_sz;
385 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
386 	i = memmap_sz / sizeof(uint32_t);
387 
388 	/* Serial console device, COM1 @ 0x3f8 */
389 	memset(&consdev, 0, sizeof(consdev));
390 	consdev.consdev = makedev(8, 0);
391 	consdev.conspeed = 115200;
392 	consdev.consaddr = 0x3f8;
393 
394 	consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
395 	ba[i] = BOOTARG_CONSDEV;
396 	ba[i + 1] = consdev_sz;
397 	ba[i + 2] = consdev_sz;
398 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
399 	i += consdev_sz / sizeof(uint32_t);
400 
401 	if (bootmac) {
402 		bootmac_sz = 3 * sizeof(uint32_t) +
403 		    (sizeof(bios_bootmac_t) + 3) & ~3;
404 		ba[i] = BOOTARG_BOOTMAC;
405 		ba[i + 1] = bootmac_sz;
406 		ba[i + 2] = bootmac_sz;
407 		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
408 		i += bootmac_sz / sizeof(uint32_t);
409 	}
410 
411 	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
412 
413 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
414 
415 	return (i * sizeof(uint32_t));
416 }
417 
418 /*
419  * push_stack
420  *
421  * Creates the boot stack page in the guest address space. When using a real
422  * bootloader, the stack will be prepared using the following format before
423  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
424  * layout. The stack content is pushed to the guest phys RAM at address
425  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
426  * 4 bytes.
427  *
 * Stack Layout: (TOS == Top Of Stack)
 *  TOS		location of boot arguments page
 *  TOS - 0x4	size of the content in the boot arguments page
 *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
 *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
 *  TOS - 0x10	kernel 'end' symbol value
 *  TOS - 0x14	version of bootarg API (0x0e)
 *  TOS - 0x18	boot device (hardcoded to sd0a)
 *  TOS - 0x1c	howto boot flags (none set)
 *
 * Parameters:
 *  bootargsz: size of boot arguments
 *  end: kernel 'end' symbol value
441  *
442  * Return values:
443  *  size of the stack
444  */
445 static size_t
446 push_stack(uint32_t bootargsz, uint32_t end)
447 {
448 	uint32_t stack[1024];
449 	uint16_t loc;
450 
451 	memset(&stack, 0, sizeof(stack));
452 	loc = 1024;
453 
454 	stack[--loc] = BOOTARGS_PAGE;
455 	stack[--loc] = bootargsz;
456 	stack[--loc] = 0; /* biosbasemem */
457 	stack[--loc] = 0; /* biosextmem */
458 	stack[--loc] = end;
459 	stack[--loc] = 0x0e;
460 	stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
461 	stack[--loc] = 0;
462 
463 	write_mem(STACK_PAGE, &stack, PAGE_SIZE);
464 
465 	return (1024 - (loc - 1)) * sizeof(uint32_t);
466 }
467 
468 /*
469  * mread
470  *
 * Reads 'sz' bytes from the gzip file handle 'fp'
472  * into the guest address space at paddr 'addr'.
473  *
474  * Parameters:
475  *  fp: kernel image file to read from.
476  *  addr: guest paddr_t to load to
477  *  sz: number of bytes to load
478  *
479  * Return values:
480  *  returns 'sz' if successful, or 0 otherwise.
481  */
size_t
mread(gzFile fp, paddr_t addr, size_t sz)
{
	const char *errstr = NULL;
	int errnum = 0;
	size_t ct;
	size_t i, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	osz = sz;	/* remember the requested size for the success return */
	/* Head chunk: bring 'addr' up to the next page boundary first. */
	if ((addr & PAGE_MASK) != 0) {
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;	/* underlying file error */
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	sz = sz - ct;	/* bytes remaining after the head chunk */

	if (sz == 0)
		return (osz);

	/* Whole pages, with a short final chunk if 'sz' is not page-sized. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);
	}

	/* 0 is the error return, so report the full requested size. */
	return (osz);
}
546 
547 /*
548  * marc4random_buf
549  *
550  * load 'sz' bytes of random data into the guest address space at paddr
551  * 'addr'.
552  *
553  * Parameters:
554  *  addr: guest paddr_t to load random bytes into
555  *  sz: number of random bytes to load
556  *
557  * Return values:
558  *  nothing
559  */
560 static void
561 marc4random_buf(paddr_t addr, int sz)
562 {
563 	int i, ct;
564 	char buf[PAGE_SIZE];
565 
566 	/*
567 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
568 	 * write_mem
569 	 */
570 	ct = 0;
571 	if (addr % PAGE_SIZE != 0) {
572 		memset(buf, 0, sizeof(buf));
573 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
574 
575 		arc4random_buf(buf, ct);
576 
577 		if (write_mem(addr, buf, ct))
578 			return;
579 
580 		addr += ct;
581 	}
582 
583 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
584 		memset(buf, 0, sizeof(buf));
585 		if (i + PAGE_SIZE > sz)
586 			ct = sz - i;
587 		else
588 			ct = PAGE_SIZE;
589 
590 		arc4random_buf(buf, ct);
591 
592 		if (write_mem(addr, buf, ct))
593 			return;
594 	}
595 }
596 
597 /*
598  * mbzero
599  *
600  * load 'sz' bytes of zeros into the guest address space at paddr
601  * 'addr'.
602  *
603  * Parameters:
604  *  addr: guest paddr_t to zero
605  *  sz: number of zero bytes to store
606  *
607  * Return values:
608  *  nothing
609  */
610 static void
611 mbzero(paddr_t addr, int sz)
612 {
613 	if (write_mem(addr, NULL, sz))
614 		return;
615 }
616 
617 /*
618  * mbcopy
619  *
620  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
621  *
622  * Parameters:
623  *  src: source buffer to copy from
624  *  dst: destination guest paddr_t to copy to
625  *  sz: number of bytes to copy
626  *
627  * Return values:
628  *  nothing
629  */
630 static void
631 mbcopy(void *src, paddr_t dst, int sz)
632 {
633 	write_mem(dst, src, sz);
634 }
635 
636 /*
637  * elf64_exec
638  *
639  * Load the kernel indicated by 'fp' into the guest physical memory
640  * space, at the addresses defined in the ELF header.
641  *
642  * This function is used for 64 bit kernels.
643  *
644  * Parameters:
645  *  fp: kernel image file to load
646  *  elf: ELF header of the kernel
647  *  marks: array to store the offsets of various kernel structures
648  *      (start, bss, etc)
649  *  flags: flag value to indicate which section(s) to load (usually
650  *      LOAD_ALL)
651  *
652  * Return values:
653  *  0 if successful
654  *  1 if unsuccessful
655  */
656 static int
657 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags)
658 {
659 	Elf64_Shdr *shp;
660 	Elf64_Phdr *phdr;
661 	Elf64_Off off;
662 	int i;
663 	size_t sz;
664 	int havesyms;
665 	paddr_t minp = ~0, maxp = 0, pos = 0;
666 	paddr_t offset = marks[MARK_START], shpp, elfp;
667 
668 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
669 	phdr = malloc(sz);
670 
671 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
672 		free(phdr);
673 		return 1;
674 	}
675 
676 	if ((size_t)gzread(fp, phdr, sz) != sz) {
677 		free(phdr);
678 		return 1;
679 	}
680 
681 	for (i = 0; i < elf->e_phnum; i++) {
682 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
683 			int m;
684 
685 			/* Fill segment if asked for. */
686 			if (flags & LOAD_RANDOM) {
687 				for (pos = 0; pos < phdr[i].p_filesz;
688 				    pos += m) {
689 					m = phdr[i].p_filesz - pos;
690 					marc4random_buf(phdr[i].p_paddr + pos,
691 					    m);
692 				}
693 			}
694 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
695 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
696 				marks[MARK_ERANDOM] =
697 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
698 			}
699 			continue;
700 		}
701 
702 		if (phdr[i].p_type != PT_LOAD ||
703 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
704 			continue;
705 
706 #define IS_TEXT(p)	(p.p_flags & PF_X)
707 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
708 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
709 		/*
710 		 * XXX: Assume first address is lowest
711 		 */
712 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
713 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
714 
715 			/* Read in segment. */
716 			if (gzseek(fp, (off_t)phdr[i].p_offset,
717 			    SEEK_SET) == -1) {
718 				free(phdr);
719 				return 1;
720 			}
721 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
722 			    phdr[i].p_filesz) {
723 				free(phdr);
724 				return 1;
725 			}
726 		}
727 
728 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
729 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
730 			pos = phdr[i].p_paddr;
731 			if (minp > pos)
732 				minp = pos;
733 			pos += phdr[i].p_filesz;
734 			if (maxp < pos)
735 				maxp = pos;
736 		}
737 
738 		/* Zero out BSS. */
739 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
740 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
741 			    phdr[i].p_memsz - phdr[i].p_filesz);
742 		}
743 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
744 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
745 			if (maxp < pos)
746 				maxp = pos;
747 		}
748 	}
749 	free(phdr);
750 
751 	/*
752 	 * Copy the ELF and section headers.
753 	 */
754 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
755 	if (flags & (LOAD_HDR | COUNT_HDR))
756 		maxp += sizeof(Elf64_Ehdr);
757 
758 	if (flags & (LOAD_SYM | COUNT_SYM)) {
759 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
760 			warn("gzseek section headers");
761 			return 1;
762 		}
763 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
764 		shp = malloc(sz);
765 
766 		if ((size_t)gzread(fp, shp, sz) != sz) {
767 			free(shp);
768 			return 1;
769 		}
770 
771 		shpp = maxp;
772 		maxp += roundup(sz, sizeof(Elf64_Addr));
773 
774 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
775 		char *shstr = malloc(shstrsz);
776 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
777 		    SEEK_SET) == -1) {
778 			free(shstr);
779 			free(shp);
780 			return 1;
781 		}
782 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
783 			free(shstr);
784 			free(shp);
785 			return 1;
786 		}
787 
788 		/*
789 		 * Now load the symbol sections themselves. Make sure the
790 		 * sections are aligned. Don't bother with string tables if
791 		 * there are no symbol sections.
792 		 */
793 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
794 
795 		for (havesyms = i = 0; i < elf->e_shnum; i++)
796 			if (shp[i].sh_type == SHT_SYMTAB)
797 				havesyms = 1;
798 
799 		for (i = 0; i < elf->e_shnum; i++) {
800 			if (shp[i].sh_type == SHT_SYMTAB ||
801 			    shp[i].sh_type == SHT_STRTAB ||
802 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
803 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
804 				if (havesyms && (flags & LOAD_SYM)) {
805 					if (gzseek(fp, (off_t)shp[i].sh_offset,
806 					    SEEK_SET) == -1) {
807 						free(shstr);
808 						free(shp);
809 						return 1;
810 					}
811 					if (mread(fp, maxp,
812 					    shp[i].sh_size) != shp[i].sh_size) {
813 						free(shstr);
814 						free(shp);
815 						return 1;
816 					}
817 				}
818 				maxp += roundup(shp[i].sh_size,
819 				    sizeof(Elf64_Addr));
820 				shp[i].sh_offset = off;
821 				shp[i].sh_flags |= SHF_ALLOC;
822 				off += roundup(shp[i].sh_size,
823 				    sizeof(Elf64_Addr));
824 			}
825 		}
826 		if (flags & LOAD_SYM) {
827 			mbcopy(shp, shpp, sz);
828 		}
829 		free(shstr);
830 		free(shp);
831 	}
832 
833 	/*
834 	 * Frob the copied ELF header to give information relative
835 	 * to elfp.
836 	 */
837 	if (flags & LOAD_HDR) {
838 		elf->e_phoff = 0;
839 		elf->e_shoff = sizeof(Elf64_Ehdr);
840 		elf->e_phentsize = 0;
841 		elf->e_phnum = 0;
842 		mbcopy(elf, elfp, sizeof(*elf));
843 	}
844 
845 	marks[MARK_START] = LOADADDR(minp);
846 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
847 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
848 	marks[MARK_SYM] = LOADADDR(elfp);
849 	marks[MARK_END] = LOADADDR(maxp);
850 
851 	return 0;
852 }
853 
854 /*
855  * elf32_exec
856  *
857  * Load the kernel indicated by 'fp' into the guest physical memory
858  * space, at the addresses defined in the ELF header.
859  *
860  * This function is used for 32 bit kernels.
861  *
862  * Parameters:
863  *  fp: kernel image file to load
864  *  elf: ELF header of the kernel
865  *  marks: array to store the offsets of various kernel structures
866  *      (start, bss, etc)
867  *  flags: flag value to indicate which section(s) to load (usually
868  *      LOAD_ALL)
869  *
870  * Return values:
871  *  0 if successful
872  *  1 if unsuccessful
873  */
874 static int
875 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags)
876 {
877 	Elf32_Shdr *shp;
878 	Elf32_Phdr *phdr;
879 	Elf32_Off off;
880 	int i;
881 	size_t sz;
882 	int havesyms;
883 	paddr_t minp = ~0, maxp = 0, pos = 0;
884 	paddr_t offset = marks[MARK_START], shpp, elfp;
885 
886 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
887 	phdr = malloc(sz);
888 
889 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
890 		free(phdr);
891 		return 1;
892 	}
893 
894 	if ((size_t)gzread(fp, phdr, sz) != sz) {
895 		free(phdr);
896 		return 1;
897 	}
898 
899 	for (i = 0; i < elf->e_phnum; i++) {
900 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
901 			int m;
902 
903 			/* Fill segment if asked for. */
904 			if (flags & LOAD_RANDOM) {
905 				for (pos = 0; pos < phdr[i].p_filesz;
906 				    pos += m) {
907 					m = phdr[i].p_filesz - pos;
908 					marc4random_buf(phdr[i].p_paddr + pos,
909 					    m);
910 				}
911 			}
912 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
913 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
914 				marks[MARK_ERANDOM] =
915 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
916 			}
917 			continue;
918 		}
919 
920 		if (phdr[i].p_type != PT_LOAD ||
921 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
922 			continue;
923 
924 #define IS_TEXT(p)	(p.p_flags & PF_X)
925 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
926 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
927 		/*
928 		 * XXX: Assume first address is lowest
929 		 */
930 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
931 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
932 
933 			/* Read in segment. */
934 			if (gzseek(fp, (off_t)phdr[i].p_offset,
935 			    SEEK_SET) == -1) {
936 				free(phdr);
937 				return 1;
938 			}
939 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
940 			    phdr[i].p_filesz) {
941 				free(phdr);
942 				return 1;
943 			}
944 		}
945 
946 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
947 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
948 			pos = phdr[i].p_paddr;
949 			if (minp > pos)
950 				minp = pos;
951 			pos += phdr[i].p_filesz;
952 			if (maxp < pos)
953 				maxp = pos;
954 		}
955 
956 		/* Zero out BSS. */
957 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
958 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
959 			    phdr[i].p_memsz - phdr[i].p_filesz);
960 		}
961 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
962 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
963 			if (maxp < pos)
964 				maxp = pos;
965 		}
966 	}
967 	free(phdr);
968 
969 	/*
970 	 * Copy the ELF and section headers.
971 	 */
972 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
973 	if (flags & (LOAD_HDR | COUNT_HDR))
974 		maxp += sizeof(Elf32_Ehdr);
975 
976 	if (flags & (LOAD_SYM | COUNT_SYM)) {
977 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
978 			warn("lseek section headers");
979 			return 1;
980 		}
981 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
982 		shp = malloc(sz);
983 
984 		if ((size_t)gzread(fp, shp, sz) != sz) {
985 			free(shp);
986 			return 1;
987 		}
988 
989 		shpp = maxp;
990 		maxp += roundup(sz, sizeof(Elf32_Addr));
991 
992 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
993 		char *shstr = malloc(shstrsz);
994 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
995 		    SEEK_SET) == -1) {
996 			free(shstr);
997 			free(shp);
998 			return 1;
999 		}
1000 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
1001 			free(shstr);
1002 			free(shp);
1003 			return 1;
1004 		}
1005 
1006 		/*
1007 		 * Now load the symbol sections themselves. Make sure the
1008 		 * sections are aligned. Don't bother with string tables if
1009 		 * there are no symbol sections.
1010 		 */
1011 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1012 
1013 		for (havesyms = i = 0; i < elf->e_shnum; i++)
1014 			if (shp[i].sh_type == SHT_SYMTAB)
1015 				havesyms = 1;
1016 
1017 		for (i = 0; i < elf->e_shnum; i++) {
1018 			if (shp[i].sh_type == SHT_SYMTAB ||
1019 			    shp[i].sh_type == SHT_STRTAB ||
1020 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1021 				if (havesyms && (flags & LOAD_SYM)) {
1022 					if (gzseek(fp, (off_t)shp[i].sh_offset,
1023 					    SEEK_SET) == -1) {
1024 						free(shstr);
1025 						free(shp);
1026 						return 1;
1027 					}
1028 					if (mread(fp, maxp,
1029 					    shp[i].sh_size) != shp[i].sh_size) {
1030 						free(shstr);
1031 						free(shp);
1032 						return 1;
1033 					}
1034 				}
1035 				maxp += roundup(shp[i].sh_size,
1036 				    sizeof(Elf32_Addr));
1037 				shp[i].sh_offset = off;
1038 				shp[i].sh_flags |= SHF_ALLOC;
1039 				off += roundup(shp[i].sh_size,
1040 				    sizeof(Elf32_Addr));
1041 			}
1042 		}
1043 		if (flags & LOAD_SYM) {
1044 			mbcopy(shp, shpp, sz);
1045 		}
1046 		free(shstr);
1047 		free(shp);
1048 	}
1049 
1050 	/*
1051 	 * Frob the copied ELF header to give information relative
1052 	 * to elfp.
1053 	 */
1054 	if (flags & LOAD_HDR) {
1055 		elf->e_phoff = 0;
1056 		elf->e_shoff = sizeof(Elf32_Ehdr);
1057 		elf->e_phentsize = 0;
1058 		elf->e_phnum = 0;
1059 		mbcopy(elf, elfp, sizeof(*elf));
1060 	}
1061 
1062 	marks[MARK_START] = LOADADDR(minp);
1063 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1064 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1065 	marks[MARK_SYM] = LOADADDR(elfp);
1066 	marks[MARK_END] = LOADADDR(maxp);
1067 
1068 	return 0;
1069 }
1070