xref: /openbsd-src/usr.sbin/vmd/loadfile_elf.c (revision cf08ffabc25f963e835913e6fe6f473b987576f5)
1 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
2 /* $OpenBSD: loadfile_elf.c,v 1.44 2022/12/26 23:50:20 dv Exp $ */
3 
4 /*-
5  * Copyright (c) 1997 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1992, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Ralph Campbell.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)boot.c	8.1 (Berkeley) 6/10/93
66  */
67 
68 /*
69  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
70  *
71  * Permission to use, copy, modify, and distribute this software for any
72  * purpose with or without fee is hereby granted, provided that the above
73  * copyright notice and this permission notice appear in all copies.
74  *
75  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
76  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
77  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
78  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
79  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
80  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
81  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
82  */
83 
84 #include <sys/param.h>	/* PAGE_SIZE PAGE_MASK roundup */
85 #include <sys/ioctl.h>
86 #include <sys/reboot.h>
87 #include <sys/exec.h>
88 
89 #include <elf.h>
90 #include <stdio.h>
91 #include <string.h>
92 #include <errno.h>
93 #include <stdlib.h>
94 #include <unistd.h>
95 #include <fcntl.h>
96 #include <err.h>
97 #include <errno.h>
98 #include <stddef.h>
99 
100 #include <machine/vmmvar.h>
101 #include <machine/biosvar.h>
102 #include <machine/segments.h>
103 #include <machine/specialreg.h>
104 #include <machine/pte.h>
105 
106 #include "loadfile.h"
107 #include "vmd.h"
108 
/*
 * Convert a kernel link-time address to the guest load address: add the
 * relocation 'offset' (a local variable in the elf*_exec functions where
 * this macro is expanded) and mask the result into the low 256MB.
 */
#define LOADADDR(a)            ((((u_long)(a)) + offset)&0xfffffff)

/* Scratch buffer for the kernel's ELF executable header; sized for
 * whichever ELF class (32/64 bit) the image turns out to be. */
union {
	Elf32_Ehdr elf32;
	Elf64_Ehdr elf64;
} hdr;

static void setsegment(struct mem_segment_descriptor *, uint32_t,
    size_t, int, int, int, int);
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
static size_t push_stack(uint32_t, uint32_t);
static void push_gdt(void);
static void push_pt_32(void);
static void push_pt_64(void);
static void marc4random_buf(paddr_t, int);
static void mbzero(paddr_t, int);
static void mbcopy(void *, paddr_t, int);

extern char *__progname;
extern int vm_id;
132 
133 /*
134  * setsegment
135  *
136  * Initializes a segment selector entry with the provided descriptor.
 * For the purposes of the bootloader mimicked by vmd(8), we only need
138  * memory-type segment descriptor support.
139  *
140  * This function was copied from machdep.c
141  *
142  * Parameters:
143  *  sd: Address of the entry to initialize
144  *  base: base of the segment
145  *  limit: limit of the segment
146  *  type: type of the segment
 *  dpl: privilege level of the segment
148  *  def32: default 16/32 bit size of the segment
149  *  gran: granularity of the segment (byte/page)
150  */
151 static void
152 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
153     int type, int dpl, int def32, int gran)
154 {
155 	sd->sd_lolimit = (int)limit;
156 	sd->sd_lobase = (int)base;
157 	sd->sd_type = type;
158 	sd->sd_dpl = dpl;
159 	sd->sd_p = 1;
160 	sd->sd_hilimit = (int)limit >> 16;
161 	sd->sd_avl = 0;
162 	sd->sd_long = 0;
163 	sd->sd_def32 = def32;
164 	sd->sd_gran = gran;
165 	sd->sd_hibase = (int)base >> 24;
166 }
167 
168 /*
169  * push_gdt
170  *
171  * Allocates and populates a page in the guest phys memory space to hold
172  * the boot-time GDT. Since vmd(8) is acting as the bootloader, we need to
173  * create the same GDT that a real bootloader would have created.
174  * This is loaded into the guest phys RAM space at address GDT_PAGE.
175  */
176 static void
177 push_gdt(void)
178 {
179 	uint8_t gdtpage[PAGE_SIZE];
180 	struct mem_segment_descriptor *sd;
181 
182 	memset(&gdtpage, 0, sizeof(gdtpage));
183 
184 	sd = (struct mem_segment_descriptor *)&gdtpage;
185 
186 	/*
187 	 * Create three segment descriptors:
188 	 *
189 	 * GDT[0] : null desriptor. "Created" via memset above.
190 	 * GDT[1] (selector @ 0x8): Executable segment, for CS
191 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
192 	 */
193 	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
194 	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
195 
196 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
197 }
198 
199 /*
200  * push_pt_32
201  *
202  * Create an identity-mapped page directory hierarchy mapping the first
203  * 4GB of physical memory. This is used during bootstrapping i386 VMs on
204  * CPUs without unrestricted guest capability.
205  */
206 static void
207 push_pt_32(void)
208 {
209 	uint32_t ptes[1024], i;
210 
211 	memset(ptes, 0, sizeof(ptes));
212 	for (i = 0 ; i < 1024; i++) {
213 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
214 	}
215 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
216 }
217 
218 /*
219  * push_pt_64
220  *
221  * Create an identity-mapped page directory hierarchy mapping the first
222  * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
223  * CPUs without unrestricted guest capability.
224  */
225 static void
226 push_pt_64(void)
227 {
228 	uint64_t ptes[512], i;
229 
230 	/* PDPDE0 - first 1GB */
231 	memset(ptes, 0, sizeof(ptes));
232 	ptes[0] = PG_V | PML3_PAGE;
233 	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
234 
235 	/* PDE0 - first 1GB */
236 	memset(ptes, 0, sizeof(ptes));
237 	ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
238 	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
239 
240 	/* First 1GB (in 2MB pages) */
241 	memset(ptes, 0, sizeof(ptes));
242 	for (i = 0 ; i < 512; i++) {
243 		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
244 	}
245 	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
246 }
247 
248 /*
249  * loadfile_elf
250  *
251  * Loads an ELF kernel to its defined load address in the guest VM.
252  * The kernel is loaded to its defined start point as set in the ELF header.
253  *
254  * Parameters:
255  *  fp: file of a kernel file to load
256  *  vcp: the VM create parameters, holding the exact memory map
257  *  (out) vrs: register state to set on init for this kernel
 *  bootdevice: the boot device type (e.g. VMBOOTDEV_NET for PXE boot)
260  *
261  * Return values:
262  *  0 if successful
263  *  various error codes returned from gzread(3) or loadelf functions
264  */
265 int
266 loadfile_elf(gzFile fp, struct vm_create_params *vcp,
267     struct vcpu_reg_state *vrs, unsigned int bootdevice)
268 {
269 	int r, is_i386 = 0;
270 	uint32_t bootargsz;
271 	size_t n, stacksize;
272 	u_long marks[MARK_MAX];
273 	bios_memmap_t memmap[VMM_MAX_MEM_RANGES + 1];
274 	bios_bootmac_t bm, *bootmac = NULL;
275 
276 	if ((r = gzread(fp, &hdr, sizeof(hdr))) != sizeof(hdr))
277 		return 1;
278 
279 	memset(&marks, 0, sizeof(marks));
280 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
281 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
282 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
283 		is_i386 = 1;
284 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
285 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
286 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
287 	} else
288 		errno = ENOEXEC;
289 
290 	if (r)
291 		return (r);
292 
293 	push_gdt();
294 
295 	if (is_i386) {
296 		push_pt_32();
297 		/* Reconfigure the default flat-64 register set for 32 bit */
298 		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
299 		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
300 		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
301 	}
302 	else
303 		push_pt_64();
304 
305 	if (bootdevice == VMBOOTDEV_NET) {
306 		bootmac = &bm;
307 		memcpy(bootmac, vcp->vcp_macs[0], ETHER_ADDR_LEN);
308 	}
309 	n = create_bios_memmap(vcp, memmap);
310 	bootargsz = push_bootargs(memmap, n, bootmac);
311 	stacksize = push_stack(bootargsz, marks[MARK_END]);
312 
313 	vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
314 	vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
315 	vrs->vrs_gdtr.vsi_base = GDT_PAGE;
316 
317 	log_debug("%s: loaded ELF kernel", __func__);
318 
319 	return (0);
320 }
321 
322 /*
323  * create_bios_memmap
324  *
325  * Construct a memory map as returned by the BIOS INT 0x15, e820 routine.
326  *
327  * Parameters:
328  *  vcp: the VM create parameters, containing the memory map passed to vmm(4)
329  *   memmap (out): the BIOS memory map
330  *
331  * Return values:
332  * Number of bios_memmap_t entries, including the terminating nul-entry.
333  */
334 static size_t
335 create_bios_memmap(struct vm_create_params *vcp, bios_memmap_t *memmap)
336 {
337 	size_t i, n = 0;
338 	struct vm_mem_range *vmr;
339 
340 	for (i = 0; i < vcp->vcp_nmemranges; i++, n++) {
341 		vmr = &vcp->vcp_memranges[i];
342 		memmap[n].addr = vmr->vmr_gpa;
343 		memmap[n].size = vmr->vmr_size;
344 		if (vmr->vmr_type == VM_MEM_RAM)
345 			memmap[n].type = BIOS_MAP_FREE;
346 		else
347 			memmap[n].type = BIOS_MAP_RES;
348 	}
349 
350 	/* Null mem map entry to denote the end of the ranges */
351 	memmap[n].addr = 0x0;
352 	memmap[n].size = 0x0;
353 	memmap[n].type = BIOS_MAP_END;
354 	n++;
355 
356 	return (n);
357 }
358 
359 /*
360  * push_bootargs
361  *
362  * Creates the boot arguments page in the guest address space.
363  * Since vmd(8) is acting as the bootloader, we need to create the same boot
364  * arguments page that a real bootloader would have created. This is loaded
365  * into the guest phys RAM space at address BOOTARGS_PAGE.
366  *
367  * Parameters:
368  *  memmap: the BIOS memory map
369  *  n: number of entries in memmap
370  *  bootmac: optional PXE boot MAC address
371  *
372  * Return values:
373  *  The size of the bootargs in bytes
374  */
375 static uint32_t
376 push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
377 {
378 	uint32_t memmap_sz, consdev_sz, bootmac_sz, i;
379 	bios_consdev_t consdev;
380 	uint32_t ba[1024];
381 
382 	memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
383 	ba[0] = BOOTARG_MEMMAP;
384 	ba[1] = memmap_sz;
385 	ba[2] = memmap_sz;
386 	memcpy(&ba[3], memmap, n * sizeof(bios_memmap_t));
387 	i = memmap_sz / sizeof(uint32_t);
388 
389 	/* Serial console device, COM1 @ 0x3f8 */
390 	memset(&consdev, 0, sizeof(consdev));
391 	consdev.consdev = makedev(8, 0);
392 	consdev.conspeed = 115200;
393 	consdev.consaddr = 0x3f8;
394 
395 	consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
396 	ba[i] = BOOTARG_CONSDEV;
397 	ba[i + 1] = consdev_sz;
398 	ba[i + 2] = consdev_sz;
399 	memcpy(&ba[i + 3], &consdev, sizeof(bios_consdev_t));
400 	i += consdev_sz / sizeof(uint32_t);
401 
402 	if (bootmac) {
403 		bootmac_sz = 3 * sizeof(uint32_t) +
404 		    (sizeof(bios_bootmac_t) + 3) & ~3;
405 		ba[i] = BOOTARG_BOOTMAC;
406 		ba[i + 1] = bootmac_sz;
407 		ba[i + 2] = bootmac_sz;
408 		memcpy(&ba[i + 3], bootmac, sizeof(bios_bootmac_t));
409 		i += bootmac_sz / sizeof(uint32_t);
410 	}
411 
412 	ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
413 
414 	write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
415 
416 	return (i * sizeof(uint32_t));
417 }
418 
419 /*
420  * push_stack
421  *
422  * Creates the boot stack page in the guest address space. When using a real
423  * bootloader, the stack will be prepared using the following format before
424  * transitioning to kernel start, so vmd(8) needs to mimic the same stack
425  * layout. The stack content is pushed to the guest phys RAM at address
426  * STACK_PAGE. The bootloader operates in 32 bit mode; each stack entry is
427  * 4 bytes.
428  *
429  * Stack Layout: (TOS == Top Of Stack)
430  *  TOS		location of boot arguments page
431  *  TOS - 0x4	size of the content in the boot arguments page
432  *  TOS - 0x8	size of low memory (biosbasemem: kernel uses BIOS map only if 0)
433  *  TOS - 0xc	size of high memory (biosextmem, not used by kernel at all)
434  *  TOS - 0x10	kernel 'end' symbol value
435  *  TOS - 0x14	version of bootarg API
436  *
437  * Parameters:
438  *  bootargsz: size of boot arguments
439  *  end: kernel 'end' symbol value
 *  (the boot device and howto flags are hard-coded below rather than
 *  passed as parameters)
442  *
443  * Return values:
444  *  size of the stack
445  */
446 static size_t
447 push_stack(uint32_t bootargsz, uint32_t end)
448 {
449 	uint32_t stack[1024];
450 	uint16_t loc;
451 
452 	memset(&stack, 0, sizeof(stack));
453 	loc = 1024;
454 
455 	stack[--loc] = BOOTARGS_PAGE;
456 	stack[--loc] = bootargsz;
457 	stack[--loc] = 0; /* biosbasemem */
458 	stack[--loc] = 0; /* biosextmem */
459 	stack[--loc] = end;
460 	stack[--loc] = 0x0e;
461 	stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
462 	stack[--loc] = 0;
463 
464 	write_mem(STACK_PAGE, &stack, PAGE_SIZE);
465 
466 	return (1024 - (loc - 1)) * sizeof(uint32_t);
467 }
468 
469 /*
470  * mread
471  *
472  * Reads 'sz' bytes from the file whose descriptor is provided in 'fd'
473  * into the guest address space at paddr 'addr'.
474  *
475  * Parameters:
476  *  fp: kernel image file to read from.
477  *  addr: guest paddr_t to load to
478  *  sz: number of bytes to load
479  *
480  * Return values:
481  *  returns 'sz' if successful, or 0 otherwise.
482  */
size_t
mread(gzFile fp, paddr_t addr, size_t sz)
{
	const char *errstr = NULL;
	int errnum = 0;
	size_t ct;
	size_t i, osz;
	char buf[PAGE_SIZE];

	/*
	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
	 * write_mem
	 */
	ct = 0;
	osz = sz;	/* remember the requested size for the return value */
	if ((addr & PAGE_MASK) != 0) {
		/*
		 * 'addr' is not page aligned: transfer a leading partial
		 * chunk first so the remaining chunks start on a page
		 * boundary.
		 */
		memset(buf, 0, sizeof(buf));
		if (sz > PAGE_SIZE)
			ct = PAGE_SIZE - (addr & PAGE_MASK);
		else
			ct = sz;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			/* Z_ERRNO means a plain I/O error; report errno. */
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);

		addr += ct;
	}

	/* Account for the leading partial chunk (ct is 0 if none). */
	sz = sz - ct;

	if (sz == 0)
		return (osz);

	/* Transfer the rest in page-sized (or final partial) chunks. */
	for (i = 0; i < sz; i += PAGE_SIZE, addr += PAGE_SIZE) {
		memset(buf, 0, sizeof(buf));
		if (i + PAGE_SIZE > sz)
			ct = sz - i;
		else
			ct = PAGE_SIZE;

		if ((size_t)gzread(fp, buf, ct) != ct) {
			errstr = gzerror(fp, &errnum);
			if (errnum == Z_ERRNO)
				errnum = errno;
			log_warnx("%s: error %d in mread, %s", __progname,
			    errnum, errstr);
			return (0);
		}

		if (write_mem(addr, buf, ct))
			return (0);
	}

	return (osz);
}
547 
548 /*
549  * marc4random_buf
550  *
551  * load 'sz' bytes of random data into the guest address space at paddr
552  * 'addr'.
553  *
554  * Parameters:
555  *  addr: guest paddr_t to load random bytes into
556  *  sz: number of random bytes to load
557  *
558  * Return values:
559  *  nothing
560  */
561 static void
562 marc4random_buf(paddr_t addr, int sz)
563 {
564 	int i, ct;
565 	char buf[PAGE_SIZE];
566 
567 	/*
568 	 * break up the 'sz' bytes into PAGE_SIZE chunks for use with
569 	 * write_mem
570 	 */
571 	ct = 0;
572 	if (addr % PAGE_SIZE != 0) {
573 		memset(buf, 0, sizeof(buf));
574 		ct = PAGE_SIZE - (addr % PAGE_SIZE);
575 
576 		arc4random_buf(buf, ct);
577 
578 		if (write_mem(addr, buf, ct))
579 			return;
580 
581 		addr += ct;
582 	}
583 
584 	for (i = 0; i < sz; i+= PAGE_SIZE, addr += PAGE_SIZE) {
585 		memset(buf, 0, sizeof(buf));
586 		if (i + PAGE_SIZE > sz)
587 			ct = sz - i;
588 		else
589 			ct = PAGE_SIZE;
590 
591 		arc4random_buf(buf, ct);
592 
593 		if (write_mem(addr, buf, ct))
594 			return;
595 	}
596 }
597 
598 /*
599  * mbzero
600  *
601  * load 'sz' bytes of zeros into the guest address space at paddr
602  * 'addr'.
603  *
604  * Parameters:
605  *  addr: guest paddr_t to zero
606  *  sz: number of zero bytes to store
607  *
608  * Return values:
609  *  nothing
610  */
611 static void
612 mbzero(paddr_t addr, int sz)
613 {
614 	if (write_mem(addr, NULL, sz))
615 		return;
616 }
617 
618 /*
619  * mbcopy
620  *
621  * copies 'sz' bytes from buffer 'src' to guest paddr 'dst'.
622  *
623  * Parameters:
624  *  src: source buffer to copy from
625  *  dst: destination guest paddr_t to copy to
626  *  sz: number of bytes to copy
627  *
628  * Return values:
629  *  nothing
630  */
631 static void
632 mbcopy(void *src, paddr_t dst, int sz)
633 {
634 	write_mem(dst, src, sz);
635 }
636 
637 /*
638  * elf64_exec
639  *
640  * Load the kernel indicated by 'fp' into the guest physical memory
641  * space, at the addresses defined in the ELF header.
642  *
643  * This function is used for 64 bit kernels.
644  *
645  * Parameters:
646  *  fp: kernel image file to load
647  *  elf: ELF header of the kernel
648  *  marks: array to store the offsets of various kernel structures
649  *      (start, bss, etc)
650  *  flags: flag value to indicate which section(s) to load (usually
651  *      LOAD_ALL)
652  *
653  * Return values:
654  *  0 if successful
655  *  1 if unsuccessful
656  */
657 static int
658 elf64_exec(gzFile fp, Elf64_Ehdr *elf, u_long *marks, int flags)
659 {
660 	Elf64_Shdr *shp;
661 	Elf64_Phdr *phdr;
662 	Elf64_Off off;
663 	int i;
664 	size_t sz;
665 	int havesyms;
666 	paddr_t minp = ~0, maxp = 0, pos = 0;
667 	paddr_t offset = marks[MARK_START], shpp, elfp;
668 
669 	sz = elf->e_phnum * sizeof(Elf64_Phdr);
670 	phdr = malloc(sz);
671 
672 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
673 		free(phdr);
674 		return 1;
675 	}
676 
677 	if ((size_t)gzread(fp, phdr, sz) != sz) {
678 		free(phdr);
679 		return 1;
680 	}
681 
682 	for (i = 0; i < elf->e_phnum; i++) {
683 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
684 			int m;
685 
686 			/* Fill segment if asked for. */
687 			if (flags & LOAD_RANDOM) {
688 				for (pos = 0; pos < phdr[i].p_filesz;
689 				    pos += m) {
690 					m = phdr[i].p_filesz - pos;
691 					marc4random_buf(phdr[i].p_paddr + pos,
692 					    m);
693 				}
694 			}
695 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
696 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
697 				marks[MARK_ERANDOM] =
698 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
699 			}
700 			continue;
701 		}
702 
703 		if (phdr[i].p_type != PT_LOAD ||
704 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
705 			continue;
706 
707 #define IS_TEXT(p)	(p.p_flags & PF_X)
708 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
709 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
710 		/*
711 		 * XXX: Assume first address is lowest
712 		 */
713 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
714 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
715 
716 			/* Read in segment. */
717 			if (gzseek(fp, (off_t)phdr[i].p_offset,
718 			    SEEK_SET) == -1) {
719 				free(phdr);
720 				return 1;
721 			}
722 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
723 			    phdr[i].p_filesz) {
724 				free(phdr);
725 				return 1;
726 			}
727 		}
728 
729 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
730 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
731 			pos = phdr[i].p_paddr;
732 			if (minp > pos)
733 				minp = pos;
734 			pos += phdr[i].p_filesz;
735 			if (maxp < pos)
736 				maxp = pos;
737 		}
738 
739 		/* Zero out BSS. */
740 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
741 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
742 			    phdr[i].p_memsz - phdr[i].p_filesz);
743 		}
744 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
745 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
746 			if (maxp < pos)
747 				maxp = pos;
748 		}
749 	}
750 	free(phdr);
751 
752 	/*
753 	 * Copy the ELF and section headers.
754 	 */
755 	elfp = maxp = roundup(maxp, sizeof(Elf64_Addr));
756 	if (flags & (LOAD_HDR | COUNT_HDR))
757 		maxp += sizeof(Elf64_Ehdr);
758 
759 	if (flags & (LOAD_SYM | COUNT_SYM)) {
760 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
761 			warn("gzseek section headers");
762 			return 1;
763 		}
764 		sz = elf->e_shnum * sizeof(Elf64_Shdr);
765 		shp = malloc(sz);
766 
767 		if ((size_t)gzread(fp, shp, sz) != sz) {
768 			free(shp);
769 			return 1;
770 		}
771 
772 		shpp = maxp;
773 		maxp += roundup(sz, sizeof(Elf64_Addr));
774 
775 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
776 		char *shstr = malloc(shstrsz);
777 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
778 		    SEEK_SET) == -1) {
779 			free(shstr);
780 			free(shp);
781 			return 1;
782 		}
783 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
784 			free(shstr);
785 			free(shp);
786 			return 1;
787 		}
788 
789 		/*
790 		 * Now load the symbol sections themselves. Make sure the
791 		 * sections are aligned. Don't bother with string tables if
792 		 * there are no symbol sections.
793 		 */
794 		off = roundup((sizeof(Elf64_Ehdr) + sz), sizeof(Elf64_Addr));
795 
796 		for (havesyms = i = 0; i < elf->e_shnum; i++)
797 			if (shp[i].sh_type == SHT_SYMTAB)
798 				havesyms = 1;
799 
800 		for (i = 0; i < elf->e_shnum; i++) {
801 			if (shp[i].sh_type == SHT_SYMTAB ||
802 			    shp[i].sh_type == SHT_STRTAB ||
803 			    !strcmp(shstr + shp[i].sh_name, ".debug_line") ||
804 			    !strcmp(shstr + shp[i].sh_name, ELF_CTF)) {
805 				if (havesyms && (flags & LOAD_SYM)) {
806 					if (gzseek(fp, (off_t)shp[i].sh_offset,
807 					    SEEK_SET) == -1) {
808 						free(shstr);
809 						free(shp);
810 						return 1;
811 					}
812 					if (mread(fp, maxp,
813 					    shp[i].sh_size) != shp[i].sh_size) {
814 						free(shstr);
815 						free(shp);
816 						return 1;
817 					}
818 				}
819 				maxp += roundup(shp[i].sh_size,
820 				    sizeof(Elf64_Addr));
821 				shp[i].sh_offset = off;
822 				shp[i].sh_flags |= SHF_ALLOC;
823 				off += roundup(shp[i].sh_size,
824 				    sizeof(Elf64_Addr));
825 			}
826 		}
827 		if (flags & LOAD_SYM) {
828 			mbcopy(shp, shpp, sz);
829 		}
830 		free(shstr);
831 		free(shp);
832 	}
833 
834 	/*
835 	 * Frob the copied ELF header to give information relative
836 	 * to elfp.
837 	 */
838 	if (flags & LOAD_HDR) {
839 		elf->e_phoff = 0;
840 		elf->e_shoff = sizeof(Elf64_Ehdr);
841 		elf->e_phentsize = 0;
842 		elf->e_phnum = 0;
843 		mbcopy(elf, elfp, sizeof(*elf));
844 	}
845 
846 	marks[MARK_START] = LOADADDR(minp);
847 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
848 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
849 	marks[MARK_SYM] = LOADADDR(elfp);
850 	marks[MARK_END] = LOADADDR(maxp);
851 
852 	return 0;
853 }
854 
855 /*
856  * elf32_exec
857  *
858  * Load the kernel indicated by 'fp' into the guest physical memory
859  * space, at the addresses defined in the ELF header.
860  *
861  * This function is used for 32 bit kernels.
862  *
863  * Parameters:
864  *  fp: kernel image file to load
865  *  elf: ELF header of the kernel
866  *  marks: array to store the offsets of various kernel structures
867  *      (start, bss, etc)
868  *  flags: flag value to indicate which section(s) to load (usually
869  *      LOAD_ALL)
870  *
871  * Return values:
872  *  0 if successful
873  *  1 if unsuccessful
874  */
875 static int
876 elf32_exec(gzFile fp, Elf32_Ehdr *elf, u_long *marks, int flags)
877 {
878 	Elf32_Shdr *shp;
879 	Elf32_Phdr *phdr;
880 	Elf32_Off off;
881 	int i;
882 	size_t sz;
883 	int havesyms;
884 	paddr_t minp = ~0, maxp = 0, pos = 0;
885 	paddr_t offset = marks[MARK_START], shpp, elfp;
886 
887 	sz = elf->e_phnum * sizeof(Elf32_Phdr);
888 	phdr = malloc(sz);
889 
890 	if (gzseek(fp, (off_t)elf->e_phoff, SEEK_SET) == -1)  {
891 		free(phdr);
892 		return 1;
893 	}
894 
895 	if ((size_t)gzread(fp, phdr, sz) != sz) {
896 		free(phdr);
897 		return 1;
898 	}
899 
900 	for (i = 0; i < elf->e_phnum; i++) {
901 		if (phdr[i].p_type == PT_OPENBSD_RANDOMIZE) {
902 			int m;
903 
904 			/* Fill segment if asked for. */
905 			if (flags & LOAD_RANDOM) {
906 				for (pos = 0; pos < phdr[i].p_filesz;
907 				    pos += m) {
908 					m = phdr[i].p_filesz - pos;
909 					marc4random_buf(phdr[i].p_paddr + pos,
910 					    m);
911 				}
912 			}
913 			if (flags & (LOAD_RANDOM | COUNT_RANDOM)) {
914 				marks[MARK_RANDOM] = LOADADDR(phdr[i].p_paddr);
915 				marks[MARK_ERANDOM] =
916 				    marks[MARK_RANDOM] + phdr[i].p_filesz;
917 			}
918 			continue;
919 		}
920 
921 		if (phdr[i].p_type != PT_LOAD ||
922 		    (phdr[i].p_flags & (PF_W|PF_R|PF_X)) == 0)
923 			continue;
924 
925 #define IS_TEXT(p)	(p.p_flags & PF_X)
926 #define IS_DATA(p)	((p.p_flags & PF_X) == 0)
927 #define IS_BSS(p)	(p.p_filesz < p.p_memsz)
928 		/*
929 		 * XXX: Assume first address is lowest
930 		 */
931 		if ((IS_TEXT(phdr[i]) && (flags & LOAD_TEXT)) ||
932 		    (IS_DATA(phdr[i]) && (flags & LOAD_DATA))) {
933 
934 			/* Read in segment. */
935 			if (gzseek(fp, (off_t)phdr[i].p_offset,
936 			    SEEK_SET) == -1) {
937 				free(phdr);
938 				return 1;
939 			}
940 			if (mread(fp, phdr[i].p_paddr, phdr[i].p_filesz) !=
941 			    phdr[i].p_filesz) {
942 				free(phdr);
943 				return 1;
944 			}
945 		}
946 
947 		if ((IS_TEXT(phdr[i]) && (flags & (LOAD_TEXT | COUNT_TEXT))) ||
948 		    (IS_DATA(phdr[i]) && (flags & (LOAD_DATA | COUNT_TEXT)))) {
949 			pos = phdr[i].p_paddr;
950 			if (minp > pos)
951 				minp = pos;
952 			pos += phdr[i].p_filesz;
953 			if (maxp < pos)
954 				maxp = pos;
955 		}
956 
957 		/* Zero out BSS. */
958 		if (IS_BSS(phdr[i]) && (flags & LOAD_BSS)) {
959 			mbzero((phdr[i].p_paddr + phdr[i].p_filesz),
960 			    phdr[i].p_memsz - phdr[i].p_filesz);
961 		}
962 		if (IS_BSS(phdr[i]) && (flags & (LOAD_BSS|COUNT_BSS))) {
963 			pos += phdr[i].p_memsz - phdr[i].p_filesz;
964 			if (maxp < pos)
965 				maxp = pos;
966 		}
967 	}
968 	free(phdr);
969 
970 	/*
971 	 * Copy the ELF and section headers.
972 	 */
973 	elfp = maxp = roundup(maxp, sizeof(Elf32_Addr));
974 	if (flags & (LOAD_HDR | COUNT_HDR))
975 		maxp += sizeof(Elf32_Ehdr);
976 
977 	if (flags & (LOAD_SYM | COUNT_SYM)) {
978 		if (gzseek(fp, (off_t)elf->e_shoff, SEEK_SET) == -1) {
979 			warn("lseek section headers");
980 			return 1;
981 		}
982 		sz = elf->e_shnum * sizeof(Elf32_Shdr);
983 		shp = malloc(sz);
984 
985 		if ((size_t)gzread(fp, shp, sz) != sz) {
986 			free(shp);
987 			return 1;
988 		}
989 
990 		shpp = maxp;
991 		maxp += roundup(sz, sizeof(Elf32_Addr));
992 
993 		size_t shstrsz = shp[elf->e_shstrndx].sh_size;
994 		char *shstr = malloc(shstrsz);
995 		if (gzseek(fp, (off_t)shp[elf->e_shstrndx].sh_offset,
996 		    SEEK_SET) == -1) {
997 			free(shstr);
998 			free(shp);
999 			return 1;
1000 		}
1001 		if ((size_t)gzread(fp, shstr, shstrsz) != shstrsz) {
1002 			free(shstr);
1003 			free(shp);
1004 			return 1;
1005 		}
1006 
1007 		/*
1008 		 * Now load the symbol sections themselves. Make sure the
1009 		 * sections are aligned. Don't bother with string tables if
1010 		 * there are no symbol sections.
1011 		 */
1012 		off = roundup((sizeof(Elf32_Ehdr) + sz), sizeof(Elf32_Addr));
1013 
1014 		for (havesyms = i = 0; i < elf->e_shnum; i++)
1015 			if (shp[i].sh_type == SHT_SYMTAB)
1016 				havesyms = 1;
1017 
1018 		for (i = 0; i < elf->e_shnum; i++) {
1019 			if (shp[i].sh_type == SHT_SYMTAB ||
1020 			    shp[i].sh_type == SHT_STRTAB ||
1021 			    !strcmp(shstr + shp[i].sh_name, ".debug_line")) {
1022 				if (havesyms && (flags & LOAD_SYM)) {
1023 					if (gzseek(fp, (off_t)shp[i].sh_offset,
1024 					    SEEK_SET) == -1) {
1025 						free(shstr);
1026 						free(shp);
1027 						return 1;
1028 					}
1029 					if (mread(fp, maxp,
1030 					    shp[i].sh_size) != shp[i].sh_size) {
1031 						free(shstr);
1032 						free(shp);
1033 						return 1;
1034 					}
1035 				}
1036 				maxp += roundup(shp[i].sh_size,
1037 				    sizeof(Elf32_Addr));
1038 				shp[i].sh_offset = off;
1039 				shp[i].sh_flags |= SHF_ALLOC;
1040 				off += roundup(shp[i].sh_size,
1041 				    sizeof(Elf32_Addr));
1042 			}
1043 		}
1044 		if (flags & LOAD_SYM) {
1045 			mbcopy(shp, shpp, sz);
1046 		}
1047 		free(shstr);
1048 		free(shp);
1049 	}
1050 
1051 	/*
1052 	 * Frob the copied ELF header to give information relative
1053 	 * to elfp.
1054 	 */
1055 	if (flags & LOAD_HDR) {
1056 		elf->e_phoff = 0;
1057 		elf->e_shoff = sizeof(Elf32_Ehdr);
1058 		elf->e_phentsize = 0;
1059 		elf->e_phnum = 0;
1060 		mbcopy(elf, elfp, sizeof(*elf));
1061 	}
1062 
1063 	marks[MARK_START] = LOADADDR(minp);
1064 	marks[MARK_ENTRY] = LOADADDR(elf->e_entry);
1065 	marks[MARK_NSYM] = 1;	/* XXX: Kernel needs >= 0 */
1066 	marks[MARK_SYM] = LOADADDR(elfp);
1067 	marks[MARK_END] = LOADADDR(maxp);
1068 
1069 	return 0;
1070 }
1071