/*	$OpenBSD: x86_vm.c,v 1.5 2024/10/02 17:05:56 dv Exp $	*/
/*
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/stat.h>
#include <sys/types.h>

#include <dev/ic/i8253reg.h>
#include <dev/isa/isareg.h>

#include <machine/pte.h>
#include <machine/specialreg.h>
#include <machine/vmmvar.h>

#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <zlib.h>

#include "atomicio.h"
#include "fw_cfg.h"
#include "i8253.h"
#include "i8259.h"
#include "loadfile.h"
#include "mc146818.h"
#include "ns8250.h"
#include "pci.h"
#include "virtio.h"

typedef uint8_t (*io_fn_t)(struct vm_run_params *);

#define MAX_PORTS 65536

io_fn_t	ioports_map[MAX_PORTS];
extern char *__progname;

void	 create_memory_map(struct vm_create_params *);
int	 translate_gva(struct vm_exit *, uint64_t, uint64_t *, int);

static int	loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
static int	vcpu_exit_eptviolation(struct vm_run_params *);
static void	vcpu_exit_inout(struct vm_run_params *);

extern struct vmd_vm	*current_vm;
extern int		 con_fd;

/*
 * Represents a standard register set for an OS to be booted
 * as a flat 64 bit address space.
 *
 * NOT set here are:
 *  RIP
 *  RSP
 *  GDTR BASE
 *
 * Specific bootloaders should clone this structure and override
 * those fields as needed.
 *
 * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
 *        features of the CPU in use.
 */
static const struct vcpu_reg_state vcpu_init_flat64 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE | CR0_PG,
	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
	.vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
	.vrs_crs[VCPU_REGS_PDPTE0] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
	.vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_msrs[VCPU_REGS_MISC_ENABLE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};

/*
 * Represents a standard register set for a BIOS to be booted
 * as a flat 16 bit address space.
 */
static const struct vcpu_reg_state vcpu_init_flat16 = {
	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
	.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
	.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
	.vrs_crs[VCPU_REGS_CR3] = 0,
	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
	.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
	.vrs_drs[VCPU_REGS_DR0] = 0x0,
	.vrs_drs[VCPU_REGS_DR1] = 0x0,
	.vrs_drs[VCPU_REGS_DR2] = 0x0,
	.vrs_drs[VCPU_REGS_DR3] = 0x0,
	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
	.vrs_drs[VCPU_REGS_DR7] = 0x400,
	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
};

/*
 * create_memory_map
 *
 * Sets up the guest physical memory ranges that the VM can access.
 *
 * Parameters:
 *  vcp: VM create parameters describing the VM whose memory map
 *       is being created
 *
 * Return values:
 *  nothing
 */
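/*
 * For reference, a sketch of the resulting layout for a guest with more
 * than 2MB remaining after low memory (derived from the code below;
 * actual addresses depend on the LOWMEM_KB and PCI_MMIO_BAR_* values):
 *
 *  [0] 0x0                 .. LOWMEM_KB*1024     RAM      (DOS low memory)
 *  [1] LOWMEM_KB*1024      .. 1MB                RESERVED (VGA/ROM hole)
 *  [2] 1MB                 .. PCI_MMIO_BAR_BASE  RAM      (below MMIO hole)
 *  [3] PCI_MMIO_BAR_BASE   .. PCI_MMIO_BAR_END   MMIO     (PCI BARs)
 *  [4] PCI_MMIO_BAR_END+1, 2MB                   RESERVED (2nd BIOS copy)
 *  [5] 4GB                 .. end                RAM      (remainder, if any)
 */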
void
create_memory_map(struct vm_create_params *vcp)
{
	size_t len, mem_bytes;
	size_t above_1m = 0, above_4g = 0;

	mem_bytes = vcp->vcp_memranges[0].vmr_size;
	vcp->vcp_nmemranges = 0;
	if (mem_bytes == 0 || mem_bytes > VMM_MAX_VM_MEM_SIZE)
		return;

	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
	len = LOWMEM_KB * 1024;
	vcp->vcp_memranges[0].vmr_gpa = 0x0;
	vcp->vcp_memranges[0].vmr_size = len;
	vcp->vcp_memranges[0].vmr_type = VM_MEM_RAM;
	mem_bytes -= len;

	/*
	 * Second memory region: LOWMEM_KB - 1MB.
	 *
	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
	 * We have to add this region, because some systems
	 * unconditionally write to 0xb8000 (VGA RAM), and
	 * we need to make sure that vmm(4) permits accesses
	 * to it. So allocate guest memory for it.
	 */
	len = MB(1) - (LOWMEM_KB * 1024);
	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
	vcp->vcp_memranges[1].vmr_size = len;
	vcp->vcp_memranges[1].vmr_type = VM_MEM_RESERVED;
	mem_bytes -= len;

	/* If we have 2MB or less remaining, still create a 2nd BIOS area. */
	if (mem_bytes <= MB(2)) {
		vcp->vcp_memranges[2].vmr_gpa = PCI_MMIO_BAR_END;
		vcp->vcp_memranges[2].vmr_size = MB(2);
		vcp->vcp_memranges[2].vmr_type = VM_MEM_RESERVED;
		vcp->vcp_nmemranges = 3;
		return;
	}

	/*
	 * Calculate how to split any remaining memory across the 4GB
	 * boundary while making sure we do not place physical memory into
	 * MMIO ranges.
	 */
	if (mem_bytes > PCI_MMIO_BAR_BASE - MB(1)) {
		above_1m = PCI_MMIO_BAR_BASE - MB(1);
		above_4g = mem_bytes - above_1m;
	} else {
		above_1m = mem_bytes;
		above_4g = 0;
	}

	/* Third memory region: area above 1MB to MMIO region */
	vcp->vcp_memranges[2].vmr_gpa = MB(1);
	vcp->vcp_memranges[2].vmr_size = above_1m;
	vcp->vcp_memranges[2].vmr_type = VM_MEM_RAM;

	/* Fourth region: PCI MMIO range */
	vcp->vcp_memranges[3].vmr_gpa = PCI_MMIO_BAR_BASE;
	vcp->vcp_memranges[3].vmr_size = PCI_MMIO_BAR_END -
	    PCI_MMIO_BAR_BASE + 1;
	vcp->vcp_memranges[3].vmr_type = VM_MEM_MMIO;

	/* Fifth region: 2nd copy of BIOS above MMIO ending at 4GB */
	vcp->vcp_memranges[4].vmr_gpa = PCI_MMIO_BAR_END + 1;
	vcp->vcp_memranges[4].vmr_size = MB(2);
	vcp->vcp_memranges[4].vmr_type = VM_MEM_RESERVED;

	/* Sixth region: any remainder above 4GB */
	if (above_4g > 0) {
		vcp->vcp_memranges[5].vmr_gpa = GB(4);
		vcp->vcp_memranges[5].vmr_size = above_4g;
		vcp->vcp_memranges[5].vmr_type = VM_MEM_RAM;
		vcp->vcp_nmemranges = 6;
	} else
		vcp->vcp_nmemranges = 5;
}

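/*
 * load_firmware
 *
 * Loads the kernel or BIOS image into guest memory and initializes the
 * vcpu register state accordingly. An ELF kernel is tried first via
 * loadfile_elf; if the image is not ELF (and is not compressed), it is
 * loaded as a raw BIOS image via loadfile_bios.
 *
 * Parameters:
 *  vm: the VM whose kernel or BIOS image is being loaded
 *  (out) vrs: register state to set on init
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from loadfile_elf or loadfile_bios
 */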
int
load_firmware(struct vmd_vm *vm, struct vcpu_reg_state *vrs)
{
	int		ret;
	gzFile		fp;
	struct stat	sb;

	/*
	 * Set up default "flat 64 bit" register state - RIP, RSP, and
	 * GDT info will be set in bootloader
	 */
	memcpy(vrs, &vcpu_init_flat64, sizeof(*vrs));

	/* Find and open kernel image */
	if ((fp = gzdopen(vm->vm_kernel, "r")) == NULL)
		fatalx("failed to open kernel - exiting");

	/* Load kernel image */
	ret = loadfile_elf(fp, vm, vrs, vm->vm_params.vmc_bootdevice);

	/*
	 * Try BIOS as a fallback (only if it was provided as an image
	 * with vm->vm_kernel and the file is not compressed)
	 */
	if (ret && errno == ENOEXEC && vm->vm_kernel != -1 &&
	    gzdirect(fp) && (ret = fstat(vm->vm_kernel, &sb)) == 0)
		ret = loadfile_bios(fp, sb.st_size, vrs);

	gzclose(fp);

	return (ret);
}

/*
 * loadfile_bios
 *
 * As an alternative to loadfile_elf, this function loads a non-ELF BIOS
 * image directly into memory.
 *
 * Parameters:
 *  fp: the BIOS image file to load
 *  size: uncompressed size of the image
 *  (out) vrs: register state to set on init for this kernel
 *
 * Return values:
 *  0 if successful
 *  various error codes returned from read(2) or loadelf functions
 */
int
loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_state *vrs)
{
	off_t	 off;

	/* Set up a "flat 16 bit" register state for BIOS */
	memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));

	/* Seek to the beginning of the BIOS image */
	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* The BIOS image must end at 1MB */
	if ((off = MB(1) - size) < 0)
		return (-1);

	/* Read BIOS image into memory */
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	if (gzseek(fp, 0, SEEK_SET) == -1)
		return (-1);

	/* Read a second BIOS copy into memory ending at 4GB */
	off = GB(4) - size;
	if (mread(fp, off, size) != (size_t)size) {
		errno = EIO;
		return (-1);
	}

	log_debug("%s: loaded BIOS image", __func__);

	return (0);
}

/*
 * init_emulated_hw
 *
 * Initializes the userspace hardware emulation
 */
void
init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
    int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
	struct vm_create_params *vcp = &vmc->vmc_params;
	size_t i;
	uint64_t memlo, memhi;

	/* Calculate memory size for NVRAM registers */
	memlo = memhi = 0;
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		if (vcp->vcp_memranges[i].vmr_gpa == MB(1) &&
		    vcp->vcp_memranges[i].vmr_size > (15 * MB(1)))
			memlo = vcp->vcp_memranges[i].vmr_size - (15 * MB(1));
		else if (vcp->vcp_memranges[i].vmr_gpa == GB(4))
			memhi = vcp->vcp_memranges[i].vmr_size;
	}

	/* Reset the IO port map */
	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_init(vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
	ioports_map[PCKBC_AUX] = vcpu_exit_i8253_misc;

	/* Init mc146818 RTC */
	mc146818_init(vcp->vcp_id, memlo, memhi);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init master and slave PICs */
	i8259_init();
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
	ioports_map[ELCR0] = vcpu_exit_elcr;
	ioports_map[ELCR1] = vcpu_exit_elcr;

	/* Init ns8250 UART */
	ns8250_init(con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_init();

	/* Initialize virtio devices */
	virtio_init(current_vm, child_cdrom, child_disks, child_taps);

	/*
	 * Init QEMU fw_cfg interface. Must be done last for PCI hardware
	 * detection.
	 */
	fw_cfg_init(vmc);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;
}

/*
 * restore_emulated_hw
 *
 * Restores the userspace hardware emulation from fd
 */
void
restore_emulated_hw(struct vm_create_params *vcp, int fd,
    int *child_taps, int child_disks[][VM_MAX_BASE_PER_DISK], int child_cdrom)
{
	int i;

	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);

	/* Init i8253 PIT */
	i8253_restore(fd, vcp->vcp_id);
	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;

	/* Init master and slave PICs */
	i8259_restore(fd);
	ioports_map[IO_ICU1] = vcpu_exit_i8259;
	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
	ioports_map[IO_ICU2] = vcpu_exit_i8259;
	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;

	/* Init ns8250 UART */
	ns8250_restore(fd, con_fd, vcp->vcp_id);
	for (i = COM1_DATA; i <= COM1_SCR; i++)
		ioports_map[i] = vcpu_exit_com;

	/* Init mc146818 RTC */
	mc146818_restore(fd, vcp->vcp_id);
	ioports_map[IO_RTC] = vcpu_exit_mc146818;
	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;

	/* Init QEMU fw_cfg interface */
	fw_cfg_restore(fd);
	ioports_map[FW_CFG_IO_SELECT] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DATA] = vcpu_exit_fw_cfg;
	ioports_map[FW_CFG_IO_DMA_ADDR_HIGH] = vcpu_exit_fw_cfg_dma;
	ioports_map[FW_CFG_IO_DMA_ADDR_LOW] = vcpu_exit_fw_cfg_dma;

	/* Initialize PCI */
	for (i = VM_PCI_IO_BAR_BASE; i <= VM_PCI_IO_BAR_END; i++)
		ioports_map[i] = vcpu_exit_pci;

	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
	pci_restore(fd);
	virtio_restore(fd, current_vm, child_cdrom, child_disks, child_taps);
}

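/*
 * pause_vm_md
 *
 * Machine-dependent part of pausing a VM: stops the emulated i8253 PIT,
 * mc146818 RTC, ns8250 UART, and virtio devices.
 *
 * Parameters:
 *  vm: the VM being paused
 */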
void
pause_vm_md(struct vmd_vm *vm)
{
	i8253_stop();
	mc146818_stop();
	ns8250_stop();
	virtio_stop(vm);
}

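/*
 * unpause_vm_md
 *
 * Machine-dependent part of unpausing a VM: restarts the emulated
 * devices stopped by pause_vm_md.
 *
 * Parameters:
 *  vm: the VM being unpaused
 */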
void
unpause_vm_md(struct vmd_vm *vm)
{
	i8253_start();
	mc146818_start();
	ns8250_start();
	virtio_start(vm);
}

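/*
 * dump_devs
 *
 * Dumps the emulated device state (i8253, i8259, ns8250, mc146818 and
 * fw_cfg) to the given file descriptor, stopping at the first failure.
 *
 * Parameters:
 *  fd: file descriptor to write the device state to
 *
 * Return values:
 *  0 if successful
 *  non-zero result of the first device dump routine that failed
 */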
int
dump_devs(int fd)
{
	int ret = 0;

	if ((ret = i8253_dump(fd)))
		return ret;
	if ((ret = i8259_dump(fd)))
		return ret;
	if ((ret = ns8250_dump(fd)))
		return ret;
	if ((ret = mc146818_dump(fd)))
		return ret;
	ret = fw_cfg_dump(fd);

	return ret;
}

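/*
 * dump_send_header
 *
 * Writes the vm dump header to fd, recording the host's answers to a
 * fixed set of cpuid leaves so that vmd_check_vmh can verify
 * compatibility when the dump is later received.
 *
 * Parameters:
 *  fd: file descriptor to write the header to
 *
 * Return values:
 *  0 if successful
 *  -1 if the header could not be written in full
 */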
int
dump_send_header(int fd)
{
	struct vm_dump_header	   vmh;
	int			   i;

	memcpy(&vmh.vmh_signature, VM_DUMP_SIGNATURE,
	    sizeof(vmh.vmh_signature));

	vmh.vmh_cpuids[0].code = 0x00;
	vmh.vmh_cpuids[0].leaf = 0x00;

	vmh.vmh_cpuids[1].code = 0x01;
	vmh.vmh_cpuids[1].leaf = 0x00;

	vmh.vmh_cpuids[2].code = 0x07;
	vmh.vmh_cpuids[2].leaf = 0x00;

	vmh.vmh_cpuids[3].code = 0x0d;
	vmh.vmh_cpuids[3].leaf = 0x00;

	vmh.vmh_cpuids[4].code = 0x80000001;
	vmh.vmh_cpuids[4].leaf = 0x00;

	vmh.vmh_version = VM_DUMP_VERSION;

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		CPUID_LEAF(vmh.vmh_cpuids[i].code,
		    vmh.vmh_cpuids[i].leaf,
		    vmh.vmh_cpuids[i].a,
		    vmh.vmh_cpuids[i].b,
		    vmh.vmh_cpuids[i].c,
		    vmh.vmh_cpuids[i].d);
	}

	if (atomicio(vwrite, fd, &vmh, sizeof(vmh)) != sizeof(vmh))
		return (-1);

	return (0);
}

/*
 * vcpu_exit_inout
 *
 * Handle all I/O exits that need to be emulated in vmd. This includes the
 * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 */
void
vcpu_exit_inout(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr = 0xFF;

	if (vei->vei.vei_rep || vei->vei.vei_string) {
#ifdef MMIO_DEBUG
		log_info("%s: %s%s%s %d-byte, enc=%d, data=0x%08x, port=0x%04x",
		    __func__,
		    vei->vei.vei_rep == 0 ? "" : "REP ",
		    vei->vei.vei_dir == VEI_DIR_IN ? "IN" : "OUT",
		    vei->vei.vei_string == 0 ? "" : "S",
		    vei->vei.vei_size, vei->vei.vei_encoding,
		    vei->vei.vei_data, vei->vei.vei_port);
		log_info("%s: ECX = 0x%llx, RDX = 0x%llx, RSI = 0x%llx",
		    __func__,
		    vei->vrs.vrs_gprs[VCPU_REGS_RCX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RDX],
		    vei->vrs.vrs_gprs[VCPU_REGS_RSI]);
#endif /* MMIO_DEBUG */
		fatalx("%s: can't emulate REP prefixed IN(S)/OUT(S)",
		    __func__);
	}

	if (ioports_map[vei->vei.vei_port] != NULL)
		intr = ioports_map[vei->vei.vei_port](vrp);
	else if (vei->vei.vei_dir == VEI_DIR_IN)
		set_return_data(vei, 0xFFFFFFFF);

	vei->vrs.vrs_gprs[VCPU_REGS_RIP] += vei->vei.vei_insn_len;

	if (intr != 0xFF)
		vcpu_assert_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
}

/*
 * vcpu_exit
 *
 * Handle a vcpu exit. This function is called when it is determined that
 * vmm(4) requires the assistance of vmd to support a particular guest
 * exit type (eg, accessing an I/O port or device). Guest state is contained
 * in 'vrp', and will be resent to vmm(4) on exit completion.
 *
 * Upon conclusion of handling the exit, the function determines if any
 * interrupts should be injected into the guest, and asserts the proper
 * IRQ line whose interrupt should be vectored.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: the exit was handled successfully
 *  non-zero: an error occurred handling the exit (eg, EFAULT from an
 *      EPT violation), or EAGAIN if the VM should be reset
 */
int
vcpu_exit(struct vm_run_params *vrp)
{
	int ret;

	switch (vrp->vrp_exit_reason) {
	case VMX_EXIT_INT_WINDOW:
	case SVM_VMEXIT_VINTR:
	case VMX_EXIT_CPUID:
	case VMX_EXIT_EXTINT:
	case SVM_VMEXIT_INTR:
	case SVM_VMEXIT_MSR:
	case SVM_VMEXIT_CPUID:
		/*
		 * We may be exiting to vmd to handle a pending interrupt but
		 * at the same time the last exit type may have been one of
		 * these. In this case, there's nothing extra to be done
		 * here (and falling through to the default case below results
		 * in more vmd log spam).
		 */
		break;
	case SVM_VMEXIT_NPF:
	case VMX_EXIT_EPT_VIOLATION:
		ret = vcpu_exit_eptviolation(vrp);
		if (ret)
			return (ret);
		break;
	case VMX_EXIT_IO:
	case SVM_VMEXIT_IOIO:
		vcpu_exit_inout(vrp);
		break;
	case VMX_EXIT_HLT:
	case SVM_VMEXIT_HLT:
		vcpu_halt(vrp->vrp_vcpu_id);
		break;
	case VMX_EXIT_TRIPLE_FAULT:
	case SVM_VMEXIT_SHUTDOWN:
		/* reset VM */
		return (EAGAIN);
	default:
		log_debug("%s: unknown exit reason 0x%x",
		    __progname, vrp->vrp_exit_reason);
	}

	return (0);
}

/*
 * vcpu_exit_eptviolation
 *
 * Handle an EPT violation.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return values:
 *  0: no action required
 *  EFAULT: a protection fault occurred, kill the vm.
 */
static int
vcpu_exit_eptviolation(struct vm_run_params *vrp)
{
	struct vm_exit *ve = vrp->vrp_exit;
	int ret = 0;
#if MMIO_NOTYET
	struct x86_insn insn;
	uint64_t va, pa;
	size_t len = 15;		/* Max instruction length in x86. */
#endif /* MMIO_NOTYET */

	switch (ve->vee.vee_fault_type) {
	case VEE_FAULT_HANDLED:
		break;

#if MMIO_NOTYET
	case VEE_FAULT_MMIO_ASSIST:
		/* Intel VMX might give us the length of the instruction. */
		if (ve->vee.vee_insn_info & VEE_LEN_VALID)
			len = ve->vee.vee_insn_len;

		if (len > 15)
			fatalx("%s: invalid instruction length %lu", __func__,
			    len);

		/* If we weren't given instruction bytes, we need to fetch. */
		if (!(ve->vee.vee_insn_info & VEE_BYTES_VALID)) {
			memset(ve->vee.vee_insn_bytes, 0,
			    sizeof(ve->vee.vee_insn_bytes));
			va = ve->vrs.vrs_gprs[VCPU_REGS_RIP];

			/* XXX Only support instructions that fit on 1 page. */
			if ((va & PAGE_MASK) + len > PAGE_SIZE) {
				log_warnx("%s: instruction might cross page "
				    "boundary", __func__);
				ret = EINVAL;
				break;
			}

			ret = translate_gva(ve, va, &pa, PROT_EXEC);
			if (ret != 0) {
				log_warnx("%s: failed gva translation",
				    __func__);
				break;
			}

			ret = read_mem(pa, ve->vee.vee_insn_bytes, len);
			if (ret != 0) {
				log_warnx("%s: failed to fetch instruction "
				    "bytes from 0x%llx", __func__, pa);
				break;
			}
		}

		ret = insn_decode(ve, &insn);
		if (ret == 0)
			ret = insn_emulate(ve, &insn);
		break;
#endif /* MMIO_NOTYET */

	case VEE_FAULT_PROTECT:
		log_debug("%s: EPT Violation: rip=0x%llx", __progname,
		    ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
		ret = EFAULT;
		break;

	default:
		fatalx("%s: invalid fault_type %d", __progname,
		    ve->vee.vee_fault_type);
		/* UNREACHED */
	}

	return (ret);
}

/*
 * vcpu_exit_pci
 *
 * Handle all I/O to the emulated PCI subsystem.
 *
 * Parameters:
 *  vrp: vcpu run parameters containing guest state for this exit
 *
 * Return value:
 *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
 *      be injected.
 */
uint8_t
vcpu_exit_pci(struct vm_run_params *vrp)
{
	struct vm_exit *vei = vrp->vrp_exit;
	uint8_t intr;

	intr = 0xFF;

	switch (vei->vei.vei_port) {
	case PCI_MODE1_ADDRESS_REG:
		pci_handle_address_reg(vrp);
		break;
	case PCI_MODE1_DATA_REG:
	case PCI_MODE1_DATA_REG + 1:
	case PCI_MODE1_DATA_REG + 2:
	case PCI_MODE1_DATA_REG + 3:
		pci_handle_data_reg(vrp);
		break;
	case VM_PCI_IO_BAR_BASE ... VM_PCI_IO_BAR_END:
		intr = pci_handle_io(vrp);
		break;
	default:
		log_warnx("%s: unknown PCI register 0x%llx",
		    __progname, (uint64_t)vei->vei.vei_port);
		break;
	}

	return (intr);
}

/*
 * find_gpa_range
 *
 * Search for a contiguous guest physical mem range.
 *
 * Parameters:
 *  vcp: VM create parameters that contain the memory map to search in
 *  gpa: the starting guest physical address
 *  len: the length of the memory range
 *
 * Return values:
 *  NULL: on failure if there is no memory range as described by the parameters
 *  Pointer to vm_mem_range that contains the start of the range otherwise.
 */
struct vm_mem_range *
find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
{
	size_t i, n;
	struct vm_mem_range *vmr;

	/* Find the first vm_mem_range that contains gpa */
	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa < vmr->vmr_gpa + vmr->vmr_size)
			break;
	}

	/* No range found. */
	if (i == vcp->vcp_nmemranges)
		return (NULL);

	/*
	 * vmr may cover the range [gpa, gpa + len) only partly. Make
	 * sure that the following vm_mem_ranges are contiguous and
	 * cover the rest.
	 */
	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
	if (len < n)
		len = 0;
	else
		len -= n;
	gpa = vmr->vmr_gpa + vmr->vmr_size;
	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];
		if (gpa != vmr->vmr_gpa)
			return (NULL);
		if (len <= vmr->vmr_size)
			len = 0;
		else
			len -= vmr->vmr_size;

		gpa = vmr->vmr_gpa + vmr->vmr_size;
	}

	if (len != 0)
		return (NULL);

	return (vmr);
}

/*
 * write_mem
 *
 * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
 *
 * Parameters:
 *  dst: the destination paddr_t in the guest VM
 *  buf: data to copy (or NULL to zero the data)
 *  len: number of bytes to copy
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [dst, dst + len) does not
 *      exist in the guest.
 */
int
write_mem(paddr_t dst, const void *buf, size_t len)
{
	const char *from = buf;
	char *to;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, dst, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
		    "len = 0x%zx", __func__, dst, len);
		return (EINVAL);
	}

	off = dst - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		to = (char *)vmr->vmr_va + off;
		if (buf == NULL)
			memset(to, 0, n);
		else {
			memcpy(to, from, n);
			from += n;
		}
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * read_mem
 *
 * Reads memory at guest paddr 'src' into 'buf'.
 *
 * Parameters:
 *  src: the source paddr_t in the guest VM to read from.
 *  buf: destination (local) buffer
 *  len: number of bytes to read
 *
 * Return values:
 *  0: success
 *  EINVAL: if the guest physical memory range [src, src + len) does not
 *      exist in the guest.
 */
int
read_mem(paddr_t src, void *buf, size_t len)
{
	char *from, *to = buf;
	size_t n, off;
	struct vm_mem_range *vmr;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
	if (vmr == NULL) {
		errno = EINVAL;
		log_warn("%s: failed - invalid memory range src = 0x%lx, "
		    "len = 0x%zx", __func__, src, len);
		return (EINVAL);
	}

	off = src - vmr->vmr_gpa;
	while (len != 0) {
		n = vmr->vmr_size - off;
		if (len < n)
			n = len;

		from = (char *)vmr->vmr_va + off;
		memcpy(to, from, n);

		to += n;
		len -= n;
		off = 0;
		vmr++;
	}

	return (0);
}

/*
 * hvaddr_mem
 *
 * Translate a guest physical address to a host virtual address, checking the
 * provided memory range length to confirm it's contiguous within the same
 * guest memory range (vm_mem_range).
 *
 * Parameters:
 *  gpa: guest physical address to translate
 *  len: number of bytes in the intended range
 *
 * Return values:
 *  void* to host virtual memory on success
 *  NULL on error, setting errno to:
 *    EFAULT: gpa falls outside guest memory ranges
 *    EINVAL: requested len extends beyond memory range
 */
void *
hvaddr_mem(paddr_t gpa, size_t len)
{
	struct vm_mem_range *vmr;
	size_t off;

	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, gpa, len);
	if (vmr == NULL) {
		log_warnx("%s: failed - invalid gpa: 0x%lx", __func__, gpa);
		errno = EFAULT;
		return (NULL);
	}

	off = gpa - vmr->vmr_gpa;
	if (len > (vmr->vmr_size - off)) {
		log_warnx("%s: failed - invalid memory range: gpa=0x%lx, "
		    "len=%zu", __func__, gpa, len);
		errno = EINVAL;
		return (NULL);
	}

	return ((char *)vmr->vmr_va + off);
}

/*
 * vcpu_assert_irq
 *
 * Injects the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to inject to
 *  vcpu_id: VCPU ID to inject to
 *  irq: IRQ to inject
 */
void
vcpu_assert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_assert_irq(irq);

	if (i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 1))
			fatalx("%s: can't assert INTR", __func__);

		vcpu_unhalt(vcpu_id);
		vcpu_signal_run(vcpu_id);
	}
}

/*
 * vcpu_deassert_irq
 *
 * Clears the specified IRQ on the supplied vcpu/vm
 *
 * Parameters:
 *  vm_id: VM ID to clear in
 *  vcpu_id: VCPU ID to clear in
 *  irq: IRQ to clear
 */
void
vcpu_deassert_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
{
	i8259_deassert_irq(irq);

	if (!i8259_is_pending()) {
		if (vcpu_intr(vm_id, vcpu_id, 0))
			fatalx("%s: can't deassert INTR for vm_id %d, "
			    "vcpu_id %d", __func__, vm_id, vcpu_id);
	}
}

/*
 * set_return_data
 *
 * Utility function for manipulating register data in vm exit info
 * structs. This function ensures that the data is copied to the
 * vei->vei.vei_data field with the proper size for the operation being
 * performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: return data
 */
void
set_return_data(struct vm_exit *vei, uint32_t data)
{
	switch (vei->vei.vei_size) {
	case 1:
		vei->vei.vei_data &= ~0xFF;
		vei->vei.vei_data |= (uint8_t)data;
		break;
	case 2:
		vei->vei.vei_data &= ~0xFFFF;
		vei->vei.vei_data |= (uint16_t)data;
		break;
	case 4:
		vei->vei.vei_data = data;
		break;
	}
}

/*
 * get_input_data
 *
 * Utility function for manipulating register data in vm exit info
 * structs. This function ensures that the data is copied from the
 * vei->vei.vei_data field with the proper size for the operation being
 * performed.
 *
 * Parameters:
 *  vei: exit information
 *  data: location to store the result
 */
void
get_input_data(struct vm_exit *vei, uint32_t *data)
{
	switch (vei->vei.vei_size) {
	case 1:
		*data &= 0xFFFFFF00;
		*data |= (uint8_t)vei->vei.vei_data;
		break;
	case 2:
		*data &= 0xFFFF0000;
		*data |= (uint16_t)vei->vei.vei_data;
		break;
	case 4:
		*data = vei->vei.vei_data;
		break;
	default:
		log_warnx("%s: invalid i/o size %d", __func__,
		    vei->vei.vei_size);
	}
}
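
/*
 * Example usage (a sketch, not lifted from any particular device): an
 * emulated device's port i/o handler pairs these two helpers on the two
 * directions of an access, so that only vei_size bytes are moved:
 *
 *	uint32_t v = 0;
 *
 *	if (vei->vei.vei_dir == VEI_DIR_OUT)
 *		get_input_data(vei, &v);	// guest OUT: fetch guest data
 *	else
 *		set_return_data(vei, regval);	// guest IN: return device data
 */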

/*
 * translate_gva
 *
 * Translates a guest virtual address to a guest physical address by walking
 * the currently active page table (if needed).
 *
 * XXX ensure translate_gva updates the A bit in the PTE
 * XXX ensure translate_gva respects segment base and limits in i386 mode
 * XXX ensure translate_gva respects segment wraparound in i8086 mode
 * XXX ensure translate_gva updates the A bit in the segment selector
 * XXX ensure translate_gva respects CR4.LMSLE if available
 *
 * Parameters:
 *  exit: The VCPU this translation should be performed for (guest MMU settings
 *   are gathered from this VCPU)
 *  va: virtual address to translate
 *  pa: pointer to paddr_t variable that will receive the translated physical
 *   address. 'pa' is unchanged on error.
 *  mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
 *   the address should be translated
 *
 * Return values:
 *  0: the address was successfully translated - 'pa' contains the physical
 *     address currently mapped by 'va'.
 *  EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
 *     and %cr2 set in the vcpu structure.
 *  EINVAL: an error occurred reading paging table structures
 */
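/*
 * Worked example (a sketch, assuming the amd64 4-level constants from
 * machine/pte.h, i.e. L4_SHIFT == 39 and shift_width == 9): the walk
 * extracts successive 9-bit table indices from the top of the address:
 *
 *	pdidx = (va & L4_MASK) >> 39;	// level 4 (PML4) index
 *	// next iterations: shift 30 (L3), then 21 (L2), then 12 (L1)
 *
 * The final translation below combines the PTE's frame bits (high_mask)
 * with the low bits of va that the last level left untranslated.
 */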
int
translate_gva(struct vm_exit *exit, uint64_t va, uint64_t *pa, int mode)
{
	int level, shift, pdidx;
	uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
	uint64_t shift_width, pte_size;
	struct vcpu_reg_state *vrs;

	vrs = &exit->vrs;

	if (!pa)
		return (EINVAL);

	if (!(vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PG)) {
		log_debug("%s: unpaged, va=pa=0x%llx", __func__, va);
		*pa = va;
		return (0);
	}

	pt_paddr = vrs->vrs_crs[VCPU_REGS_CR3];

	log_debug("%s: guest %%cr0=0x%llx, %%cr3=0x%llx", __func__,
	    vrs->vrs_crs[VCPU_REGS_CR0], vrs->vrs_crs[VCPU_REGS_CR3]);

	if (vrs->vrs_crs[VCPU_REGS_CR0] & CR0_PE) {
		if (vrs->vrs_crs[VCPU_REGS_CR4] & CR4_PAE) {
			pte_size = sizeof(uint64_t);
			shift_width = 9;

			if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA) {
				/* 4 level paging */
				level = 4;
				mask = L4_MASK;
				shift = L4_SHIFT;
			} else {
				/* 32 bit with PAE paging */
				level = 3;
				mask = L3_MASK;
				shift = L3_SHIFT;
			}
		} else {
			/* 32 bit paging */
			level = 2;
			shift_width = 10;
			mask = 0xFFC00000;
			shift = 22;
			pte_size = sizeof(uint32_t);
		}
	} else
		return (EINVAL);

	/* XXX: Check for R bit in segment selector and set A bit */

	for (; level > 0; level--) {
		pdidx = (va & mask) >> shift;
		pte_paddr = (pt_paddr) + (pdidx * pte_size);

		log_debug("%s: read pte level %d @ GPA 0x%llx", __func__,
		    level, pte_paddr);
		if (read_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to read pte", __func__);
			return (EFAULT);
		}

		log_debug("%s: PTE @ 0x%llx = 0x%llx", __func__, pte_paddr,
		    pte);

		/* XXX: Set CR2 */
		if (!(pte & PG_V))
			return (EFAULT);

		/* XXX: Check for SMAP */
		if ((mode == PROT_WRITE) && !(pte & PG_RW))
			return (EPERM);

		if ((exit->cpl > 0) && !(pte & PG_u))
			return (EPERM);

		pte = pte | PG_U;
		if (mode == PROT_WRITE)
			pte = pte | PG_M;
		if (write_mem(pte_paddr, &pte, pte_size)) {
			log_warn("%s: failed to write back flags to pte",
			    __func__);
			return (EIO);
		}

		/* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
		if (pte & PG_PS)
			break;

		if (level > 1) {
			pt_paddr = pte & PG_FRAME;
			shift -= shift_width;
			mask = mask >> shift_width;
		}
	}

	low_mask = (1 << shift) - 1;
	high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
	*pa = (pte & high_mask) | (va & low_mask);

	log_debug("%s: final GPA for GVA 0x%llx = 0x%llx", __func__, va, *pa);

	return (0);
}

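/*
 * intr_pending
 *
 * Returns non-zero if the emulated interrupt controller has an
 * interrupt pending for the given VM.
 */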
int
intr_pending(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_is_pending();
}

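/*
 * intr_ack
 *
 * Acknowledges the pending interrupt on the emulated interrupt
 * controller and returns its vector (see i8259_ack).
 */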
int
intr_ack(struct vmd_vm *vm)
{
	/* XXX select active interrupt controller */
	return i8259_ack();
}

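/*
 * intr_toggle_el
 *
 * Passes the new edge/level trigger value for the given irq line
 * through to the emulated interrupt controller's ELCR.
 *
 * Parameters:
 *  vm: the VM the irq line belongs to
 *  irq: irq line to configure
 *  val: new trigger value, handed to pic_set_elcr
 */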
void
intr_toggle_el(struct vmd_vm *vm, int irq, int val)
{
	/* XXX select active interrupt controller */
	pic_set_elcr(irq, val);
}

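/*
 * vmd_check_vmh
 *
 * Validates a vm dump header against the running host: the signature
 * and version must match, and the cpuid leaves recorded by
 * dump_send_header must describe a cpu no more capable than this one.
 *
 * Parameters:
 *  vmh: the vm dump header to validate
 *
 * Return values:
 *  0 if the dump is compatible with this host
 *  -1 otherwise
 */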
int
vmd_check_vmh(struct vm_dump_header *vmh)
{
	int i;
	unsigned int code, leaf;
	unsigned int a, b, c, d;

	if (strncmp(vmh->vmh_signature, VM_DUMP_SIGNATURE,
	    strlen(VM_DUMP_SIGNATURE)) != 0) {
		log_warnx("%s: incompatible dump signature", __func__);
		return (-1);
	}

	if (vmh->vmh_version != VM_DUMP_VERSION) {
		log_warnx("%s: incompatible dump version", __func__);
		return (-1);
	}

	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
		code = vmh->vmh_cpuids[i].code;
		leaf = vmh->vmh_cpuids[i].leaf;
		if (leaf != 0x00) {
			log_debug("%s: invalid leaf 0x%x for code 0x%x",
			    __func__, leaf, code);
			return (-1);
		}

		switch (code) {
		case 0x00:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].a > a) {
				log_debug("%s: incompatible cpuid level",
				    __func__);
				return (-1);
			}
			if (!(vmh->vmh_cpuids[i].b == b &&
			    vmh->vmh_cpuids[i].c == c &&
			    vmh->vmh_cpuids[i].d == d)) {
				log_debug("%s: incompatible cpu brand",
				    __func__);
				return (-1);
			}
			break;

		case 0x01:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].c & c & VMM_CPUIDECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_CPUIDECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d & VMM_CPUIDEDX_MASK) !=
			    (vmh->vmh_cpuids[i].d & VMM_CPUIDEDX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x07:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].b & b & VMM_SEFF0EBX_MASK) !=
			    (vmh->vmh_cpuids[i].b & VMM_SEFF0EBX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: b", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c & VMM_SEFF0ECX_MASK) !=
			    (vmh->vmh_cpuids[i].c & VMM_SEFF0ECX_MASK)) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		case 0x0d:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if (vmh->vmh_cpuids[i].b > b) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for enabled XCR0 features",
				    __func__);
				return (-1);
			}
			if (vmh->vmh_cpuids[i].c > c) {
				log_debug("%s: incompatible cpu: insufficient "
				    "max save area for supported XCR0 features",
				    __func__);
				return (-1);
			}
			break;

		case 0x80000001:
			CPUID_LEAF(code, leaf, a, b, c, d);
			if ((vmh->vmh_cpuids[i].a & a) !=
			    vmh->vmh_cpuids[i].a) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: a", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].c & c) !=
			    vmh->vmh_cpuids[i].c) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: c", __func__,
				    code, leaf);
				return (-1);
			}
			if ((vmh->vmh_cpuids[i].d & d) !=
			    vmh->vmh_cpuids[i].d) {
				log_debug("%s: incompatible cpu features "
				    "code: 0x%x leaf: 0x%x  reg: d", __func__,
				    code, leaf);
				return (-1);
			}
			break;

		default:
			log_debug("%s: unknown code 0x%x", __func__, code);
			return (-1);
		}
	}

	return (0);
}