1 /*	$OpenBSD: vm.c,v 1.40 2018/09/28 12:35:32 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/ioctl.h>
21 #include <sys/queue.h>
22 #include <sys/wait.h>
23 #include <sys/uio.h>
24 #include <sys/socket.h>
25 #include <sys/time.h>
26 #include <sys/mman.h>
27 
28 #include <dev/ic/i8253reg.h>
29 #include <dev/isa/isareg.h>
30 #include <dev/pci/pcireg.h>
31 
32 #include <machine/param.h>
33 #include <machine/psl.h>
34 #include <machine/pte.h>
35 #include <machine/specialreg.h>
36 #include <machine/vmmvar.h>
37 
38 #include <net/if.h>
39 
40 #include <errno.h>
41 #include <event.h>
42 #include <fcntl.h>
43 #include <imsg.h>
44 #include <limits.h>
45 #include <poll.h>
46 #include <pthread.h>
47 #include <stddef.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <unistd.h>
52 #include <util.h>
53 
54 #include "vmd.h"
55 #include "vmm.h"
56 #include "loadfile.h"
57 #include "pci.h"
58 #include "virtio.h"
59 #include "proc.h"
60 #include "i8253.h"
61 #include "i8259.h"
62 #include "ns8250.h"
63 #include "mc146818.h"
64 #include "atomicio.h"
65 
66 io_fn_t ioports_map[MAX_PORTS];
67 
68 int run_vm(int, int *, int *, struct vmop_create_params *,
69     struct vcpu_reg_state *);
70 void vm_dispatch_vmm(int, short, void *);
71 void *event_thread(void *);
72 void *vcpu_run_loop(void *);
73 int vcpu_exit(struct vm_run_params *);
74 int vcpu_reset(uint32_t, uint32_t, struct vcpu_reg_state *);
75 void create_memory_map(struct vm_create_params *);
76 int alloc_guest_mem(struct vm_create_params *);
77 int vmm_create_vm(struct vm_create_params *);
78 void init_emulated_hw(struct vmop_create_params *, int, int *, int *);
79 void restore_emulated_hw(struct vm_create_params *, int, int *, int *, int);
80 void vcpu_exit_inout(struct vm_run_params *);
81 uint8_t vcpu_exit_pci(struct vm_run_params *);
82 int vcpu_pic_intr(uint32_t, uint32_t, uint8_t);
83 int loadfile_bios(FILE *, struct vcpu_reg_state *);
84 int send_vm(int, struct vm_create_params *);
85 int dump_send_header(int);
86 int dump_vmr(int, struct vm_mem_range *);
87 int dump_mem(int, struct vm_create_params *);
88 void restore_vmr(int, struct vm_mem_range *);
89 void restore_mem(int, struct vm_create_params *);
90 void pause_vm(struct vm_create_params *);
91 void unpause_vm(struct vm_create_params *);
92 
93 static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
94     size_t);
95 
96 int con_fd;
97 struct vmd_vm *current_vm;
98 
99 extern struct vmd *env;
100 
101 extern char *__progname;
102 
103 pthread_mutex_t threadmutex;
104 pthread_cond_t threadcond;
105 
106 pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM];
107 pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM];
108 uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM];
109 uint8_t vcpu_done[VMM_MAX_VCPUS_PER_VM];
110 
111 /*
112  * Represents a standard register set for an OS to be booted
113  * as a flat 64 bit address space.
114  *
115  * NOT set here are:
116  *  RIP
117  *  RSP
118  *  GDTR BASE
119  *
120  * Specific bootloaders should clone this structure and override
121  * those fields as needed.
122  *
123  * Note - CR3 and various bits in CR0 may be overridden by vmm(4) based on
124  *        features of the CPU in use.
125  */
126 static const struct vcpu_reg_state vcpu_init_flat64 = {
127 #ifdef __i386__
128 	.vrs_gprs[VCPU_REGS_EFLAGS] = 0x2,
129 	.vrs_gprs[VCPU_REGS_EIP] = 0x0,
130 	.vrs_gprs[VCPU_REGS_ESP] = 0x0,
131 #else
132 	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
133 	.vrs_gprs[VCPU_REGS_RIP] = 0x0,
134 	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
135 #endif
136 	.vrs_crs[VCPU_REGS_CR0] = CR0_CD | CR0_NW | CR0_ET | CR0_PE | CR0_PG,
137 	.vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
138 	.vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
139 	.vrs_crs[VCPU_REGS_PDPTE0] = 0ULL,
140 	.vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
141 	.vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
142 	.vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
143 	.vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
144 	.vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
145 	.vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
146 	.vrs_sregs[VCPU_REGS_FS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
147 	.vrs_sregs[VCPU_REGS_GS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
148 	.vrs_sregs[VCPU_REGS_SS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
149 	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
150 	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
151 	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
152 	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
153 	.vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
154 #ifndef __i386__
155 	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
156 	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
157 	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
158 	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
159 	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
160 	.vrs_msrs[VCPU_REGS_MISC_ENABLE] = 0ULL,
161 	.vrs_crs[VCPU_REGS_XCR0] = XCR0_X87
162 #endif
163 };
164 
165 /*
166  * Represents a standard register set for a BIOS to be booted
167  * as a flat 16 bit address space.
168  */
169 static const struct vcpu_reg_state vcpu_init_flat16 = {
170 #ifdef __i386__
171 	.vrs_gprs[VCPU_REGS_EFLAGS] = 0x2,
172 	.vrs_gprs[VCPU_REGS_EIP] = 0xFFF0,
173 	.vrs_gprs[VCPU_REGS_ESP] = 0x0,
174 #else
175 	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
176 	.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
177 	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
178 #endif
179 	.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
180 	.vrs_crs[VCPU_REGS_CR3] = 0,
181 	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
182 	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
183 	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
184 	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
185 	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
186 	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
187 	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
188 	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
189 	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
190 	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
191 	.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
192 #ifndef __i386__
193 	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
194 	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
195 	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
196 	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
197 	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
198 	.vrs_crs[VCPU_REGS_XCR0] = XCR0_X87
199 #endif
200 };
201 
202 /*
203  * loadfile_bios
204  *
205  * As an alternative to loadfile_elf, this function loads a non-ELF BIOS image
206  * directly into memory.
207  *
208  * Parameters:
209  *  fp: file handle of the BIOS image to load
210  *  (out) vrs: register state to set on init for this image
211  *
212  * Return values:
213  *  0 if successful
214  *  -1 if an error occurred (errno is set to indicate the failure)
215  */
216 int
217 loadfile_bios(FILE *fp, struct vcpu_reg_state *vrs)
218 {
219 	off_t	 size, off;
220 
221 	/* Set up a "flat 16 bit" register state for BIOS */
222 	memcpy(vrs, &vcpu_init_flat16, sizeof(*vrs));
223 
224 	/* Get the size of the BIOS image and seek to the beginning */
225 	if (fseeko(fp, 0, SEEK_END) == -1 || (size = ftello(fp)) == -1 ||
226 	    fseeko(fp, 0, SEEK_SET) == -1)
227 		return (-1);
228 
229 	/* The BIOS image must end at 1M */
230 	if ((off = 1048576 - size) < 0)
231 		return (-1);
232 
233 	/* Read BIOS image into memory */
234 	if (mread(fp, off, size) != (size_t)size) {
235 		errno = EIO;
236 		return (-1);
237 	}
238 
239 	log_debug("%s: loaded BIOS image", __func__);
240 
241 	return (0);
242 }
243 
244 /*
245  * start_vm
246  *
247  * After forking a new VM process, starts the new VM with the creation
248  * parameters supplied (in the incoming vm->vm_params field). This
249  * function performs a basic sanity check on the incoming parameters
250  * and then performs the following steps to complete the creation of the VM:
251  *
252  * 1. validates and creates the new VM
253  * 2. opens the imsg control channel to the parent and drops more privilege
254  * 3. drops additional privileges by calling pledge(2)
255  * 4. loads the kernel from the disk image or file descriptor
256  * 5. runs the VM's VCPU loops.
257  *
258  * Parameters:
259  *  vm: The VM data structure, including the VM create parameters.
260  *  fd: The imsg socket that is connected to the parent process.
261  *
262  * Return values:
263  *  0: success
264  *  !0 : failure - typically an errno indicating the source of the failure
265  */
266 int
267 start_vm(struct vmd_vm *vm, int fd)
268 {
269 	struct vmop_create_params *vmc = &vm->vm_params;
270 	struct vm_create_params	*vcp = &vmc->vmc_params;
271 	struct vcpu_reg_state	 vrs;
272 	int			 nicfds[VMM_MAX_NICS_PER_VM];
273 	int			 ret;
274 	FILE			*fp;
275 	struct vmboot_params	 vmboot;
276 	size_t			 i;
277 	struct vm_rwregs_params  vrp;
278 
279 	/* Child */
280 	setproctitle("%s", vcp->vcp_name);
281 	log_procinit(vcp->vcp_name);
282 
283 	if (!vm->vm_received)
284 		create_memory_map(vcp);
285 
286 	ret = alloc_guest_mem(vcp);
287 
288 	if (ret) {
289 		errno = ret;
290 		fatal("could not allocate guest memory - exiting");
291 	}
292 
293 	ret = vmm_create_vm(vcp);
294 	current_vm = vm;
295 
296 	/* send back the kernel-generated vm id (0 on error) */
297 	if (write(fd, &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
298 	    sizeof(vcp->vcp_id))
299 		fatal("write vcp id");
300 
301 	if (ret) {
302 		errno = ret;
303 		fatal("create vmm ioctl failed - exiting");
304 	}
305 
306 	/*
307 	 * pledge in the vm processes:
308 	 * stdio - for malloc and basic I/O including events.
309 	 * recvfd - for send/recv.
310 	 * vmm - for the vmm ioctls and operations.
311 	 */
312 	if (pledge("stdio vmm recvfd", NULL) == -1)
313 		fatal("pledge");
314 
315 	if (vm->vm_received) {
316 		ret = read(vm->vm_receive_fd, &vrp, sizeof(vrp));
317 		if (ret != sizeof(vrp)) {
318 			fatal("received incomplete vrp - exiting");
319 		}
320 		vrs = vrp.vrwp_regs;
321 	} else {
322 		/*
323 		 * Set up default "flat 64 bit" register state - RIP,
324 		 * RSP, and GDT info will be set in bootloader
325 		 */
326 		memcpy(&vrs, &vcpu_init_flat64, sizeof(vrs));
327 
328 		/* Find and open kernel image */
329 		if ((fp = vmboot_open(vm->vm_kernel,
330 		    vm->vm_disks[0], vmc->vmc_disktypes[0], &vmboot)) == NULL)
331 			fatalx("failed to open kernel - exiting");
332 
333 		/* Load kernel image */
334 		ret = loadfile_elf(fp, vcp, &vrs,
335 		    vmboot.vbp_bootdev, vmboot.vbp_howto);
336 
337 		/*
338 		 * Try BIOS as a fallback (only if it was provided as an image
339 		 * with vm->vm_kernel and not loaded from the disk)
340 		 */
341 		if (ret && errno == ENOEXEC && vm->vm_kernel != -1)
342 			ret = loadfile_bios(fp, &vrs);
343 
344 		if (ret)
345 			fatal("failed to load kernel or BIOS - exiting");
346 
347 		vmboot_close(fp, &vmboot);
348 	}
349 
350 	if (vm->vm_kernel != -1)
351 		close(vm->vm_kernel);
352 
353 	con_fd = vm->vm_tty;
354 	if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1)
355 		fatal("failed to set nonblocking mode on console");
356 
357 	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
358 		nicfds[i] = vm->vm_ifs[i].vif_fd;
359 
360 	event_init();
361 
362 	if (vm->vm_received) {
363 		restore_emulated_hw(vcp, vm->vm_receive_fd, nicfds,
364 		    vm->vm_disks, vm->vm_cdrom);
365 		mc146818_start();
366 		restore_mem(vm->vm_receive_fd, vcp);
367 	}
368 
369 	if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
370 		fatal("setup vm pipe");
371 
372 	/* Execute the vcpu run loop(s) for this VM */
373 	ret = run_vm(vm->vm_cdrom, vm->vm_disks, nicfds, &vm->vm_params, &vrs);
374 
375 	/* Ensure that any in-flight data is written back */
376 	virtio_shutdown(vm);
377 
378 	return (ret);
379 }
380 
381 /*
382  * vm_dispatch_vmm
383  *
384  * imsg callback for messages that are received from the vmm parent process.
385  */
386 void
387 vm_dispatch_vmm(int fd, short event, void *arg)
388 {
389 	struct vmd_vm		*vm = arg;
390 	struct vmop_result	 vmr;
391 	struct imsgev		*iev = &vm->vm_iev;
392 	struct imsgbuf		*ibuf = &iev->ibuf;
393 	struct imsg		 imsg;
394 	ssize_t			 n;
395 	int			 verbose;
396 
397 	if (event & EV_READ) {
398 		if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
399 			fatal("%s: imsg_read", __func__);
400 		if (n == 0)
401 			_exit(0);
402 	}
403 
404 	if (event & EV_WRITE) {
405 		if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN)
406 			fatal("%s: msgbuf_write fd %d", __func__, ibuf->fd);
407 		if (n == 0)
408 			_exit(0);
409 	}
410 
411 	for (;;) {
412 		if ((n = imsg_get(ibuf, &imsg)) == -1)
413 			fatal("%s: imsg_get", __func__);
414 		if (n == 0)
415 			break;
416 
417 #if DEBUG > 1
418 		log_debug("%s: got imsg %d from %s",
419 		    __func__, imsg.hdr.type,
420 		    vm->vm_params.vmc_params.vcp_name);
421 #endif
422 
423 		switch (imsg.hdr.type) {
424 		case IMSG_CTL_VERBOSE:
425 			IMSG_SIZE_CHECK(&imsg, &verbose);
426 			memcpy(&verbose, imsg.data, sizeof(verbose));
427 			log_setverbose(verbose);
428 			break;
429 		case IMSG_VMDOP_VM_SHUTDOWN:
430 			if (vmmci_ctl(VMMCI_SHUTDOWN) == -1)
431 				_exit(0);
432 			break;
433 		case IMSG_VMDOP_VM_REBOOT:
434 			if (vmmci_ctl(VMMCI_REBOOT) == -1)
435 				_exit(0);
436 			break;
437 		case IMSG_VMDOP_PAUSE_VM:
438 			vmr.vmr_result = 0;
439 			vmr.vmr_id = vm->vm_vmid;
440 			pause_vm(&vm->vm_params.vmc_params);
441 			imsg_compose_event(&vm->vm_iev,
442 			    IMSG_VMDOP_PAUSE_VM_RESPONSE,
443 			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
444 			    sizeof(vmr));
445 			break;
446 		case IMSG_VMDOP_UNPAUSE_VM:
447 			vmr.vmr_result = 0;
448 			vmr.vmr_id = vm->vm_vmid;
449 			unpause_vm(&vm->vm_params.vmc_params);
450 			imsg_compose_event(&vm->vm_iev,
451 			    IMSG_VMDOP_UNPAUSE_VM_RESPONSE,
452 			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
453 			    sizeof(vmr));
454 			break;
455 		case IMSG_VMDOP_SEND_VM_REQUEST:
456 			vmr.vmr_id = vm->vm_vmid;
457 			vmr.vmr_result = send_vm(imsg.fd,
458 			    &vm->vm_params.vmc_params);
459 			imsg_compose_event(&vm->vm_iev,
460 			    IMSG_VMDOP_SEND_VM_RESPONSE,
461 			    imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
462 			    sizeof(vmr));
463 			break;
464 		default:
465 			fatalx("%s: got invalid imsg %d from %s",
466 			    __func__, imsg.hdr.type,
467 			    vm->vm_params.vmc_params.vcp_name);
468 		}
469 		imsg_free(&imsg);
470 	}
471 	imsg_event_add(iev);
472 }
473 
474 /*
475  * vm_shutdown
476  *
477  * Tell the vmm parent process to shutdown or reboot the VM and exit.
478  */
479 __dead void
480 vm_shutdown(unsigned int cmd)
481 {
482 	switch (cmd) {
483 	case VMMCI_NONE:
484 	case VMMCI_SHUTDOWN:
485 		(void)imsg_compose_event(&current_vm->vm_iev,
486 		    IMSG_VMDOP_VM_SHUTDOWN, 0, 0, -1, NULL, 0);
487 		break;
488 	case VMMCI_REBOOT:
489 		(void)imsg_compose_event(&current_vm->vm_iev,
490 		    IMSG_VMDOP_VM_REBOOT, 0, 0, -1, NULL, 0);
491 		break;
492 	default:
493 		fatalx("invalid vm ctl command: %d", cmd);
494 	}
495 	imsg_flush(&current_vm->vm_iev.ibuf);
496 
497 	_exit(0);
498 }
499 
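/*
 * send_vm
 *
 * Sends the running VM's state to fd for migration: the dump header,
 * the create parameters, every VCPU's register state, the emulated
 * device state and the guest memory. The VM is paused before sending
 * and terminated on success; on failure it is unpaused and continues
 * running.
 *
 * Parameters:
 *  fd: file descriptor to write the VM state to
 *  vcp: VM create parameters of the VM being sent
 *
 * Return values:
 *  0: success
 *  !0 : an error occurred while sending the VM
 */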
500 int
501 send_vm(int fd, struct vm_create_params *vcp)
502 {
503 	struct vm_rwregs_params	   vrp;
504 	struct vmop_create_params *vmc;
505 	struct vm_terminate_params vtp;
506 	unsigned int		   flags = 0;
507 	unsigned int		   i;
508 	int			   ret = 0;
509 	size_t			   sz;
510 
511 	if (dump_send_header(fd)) {
512 		log_info("%s: failed to send vm dump header", __func__);
513 		goto err;
514 	}
515 
516 	pause_vm(vcp);
517 
518 	vmc = calloc(1, sizeof(struct vmop_create_params));
519 	if (vmc == NULL) {
520 		log_warn("%s: calloc error getting vmc", __func__);
521 		ret = -1;
522 		goto err;
523 	}
524 
525 	flags |= VMOP_CREATE_MEMORY;
526 	memcpy(&vmc->vmc_params, &current_vm->vm_params, sizeof(struct
527 	    vmop_create_params));
528 	vmc->vmc_flags = flags;
529 	vrp.vrwp_vm_id = vcp->vcp_id;
530 	vrp.vrwp_mask = VM_RWREGS_ALL;
531 
532 	sz = atomicio(vwrite, fd, vmc, sizeof(struct vmop_create_params));
533 	if (sz != sizeof(struct vmop_create_params)) {
534 		ret = -1;
535 		goto err;
536 	}
537 
538 	for (i = 0; i < vcp->vcp_ncpus; i++) {
539 		vrp.vrwp_vcpu_id = i;
540 		if ((ret = ioctl(env->vmd_fd, VMM_IOC_READREGS, &vrp))) {
541 			log_warn("%s: readregs failed", __func__);
542 			goto err;
543 		}
544 
545 		sz = atomicio(vwrite, fd, &vrp,
546 		    sizeof(struct vm_rwregs_params));
547 		if (sz != sizeof(struct vm_rwregs_params)) {
548 			log_warn("%s: dumping registers failed", __func__);
549 			ret = -1;
550 			goto err;
551 		}
552 	}
553 
554 	if ((ret = i8253_dump(fd)))
555 		goto err;
556 	if ((ret = i8259_dump(fd)))
557 		goto err;
558 	if ((ret = ns8250_dump(fd)))
559 		goto err;
560 	if ((ret = mc146818_dump(fd)))
561 		goto err;
562 	if ((ret = pci_dump(fd)))
563 		goto err;
564 	if ((ret = virtio_dump(fd)))
565 		goto err;
566 	if ((ret = dump_mem(fd, vcp)))
567 		goto err;
568 
569 	vtp.vtp_vm_id = vcp->vcp_id;
570 	if (ioctl(env->vmd_fd, VMM_IOC_TERM, &vtp) < 0) {
571 		log_warnx("%s: term IOC error: %d, %d", __func__,
572 		    errno, ENOENT);
573 	}
574 err:
575 	close(fd);
576 	if (ret)
577 		unpause_vm(vcp);
578 	return ret;
579 }
580 
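/*
 * dump_send_header
 *
 * Writes the VM dump header (dump version plus a set of host CPUID
 * leaves) to fd at the start of a send operation.
 *
 * Return values:
 *  0: success
 *  -1: writing the header failed
 */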
581 int
582 dump_send_header(int fd) {
583 	struct vm_dump_header	   vmh;
584 	int			   i;
585 
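	/*
	 * Record a fixed set of host CPUID leaves in the dump header so
	 * that the receiving side can check CPU compatibility before the
	 * VM is resumed.
	 */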
586 	vmh.vmh_cpuids[0].code = 0x00;
587 	vmh.vmh_cpuids[0].leaf = 0x00;
588 
589 	vmh.vmh_cpuids[1].code = 0x01;
590 	vmh.vmh_cpuids[1].leaf = 0x00;
591 
592 	vmh.vmh_cpuids[2].code = 0x07;
593 	vmh.vmh_cpuids[2].leaf = 0x00;
594 
595 	vmh.vmh_cpuids[3].code = 0x0d;
596 	vmh.vmh_cpuids[3].leaf = 0x00;
597 
598 	vmh.vmh_cpuids[4].code = 0x80000001;
599 	vmh.vmh_cpuids[4].leaf = 0x00;
600 
601 	vmh.vmh_version = VM_DUMP_VERSION;
602 
603 	for (i = 0; i < VM_DUMP_HEADER_CPUID_COUNT; i++) {
604 		CPUID_LEAF(vmh.vmh_cpuids[i].code,
605 		    vmh.vmh_cpuids[i].leaf,
606 		    vmh.vmh_cpuids[i].a,
607 		    vmh.vmh_cpuids[i].b,
608 		    vmh.vmh_cpuids[i].c,
609 		    vmh.vmh_cpuids[i].d);
610 	}
611 
612 	if (atomicio(vwrite, fd, &vmh, sizeof(vmh)) != sizeof(vmh))
613 		return (-1);
614 
615 	return (0);
616 }
617 
618 int
619 dump_mem(int fd, struct vm_create_params *vcp)
620 {
621 	unsigned int	i;
622 	int		ret;
623 	struct		vm_mem_range *vmr;
624 
625 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
626 		vmr = &vcp->vcp_memranges[i];
627 		ret = dump_vmr(fd, vmr);
628 		if (ret)
629 			return ret;
630 	}
631 	return (0);
632 }
633 
634 void
635 restore_mem(int fd, struct vm_create_params *vcp)
636 {
637 	unsigned int	     i;
638 	struct vm_mem_range *vmr;
639 
640 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
641 		vmr = &vcp->vcp_memranges[i];
642 		restore_vmr(fd, vmr);
643 	}
644 }
645 
646 int
647 dump_vmr(int fd, struct vm_mem_range *vmr)
648 {
649 	size_t	rem = vmr->vmr_size, read = 0;
650 	char	buf[PAGE_SIZE];
651 
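	/*
	 * Stream the range to fd one page at a time; vmr_size is
	 * expected to be a multiple of PAGE_SIZE.
	 */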
652 	while (rem > 0) {
653 		if (read_mem(vmr->vmr_gpa + read, buf, PAGE_SIZE)) {
654 			log_warn("failed to read vmr");
655 			return (-1);
656 		}
657 		if (atomicio(vwrite, fd, buf, sizeof(buf)) != sizeof(buf)) {
658 			log_warn("failed to dump vmr");
659 			return (-1);
660 		}
661 		rem = rem - PAGE_SIZE;
662 		read = read + PAGE_SIZE;
663 	}
664 	return (0);
665 }
666 
667 void
668 restore_vmr(int fd, struct vm_mem_range *vmr)
669 {
670 	size_t	rem = vmr->vmr_size, wrote = 0;
671 	char	buf[PAGE_SIZE];
672 
673 	while (rem > 0) {
674 		if (atomicio(read, fd, buf, sizeof(buf)) != sizeof(buf))
675 			fatal("failed to restore vmr");
676 		if (write_mem(vmr->vmr_gpa + wrote, buf, PAGE_SIZE))
677 			fatal("failed to write vmr");
678 		rem = rem - PAGE_SIZE;
679 		wrote = wrote + PAGE_SIZE;
680 	}
681 }
682 
683 void
684 pause_vm(struct vm_create_params *vcp)
685 {
686 	if (current_vm->vm_paused)
687 		return;
688 
689 	current_vm->vm_paused = 1;
690 
691 	/* XXX: vcpu_run_loop is running in another thread and we have to wait
692 	 * for the vm to exit before returning */
693 	sleep(1);
694 
695 	i8253_stop();
696 	mc146818_stop();
697 }
698 
699 void
700 unpause_vm(struct vm_create_params *vcp)
701 {
702 	unsigned int n;
703 	if (!current_vm->vm_paused)
704 		return;
705 
706 	current_vm->vm_paused = 0;
707 
708 	i8253_start();
709 	mc146818_start();
710 	for (n = 0; n < vcp->vcp_ncpus; n++)
711 		pthread_cond_broadcast(&vcpu_run_cond[n]);
712 }
713 
714 /*
715  * vcpu_reset
716  *
717  * Requests vmm(4) to reset the VCPUs in the indicated VM to
718  * the register state provided
719  *
720  * Parameters
721  *  vmid: VM ID to reset
722  *  vcpu_id: VCPU ID to reset
723  *  vrs: the register state to initialize
724  *
725  * Return values:
726  *  0: success
727  *  !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
728  *      valid)
729  */
730 int
731 vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
732 {
733 	struct vm_resetcpu_params vrp;
734 
735 	memset(&vrp, 0, sizeof(vrp));
736 	vrp.vrp_vm_id = vmid;
737 	vrp.vrp_vcpu_id = vcpu_id;
738 	memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));
739 
740 	log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);
741 
742 	if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) < 0)
743 		return (errno);
744 
745 	return (0);
746 }
747 
748 /*
749  * create_memory_map
750  *
751  * Sets up the guest physical memory ranges that the VM can access.
752  *
753  * Parameters:
754  *  vcp: VM create parameters describing the VM whose memory map
755  *       is being created
756  *
757  * Return values:
758  *  nothing
759  */
760 void
761 create_memory_map(struct vm_create_params *vcp)
762 {
763 	size_t len, mem_bytes, mem_mb;
764 
765 	mem_mb = vcp->vcp_memranges[0].vmr_size;
766 	vcp->vcp_nmemranges = 0;
767 	if (mem_mb < 1 || mem_mb > VMM_MAX_VM_MEM_SIZE)
768 		return;
769 
770 	mem_bytes = mem_mb * 1024 * 1024;
771 
772 	/* First memory region: 0 - LOWMEM_KB (DOS low mem) */
773 	len = LOWMEM_KB * 1024;
774 	vcp->vcp_memranges[0].vmr_gpa = 0x0;
775 	vcp->vcp_memranges[0].vmr_size = len;
776 	mem_bytes -= len;
777 
778 	/*
779 	 * Second memory region: LOWMEM_KB - 1MB.
780 	 *
781 	 * N.B. - Normally ROMs or parts of video RAM are mapped here.
782 	 * We have to add this region, because some systems
783 	 * unconditionally write to 0xb8000 (VGA RAM), and
784 	 * we need to make sure that vmm(4) permits accesses
785 	 * to it. So allocate guest memory for it.
786 	 */
787 	len = 0x100000 - LOWMEM_KB * 1024;
788 	vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
789 	vcp->vcp_memranges[1].vmr_size = len;
790 	mem_bytes -= len;
791 
792 	/* Make sure that we do not place physical memory into MMIO ranges. */
793 	if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - 0x100000)
794 		len = VMM_PCI_MMIO_BAR_BASE - 0x100000;
795 	else
796 		len = mem_bytes;
797 
798 	/* Third memory region: 1MB - (1MB + len) */
799 	vcp->vcp_memranges[2].vmr_gpa = 0x100000;
800 	vcp->vcp_memranges[2].vmr_size = len;
801 	mem_bytes -= len;
802 
803 	if (mem_bytes > 0) {
804 		/* Fourth memory region for the remaining memory (if any) */
805 		vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
806 		vcp->vcp_memranges[3].vmr_size = mem_bytes;
807 		vcp->vcp_nmemranges = 4;
808 	} else
809 		vcp->vcp_nmemranges = 3;
810 }
811 
812 /*
813  * alloc_guest_mem
814  *
815  * Allocates memory for the guest.
816  * Instead of doing a single allocation with one mmap(), we allocate memory
817  * separately for every range for the following reasons:
818  * - ASLR for the individual ranges
819  * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
820  *   map the single mmap'd userspace memory to the individual guest physical
821  *   memory ranges, the underlying amap of the single mmap'd range would have
822  *   to allocate per-page reference counters. The reason is that the
823  *   individual guest physical ranges would reference the single mmap'd region
824  *   only partially. However, if every guest physical range has its own
825  *   corresponding mmap'd userspace allocation, there are no partial
826  *   references: every guest physical range fully references an mmap'd
827  *   range => no per-page reference counters have to be allocated.
828  *
829  * Return values:
830  *  0: success
831  *  !0: failure - errno indicating the source of the failure
832  */
833 int
834 alloc_guest_mem(struct vm_create_params *vcp)
835 {
836 	void *p;
837 	int ret;
838 	size_t i, j;
839 	struct vm_mem_range *vmr;
840 
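	/*
	 * Back each guest memory range with its own anonymous mapping;
	 * on failure, unmap whatever was already allocated.
	 */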
841 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
842 		vmr = &vcp->vcp_memranges[i];
843 		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
844 		    MAP_PRIVATE | MAP_ANON, -1, 0);
845 		if (p == MAP_FAILED) {
846 			ret = errno;
847 			for (j = 0; j < i; j++) {
848 				vmr = &vcp->vcp_memranges[j];
849 				munmap((void *)vmr->vmr_va, vmr->vmr_size);
850 			}
851 
852 			return (ret);
853 		}
854 
855 		vmr->vmr_va = (vaddr_t)p;
856 	}
857 
858 	return (0);
859 }
860 
861 /*
862  * vmm_create_vm
863  *
864  * Requests vmm(4) to create a new VM using the supplied creation
865  * parameters. This operation results in the creation of the in-kernel
866  * structures for the VM, but does not start the VM's vcpu(s).
867  *
868  * Parameters:
869  *  vcp: vm_create_params struct containing the VM's desired creation
870  *      configuration
871  *
872  * Return values:
873  *  0: success
874  *  !0 : ioctl to vmm(4) failed
875  */
876 int
877 vmm_create_vm(struct vm_create_params *vcp)
878 {
879 	/* Sanity check arguments */
880 	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
881 		return (EINVAL);
882 
883 	if (vcp->vcp_nmemranges == 0 ||
884 	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
885 		return (EINVAL);
886 
887 	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
888 		return (EINVAL);
889 
890 	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
891 		return (EINVAL);
892 
893 	if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) < 0)
894 		return (errno);
895 
896 	return (0);
897 }
898 
899 /*
900  * init_emulated_hw
901  *
902  * Initializes the userspace hardware emulation
903  */
904 void
905 init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
906     int *child_disks, int *child_taps)
907 {
908 	struct vm_create_params *vcp = &vmc->vmc_params;
909 	int i;
910 	uint64_t memlo, memhi;
911 
912 	/* Calculate memory size for NVRAM registers */
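	/*
	 * memlo: RAM above 16MB in the low range, memhi: RAM above 4GB,
	 * per the layout established by create_memory_map.
	 */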
913 	memlo = memhi = 0;
914 	if (vcp->vcp_nmemranges > 2)
915 		memlo = vcp->vcp_memranges[2].vmr_size - 15 * 0x100000;
916 
917 	if (vcp->vcp_nmemranges > 3)
918 		memhi = vcp->vcp_memranges[3].vmr_size;
919 
920 	/* Reset the IO port map */
921 	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);
922 
923 	/* Init i8253 PIT */
924 	i8253_init(vcp->vcp_id);
925 	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
926 	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
927 	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
928 	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
929 	ioports_map[PCKBC_AUX] = vcpu_exit_i8253_misc;
930 
931 	/* Init mc146818 RTC */
932 	mc146818_init(vcp->vcp_id, memlo, memhi);
933 	ioports_map[IO_RTC] = vcpu_exit_mc146818;
934 	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;
935 
936 	/* Init master and slave PICs */
937 	i8259_init();
938 	ioports_map[IO_ICU1] = vcpu_exit_i8259;
939 	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
940 	ioports_map[IO_ICU2] = vcpu_exit_i8259;
941 	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
942 	ioports_map[ELCR0] = vcpu_exit_elcr;
943 	ioports_map[ELCR1] = vcpu_exit_elcr;
944 
945 	/* Init ns8250 UART */
946 	ns8250_init(con_fd, vcp->vcp_id);
947 	for (i = COM1_DATA; i <= COM1_SCR; i++)
948 		ioports_map[i] = vcpu_exit_com;
949 
950 	/* Initialize PCI */
951 	for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++)
952 		ioports_map[i] = vcpu_exit_pci;
953 
954 	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
955 	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
956 	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
957 	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
958 	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
959 	pci_init();
960 
961 	/* Initialize virtio devices */
962 	virtio_init(current_vm, child_cdrom, child_disks, child_taps);
963 }

964 /*
965  * restore_emulated_hw
966  *
967  * Restores the userspace hardware emulation from fd
968  */
969 void
970 restore_emulated_hw(struct vm_create_params *vcp, int fd,
971     int *child_taps, int *child_disks, int child_cdrom)
972 {
973 	/* struct vm_create_params *vcp = &vmc->vmc_params; */
974 	int i;
975 	memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);
976 
977 	/* Init i8253 PIT */
978 	i8253_restore(fd, vcp->vcp_id);
979 	ioports_map[TIMER_CTRL] = vcpu_exit_i8253;
980 	ioports_map[TIMER_BASE + TIMER_CNTR0] = vcpu_exit_i8253;
981 	ioports_map[TIMER_BASE + TIMER_CNTR1] = vcpu_exit_i8253;
982 	ioports_map[TIMER_BASE + TIMER_CNTR2] = vcpu_exit_i8253;
983 
984 	/* Init master and slave PICs */
985 	i8259_restore(fd);
986 	ioports_map[IO_ICU1] = vcpu_exit_i8259;
987 	ioports_map[IO_ICU1 + 1] = vcpu_exit_i8259;
988 	ioports_map[IO_ICU2] = vcpu_exit_i8259;
989 	ioports_map[IO_ICU2 + 1] = vcpu_exit_i8259;
990 
991 	/* Init ns8250 UART */
992 	ns8250_restore(fd, con_fd, vcp->vcp_id);
993 	for (i = COM1_DATA; i <= COM1_SCR; i++)
994 		ioports_map[i] = vcpu_exit_com;
995 
996 	/* Init mc146818 RTC */
997 	mc146818_restore(fd, vcp->vcp_id);
998 	ioports_map[IO_RTC] = vcpu_exit_mc146818;
999 	ioports_map[IO_RTC + 1] = vcpu_exit_mc146818;
1000 
1001 	/* Initialize PCI */
1002 	for (i = VMM_PCI_IO_BAR_BASE; i <= VMM_PCI_IO_BAR_END; i++)
1003 		ioports_map[i] = vcpu_exit_pci;
1004 
1005 	ioports_map[PCI_MODE1_ADDRESS_REG] = vcpu_exit_pci;
1006 	ioports_map[PCI_MODE1_DATA_REG] = vcpu_exit_pci;
1007 	ioports_map[PCI_MODE1_DATA_REG + 1] = vcpu_exit_pci;
1008 	ioports_map[PCI_MODE1_DATA_REG + 2] = vcpu_exit_pci;
1009 	ioports_map[PCI_MODE1_DATA_REG + 3] = vcpu_exit_pci;
1010 	pci_restore(fd);
1011 	virtio_restore(fd, current_vm, child_cdrom, child_disks, child_taps);
1012 }
1013 
1014 /*
1015  * run_vm
1016  *
1017  * Runs the VM whose creation parameters are specified in vcp
1018  *
1019  * Parameters:
1020  *  child_cdrom: previously-opened child ISO disk file descriptor
1021  *  child_disks: previously-opened child VM disk file descriptors
1022  *  child_taps: previously-opened child tap file descriptors
1023  *  vmc: vmop_create_params struct containing the VM's desired creation
1024  *      configuration
1025  *  vrs: VCPU register state to initialize
1026  *
1027  * Return values:
1028  *  0: the VM exited normally
1029  *  !0 : the VM exited abnormally or failed to start
1030  */
1031 int
1032 run_vm(int child_cdrom, int *child_disks, int *child_taps,
1033     struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
1034 {
1035 	struct vm_create_params *vcp = &vmc->vmc_params;
1036 	struct vm_rwregs_params vregsp;
1037 	uint8_t evdone = 0;
1038 	size_t i;
1039 	int ret;
1040 	pthread_t *tid, evtid;
1041 	struct vm_run_params **vrp;
1042 	void *exit_status;
1043 
1044 	if (vcp == NULL)
1045 		return (EINVAL);
1046 
1047 	if (child_cdrom == -1 && strlen(vcp->vcp_cdrom))
1048 		return (EINVAL);
1049 
1050 	if (child_disks == NULL && vcp->vcp_ndisks != 0)
1051 		return (EINVAL);
1052 
1053 	if (child_taps == NULL && vcp->vcp_nnics != 0)
1054 		return (EINVAL);
1055 
1056 	if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
1057 		return (EINVAL);
1058 
1059 	if (vcp->vcp_ndisks > VMM_MAX_DISKS_PER_VM)
1060 		return (EINVAL);
1061 
1062 	if (vcp->vcp_nnics > VMM_MAX_NICS_PER_VM)
1063 		return (EINVAL);
1064 
1065 	if (vcp->vcp_nmemranges == 0 ||
1066 	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
1067 		return (EINVAL);
1068 
1069 	tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
1070 	vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
1071 	if (tid == NULL || vrp == NULL) {
1072 		log_warn("%s: memory allocation error - exiting.",
1073 		    __progname);
1074 		return (ENOMEM);
1075 	}
1076 
1077 	log_debug("%s: initializing hardware for vm %s", __func__,
1078 	    vcp->vcp_name);
1079 
1080 	if (!current_vm->vm_received)
1081 		init_emulated_hw(vmc, child_cdrom, child_disks, child_taps);
1082 
1083 	ret = pthread_mutex_init(&threadmutex, NULL);
1084 	if (ret) {
1085 		log_warn("%s: could not initialize thread state mutex",
1086 		    __func__);
1087 		return (ret);
1088 	}
1089 	ret = pthread_cond_init(&threadcond, NULL);
1090 	if (ret) {
1091 		log_warn("%s: could not initialize thread state "
1092 		    "condition variable", __func__);
1093 		return (ret);
1094 	}
1095 
1096 	mutex_lock(&threadmutex);
1097 
1098 	log_debug("%s: starting vcpu threads for vm %s", __func__,
1099 	    vcp->vcp_name);
1100 
1101 	/*
1102 	 * Create and launch one thread for each VCPU. These threads may
1103 	 * migrate between PCPUs over time; the need to reload CPU state
1104 	 * in such situations is detected and performed by vmm(4) in the
1105 	 * kernel.
1106 	 */
1107 	for (i = 0; i < vcp->vcp_ncpus; i++) {
1108 		vrp[i] = malloc(sizeof(struct vm_run_params));
1109 		if (vrp[i] == NULL) {
1110 			log_warn("%s: memory allocation error - "
1111 			    "exiting.", __progname);
1112 			/* caller will exit, so skip freeing */
1113 			return (ENOMEM);
1114 		}
1115 		vrp[i]->vrp_exit = malloc(sizeof(struct vm_exit));
1116 		if (vrp[i]->vrp_exit == NULL) {
1117 			log_warn("%s: memory allocation error - "
1118 			    "exiting.", __progname);
1119 			/* caller will exit, so skip freeing */
1120 			return (ENOMEM);
1121 		}
1122 		vrp[i]->vrp_vm_id = vcp->vcp_id;
1123 		vrp[i]->vrp_vcpu_id = i;
1124 
1125 		if (vcpu_reset(vcp->vcp_id, i, vrs)) {
1126 			log_warnx("%s: cannot reset VCPU %zu - exiting.",
1127 			    __progname, i);
1128 			return (EIO);
1129 		}
1130 
1131 		/* once more because reset_cpu changes regs */
1132 		if (current_vm->vm_received) {
1133 			vregsp.vrwp_vm_id = vcp->vcp_id;
1134 			vregsp.vrwp_vcpu_id = i;
1135 			vregsp.vrwp_regs = *vrs;
1136 			vregsp.vrwp_mask = VM_RWREGS_ALL;
1137 			if ((ret = ioctl(env->vmd_fd, VMM_IOC_WRITEREGS,
1138 			    &vregsp)) < 0) {
1139 				log_warn("%s: writeregs failed", __func__);
1140 				return (ret);
1141 			}
1142 		}
1143 
1144 		ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
1145 		if (ret) {
1146 			log_warnx("%s: cannot initialize cond var (%d)",
1147 			    __progname, ret);
1148 			return (ret);
1149 		}
1150 
1151 		ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL);
1152 		if (ret) {
1153 			log_warnx("%s: cannot initialize mtx (%d)",
1154 			    __progname, ret);
1155 			return (ret);
1156 		}
1157 
1158 		vcpu_hlt[i] = 0;
1159 
1160 		/* Start each VCPU run thread at vcpu_run_loop */
1161 		ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]);
1162 		if (ret) {
1163 			/* caller will _exit after this return */
1164 			ret = errno;
1165 			log_warn("%s: could not create vcpu thread %zu",
1166 			    __func__, i);
1167 			return (ret);
1168 		}
1169 	}
1170 
1171 	log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
1172 	ret = pthread_create(&evtid, NULL, event_thread, &evdone);
1173 	if (ret) {
1174 		errno = ret;
1175 		log_warn("%s: could not create event thread", __func__);
1176 		return (ret);
1177 	}
1178 
1179 	for (;;) {
1180 		ret = pthread_cond_wait(&threadcond, &threadmutex);
1181 		if (ret) {
1182 			log_warn("%s: waiting on thread state condition "
1183 			    "variable failed", __func__);
1184 			return (ret);
1185 		}
1186 
1187 		/*
1188 		 * Did a VCPU thread exit with an error? => return the first one
1189 		 */
1190 		for (i = 0; i < vcp->vcp_ncpus; i++) {
1191 			if (vcpu_done[i] == 0)
1192 				continue;
1193 
1194 			if (pthread_join(tid[i], &exit_status)) {
1195 				log_warn("%s: failed to join thread %zd - "
1196 				    "exiting", __progname, i);
1197 				return (EIO);
1198 			}
1199 
1200 			ret = (intptr_t)exit_status;
1201 		}
1202 
1203 		/* Did the event thread exit? => return with an error */
1204 		if (evdone) {
1205 			if (pthread_join(evtid, &exit_status)) {
1206 				log_warn("%s: failed to join event thread - "
1207 				    "exiting", __progname);
1208 				return (EIO);
1209 			}
1210 
1211 			log_warnx("%s: vm %d event thread exited "
1212 			    "unexpectedly", __progname, vcp->vcp_id);
1213 			return (EIO);
1214 		}
1215 
1216 		/* Did all VCPU threads exit successfully? => return */
1217 		for (i = 0; i < vcp->vcp_ncpus; i++) {
1218 			if (vcpu_done[i] == 0)
1219 				break;
1220 		}
1221 		if (i == vcp->vcp_ncpus)
1222 			return (ret);
1223 
1224 		/* Some more threads to wait for, start over */
1225 	}
1226 
1227 	return (ret);
1228 }
1229 
1230 void *
1231 event_thread(void *arg)
1232 {
1233 	uint8_t *donep = arg;
1234 	intptr_t ret;
1235 
1236 	ret = event_dispatch();
1237 
1238 	mutex_lock(&threadmutex);
1239 	*donep = 1;
1240 	pthread_cond_signal(&threadcond);
1241 	mutex_unlock(&threadmutex);
1242 
1243 	return (void *)ret;
1244 }
1245 
1246 /*
1247  * vcpu_run_loop
1248  *
1249  * Runs a single VCPU until vmm(4) requires help handling an exit,
1250  * or the VM terminates.
1251  *
1252  * Parameters:
1253  *  arg: vcpu_run_params for the VCPU being run by this thread
1254  *
1255  * Return values:
1256  *  NULL: the VCPU shutdown properly
1257  *  !NULL: error processing VCPU run, or the VCPU shutdown abnormally
1258  */
1259 void *
1260 vcpu_run_loop(void *arg)
1261 {
1262 	struct vm_run_params *vrp = (struct vm_run_params *)arg;
1263 	intptr_t ret = 0;
1264 	int irq;
1265 	uint32_t n;
1266 
1267 	vrp->vrp_continue = 0;
1268 	n = vrp->vrp_vcpu_id;
1269 
1270 	for (;;) {
1271 		ret = pthread_mutex_lock(&vcpu_run_mtx[n]);
1272 
1273 		if (ret) {
1274 			log_warnx("%s: can't lock vcpu run mtx (%d)",
1275 			    __func__, (int)ret);
1276 			return ((void *)ret);
1277 		}
1278 
1279 		/* If we are halted or paused, wait */
1280 		if (vcpu_hlt[n]) {
1281 			while (current_vm->vm_paused == 1) {
1282 				ret = pthread_cond_wait(&vcpu_run_cond[n],
1283 				    &vcpu_run_mtx[n]);
1284 				if (ret) {
1285 					log_warnx(
1286 					    "%s: can't wait on cond (%d)",
1287 					    __func__, (int)ret);
1288 					(void)pthread_mutex_unlock(
1289 					    &vcpu_run_mtx[n]);
1290 					break;
1291 				}
1292 			}
1293 			if (vcpu_hlt[n]) {
1294 				ret = pthread_cond_wait(&vcpu_run_cond[n],
1295 				    &vcpu_run_mtx[n]);
1296 
1297 				if (ret) {
1298 					log_warnx(
1299 					    "%s: can't wait on cond (%d)",
1300 					    __func__, (int)ret);
1301 					(void)pthread_mutex_unlock(
1302 					    &vcpu_run_mtx[n]);
1303 					break;
1304 				}
1305 			}
1306 		}
1307 
1308 		ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);
1309 
1310 		if (ret) {
1311 			log_warnx("%s: can't unlock mutex on cond (%d)",
1312 			    __func__, (int)ret);
1313 			break;
1314 		}
1315 
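		/*
		 * If the guest can accept an interrupt and the PIC has one
		 * pending, ack it and pass the vector to vmm(4) for
		 * injection; 0xFFFF means there is no interrupt to inject.
		 */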
1316 		if (vrp->vrp_irqready && i8259_is_pending()) {
1317 			irq = i8259_ack();
1318 			vrp->vrp_irq = irq;
1319 		} else
1320 			vrp->vrp_irq = 0xFFFF;
1321 
1322 		/* Still more pending? */
1323 		if (i8259_is_pending()) {
1324 			/* XXX can probably avoid ioctls here by providing intr in vrp */
1325 			if (vcpu_pic_intr(vrp->vrp_vm_id,
1326 			    vrp->vrp_vcpu_id, 1)) {
1327 				fatal("can't set INTR");
1328 			}
1329 		} else {
1330 			if (vcpu_pic_intr(vrp->vrp_vm_id,
1331 			    vrp->vrp_vcpu_id, 0)) {
1332 				fatal("can't clear INTR");
1333 			}
1334 		}
1335 
1336 		if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) < 0) {
1337 			/* If run ioctl failed, exit */
1338 			ret = errno;
1339 			log_warn("%s: vm %d / vcpu %d run ioctl failed",
1340 			    __func__, vrp->vrp_vm_id, n);
1341 			break;
1342 		}
1343 
1344 		/* If the VM is terminating, exit normally */
1345 		if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) {
1346 			ret = (intptr_t)NULL;
1347 			break;
1348 		}
1349 
1350 		if (vrp->vrp_exit_reason != VM_EXIT_NONE) {
1351 			/*
1352 			 * vmm(4) needs help handling an exit, handle in
1353 			 * vcpu_exit.
1354 			 */
1355 			ret = vcpu_exit(vrp);
1356 			if (ret)
1357 				break;
1358 		}
1359 	}
1360 
1361 	mutex_lock(&threadmutex);
1362 	vcpu_done[n] = 1;
1363 	pthread_cond_signal(&threadcond);
1364 	mutex_unlock(&threadmutex);
1365 
1366 	return ((void *)ret);
1367 }
1368 
1369 int
1370 vcpu_pic_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
1371 {
1372 	struct vm_intr_params vip;
1373 
1374 	memset(&vip, 0, sizeof(vip));
1375 
1376 	vip.vip_vm_id = vm_id;
1377 	vip.vip_vcpu_id = vcpu_id; /* XXX always 0? */
1378 	vip.vip_intr = intr;
1379 
1380 	if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) < 0)
1381 		return (errno);
1382 
1383 	return (0);
1384 }
1385 
1386 /*
1387  * vcpu_exit_pci
1388  *
1389  * Handle all I/O to the emulated PCI subsystem.
1390  *
1391  * Parameters:
1392  *  vrp: vcpu run parameters containing guest state for this exit
1393  *
1394  * Return value:
1395  *  Interrupt to inject to the guest VM, or 0xFF if no interrupt should
1396  *      be injected.
1397  */
1398 uint8_t
1399 vcpu_exit_pci(struct vm_run_params *vrp)
1400 {
1401 	struct vm_exit *vei = vrp->vrp_exit;
1402 	uint8_t intr;
1403 
1404 	intr = 0xFF;
1405 
1406 	switch (vei->vei.vei_port) {
1407 	case PCI_MODE1_ADDRESS_REG:
1408 		pci_handle_address_reg(vrp);
1409 		break;
1410 	case PCI_MODE1_DATA_REG:
1411 	case PCI_MODE1_DATA_REG + 1:
1412 	case PCI_MODE1_DATA_REG + 2:
1413 	case PCI_MODE1_DATA_REG + 3:
1414 		pci_handle_data_reg(vrp);
1415 		break;
1416 	case VMM_PCI_IO_BAR_BASE ... VMM_PCI_IO_BAR_END:
1417 		intr = pci_handle_io(vrp);
1418 		break;
1419 	default:
1420 		log_warnx("%s: unknown PCI register 0x%llx",
1421 		    __progname, (uint64_t)vei->vei.vei_port);
1422 		break;
1423 	}
1424 
1425 	return (intr);
1426 }
1427 
1428 /*
1429  * vcpu_exit_inout
1430  *
1431  * Handle all I/O exits that need to be emulated in vmd. This includes the
1432  * i8253 PIT, the com1 ns8250 UART, and the MC146818 RTC/NVRAM device.
1433  *
1434  * Parameters:
1435  *  vrp: vcpu run parameters containing guest state for this exit
1436  */
1437 void
1438 vcpu_exit_inout(struct vm_run_params *vrp)
1439 {
1440 	struct vm_exit *vei = vrp->vrp_exit;
1441 	uint8_t intr = 0xFF;
1442 
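	/*
	 * Ports with a registered handler are emulated below; reads from
	 * unhandled ports return all 1's and writes to them are ignored.
	 */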
1443 	if (ioports_map[vei->vei.vei_port] != NULL)
1444 		intr = ioports_map[vei->vei.vei_port](vrp);
1445 	else if (vei->vei.vei_dir == VEI_DIR_IN)
1446 		set_return_data(vei, 0xFFFFFFFF);
1447 
1448 	if (intr != 0xFF)
1449 		vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
1450 }
1451 
1452 /*
1453  * vcpu_exit
1454  *
1455  * Handle a vcpu exit. This function is called when it is determined that
1456  * vmm(4) requires the assistance of vmd to support a particular guest
1457  * exit type (eg, accessing an I/O port or device). Guest state is contained
1458  * in 'vrp', and will be resent to vmm(4) on exit completion.
1459  *
1460  * Upon conclusion of handling the exit, the function determines if any
1461  * interrupts should be injected into the guest, and asserts the proper
1462  * IRQ line whose interrupt should be vectored.
1463  *
1464  * Parameters:
1465  *  vrp: vcpu run parameters containing guest state for this exit
1466  *
1467  * Return values:
1468  *  0: the exit was handled successfully
1469  *  1: an error occurred (eg, unknown exit reason passed in 'vrp')
1470  */
1471 int
1472 vcpu_exit(struct vm_run_params *vrp)
1473 {
1474 	int ret;
1475 
1476 	switch (vrp->vrp_exit_reason) {
1477 	case VMX_EXIT_INT_WINDOW:
1478 	case SVM_VMEXIT_VINTR:
1479 	case VMX_EXIT_CPUID:
1480 	case VMX_EXIT_EXTINT:
1481 	case SVM_VMEXIT_INTR:
1482 	case VMX_EXIT_EPT_VIOLATION:
1483 	case SVM_VMEXIT_NPF:
1484 	case SVM_VMEXIT_MSR:
1485 	case SVM_VMEXIT_CPUID:
1486 		/*
1487 		 * We may be exiting to vmd to handle a pending interrupt but
1488 		 * at the same time the last exit type may have been one of
1489 		 * these. In this case, there's nothing extra to be done
1490 		 * here (and falling through to the default case below results
1491 		 * in more vmd log spam).
1492 		 */
1493 		break;
1494 	case VMX_EXIT_IO:
1495 	case SVM_VMEXIT_IOIO:
1496 		vcpu_exit_inout(vrp);
1497 		break;
1498 	case VMX_EXIT_HLT:
1499 	case SVM_VMEXIT_HLT:
1500 		ret = pthread_mutex_lock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
1501 		if (ret) {
1502 			log_warnx("%s: can't lock vcpu mutex (%d)",
1503 			    __func__, ret);
1504 			return (ret);
1505 		}
1506 		vcpu_hlt[vrp->vrp_vcpu_id] = 1;
1507 		ret = pthread_mutex_unlock(&vcpu_run_mtx[vrp->vrp_vcpu_id]);
1508 		if (ret) {
1509 			log_warnx("%s: can't unlock vcpu mutex (%d)",
1510 			    __func__, ret);
1511 			return (ret);
1512 		}
1513 		break;
1514 	case VMX_EXIT_TRIPLE_FAULT:
1515 	case SVM_VMEXIT_SHUTDOWN:
1516 		/* reset VM */
1517 		return (EAGAIN);
1518 	default:
1519 		log_debug("%s: unknown exit reason 0x%x",
1520 		    __progname, vrp->vrp_exit_reason);
1521 	}
1522 
1523 	/* Process any pending traffic */
1524 	vionet_process_rx(vrp->vrp_vm_id);
1525 
1526 	vrp->vrp_continue = 1;
1527 
1528 	return (0);
1529 }
1530 
1531 /*
1532  * find_gpa_range
1533  *
1534  * Search for a contiguous guest physical mem range.
1535  *
1536  * Parameters:
1537  *  vcp: VM create parameters that contain the memory map to search in
1538  *  gpa: the starting guest physical address
1539  *  len: the length of the memory range
1540  *
1541  * Return values:
1542  *  NULL: on failure if there is no memory range as described by the parameters
1543  *  Pointer to vm_mem_range that contains the start of the range otherwise.
1544  */
1545 static struct vm_mem_range *
1546 find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
1547 {
1548 	size_t i, n;
1549 	struct vm_mem_range *vmr;
1550 
1551 	/* Find the first vm_mem_range that contains gpa */
1552 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
1553 		vmr = &vcp->vcp_memranges[i];
1554 		if (vmr->vmr_gpa + vmr->vmr_size >= gpa)
1555 			break;
1556 	}
1557 
1558 	/* No range found. */
1559 	if (i == vcp->vcp_nmemranges)
1560 		return (NULL);
1561 
1562 	/*
1563 	 * vmr may cover the range [gpa, gpa + len) only partly. Make
1564 	 * sure that the following vm_mem_ranges are contiguous and
1565 	 * cover the rest.
1566 	 */
1567 	n = vmr->vmr_size - (gpa - vmr->vmr_gpa);
1568 	if (len < n)
1569 		len = 0;
1570 	else
1571 		len -= n;
1572 	gpa = vmr->vmr_gpa + vmr->vmr_size;
1573 	for (i = i + 1; len != 0 && i < vcp->vcp_nmemranges; i++) {
1574 		vmr = &vcp->vcp_memranges[i];
1575 		if (gpa != vmr->vmr_gpa)
1576 			return (NULL);
1577 		if (len <= vmr->vmr_size)
1578 			len = 0;
1579 		else
1580 			len -= vmr->vmr_size;
1581 
1582 		gpa = vmr->vmr_gpa + vmr->vmr_size;
1583 	}
1584 
1585 	if (len != 0)
1586 		return (NULL);
1587 
1588 	return (vmr);
1589 }
1590 
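/*
 * vaddr_mem
 *
 * Translates a guest physical address to the corresponding host virtual
 * address, provided that the range [gpa, gpa + len) lies within a single
 * guest memory range.
 *
 * Parameters:
 *  gpa: guest physical address to translate
 *  len: number of bytes that must be accessible from gpa
 *
 * Return values:
 *  Host virtual address on success, NULL if the range is not mapped.
 */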
1591 void *
1592 vaddr_mem(paddr_t gpa, size_t len)
1593 {
1594 	struct vm_create_params *vcp = &current_vm->vm_params.vmc_params;
1595 	size_t i;
1596 	struct vm_mem_range *vmr;
1597 	paddr_t gpend = gpa + len;
1598 
1599 	/* Find the first vm_mem_range that contains gpa */
1600 	for (i = 0; i < vcp->vcp_nmemranges; i++) {
1601 		vmr = &vcp->vcp_memranges[i];
1602 		if (gpa < vmr->vmr_gpa)
1603 			continue;
1604 
1605 		if (gpend >= vmr->vmr_gpa + vmr->vmr_size)
1606 			continue;
1607 
1608 		return ((char *)vmr->vmr_va + (gpa - vmr->vmr_gpa));
1609 	}
1610 
1611 	return (NULL);
1612 }
1613 
1614 /*
1615  * write_mem
1616  *
1617  * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
1618  *
1619  * Parameters:
1620  *  dst: the destination paddr_t in the guest VM
1621  *  buf: data to copy
1622  *  len: number of bytes to copy
1623  *
1624  * Return values:
1625  *  0: success
1626  *  EINVAL: if the guest physical memory range [dst, dst + len) does not
1627  *      exist in the guest.
1628  */
1629 int
1630 write_mem(paddr_t dst, const void *buf, size_t len)
1631 {
1632 	const char *from = buf;
1633 	char *to;
1634 	size_t n, off;
1635 	struct vm_mem_range *vmr;
1636 
1637 	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, dst, len);
1638 	if (vmr == NULL) {
1639 		errno = EINVAL;
1640 		log_warn("%s: failed - invalid memory range dst = 0x%lx, "
1641 		    "len = 0x%zx", __func__, dst, len);
1642 		return (EINVAL);
1643 	}
1644 
1645 	off = dst - vmr->vmr_gpa;
1646 	while (len != 0) {
1647 		n = vmr->vmr_size - off;
1648 		if (len < n)
1649 			n = len;
1650 
1651 		to = (char *)vmr->vmr_va + off;
1652 		memcpy(to, from, n);
1653 
1654 		from += n;
1655 		len -= n;
1656 		off = 0;
1657 		vmr++;
1658 	}
1659 
1660 	return (0);
1661 }
1662 
1663 /*
1664  * read_mem
1665  *
1666  * Reads memory at guest paddr 'src' into 'buf'.
1667  *
1668  * Parameters:
1669  *  src: the source paddr_t in the guest VM to read from.
1670  *  buf: destination (local) buffer
1671  *  len: number of bytes to read
1672  *
1673  * Return values:
1674  *  0: success
1675  *  EINVAL: if the guest physical memory range [src, src + len) does not
1676  *      exist in the guest.
1677  */
1678 int
1679 read_mem(paddr_t src, void *buf, size_t len)
1680 {
1681 	char *from, *to = buf;
1682 	size_t n, off;
1683 	struct vm_mem_range *vmr;
1684 
1685 	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
1686 	if (vmr == NULL) {
1687 		errno = EINVAL;
1688 		log_warn("%s: failed - invalid memory range src = 0x%lx, "
1689 		    "len = 0x%zx", __func__, src, len);
1690 		return (EINVAL);
1691 	}
1692 
1693 	off = src - vmr->vmr_gpa;
1694 	while (len != 0) {
1695 		n = vmr->vmr_size - off;
1696 		if (len < n)
1697 			n = len;
1698 
1699 		from = (char *)vmr->vmr_va + off;
1700 		memcpy(to, from, n);
1701 
1702 		to += n;
1703 		len -= n;
1704 		off = 0;
1705 		vmr++;
1706 	}
1707 
1708 	return (0);
1709 }
1710 
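/*
 * iovec_mem
 *
 * Builds an iovec array describing the host memory that backs the guest
 * physical range [src, src + len), splitting the range across guest
 * memory ranges as needed.
 *
 * Parameters:
 *  src: guest physical address of the start of the range
 *  len: length of the range
 *  iov: iovec array to fill in
 *  iovcnt: number of entries available in iov
 *
 * Return values:
 *  Number of iovec entries used on success.
 *  -1 on failure, with errno set to EINVAL (range not mapped) or ENOMEM
 *      (not enough iovec entries).
 */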
1711 int
1712 iovec_mem(paddr_t src, size_t len, struct iovec *iov, int iovcnt)
1713 {
1714 	size_t n, off;
1715 	struct vm_mem_range *vmr;
1716 	int niov = 0;
1717 
1718 	vmr = find_gpa_range(&current_vm->vm_params.vmc_params, src, len);
1719 	if (vmr == NULL) {
1720 		errno = EINVAL;
1721 		return (-1);
1722 	}
1723 
1724 	off = src - vmr->vmr_gpa;
1725 	while (len > 0) {
1726 		if (niov == iovcnt) {
1727 			errno = ENOMEM;
1728 			return (-1);
1729 		}
1730 
1731 		n = vmr->vmr_size - off;
1732 		if (len < n)
1733 			n = len;
1734 
1735 		iov[niov].iov_base = (char *)vmr->vmr_va + off;
1736 		iov[niov].iov_len = n;
1737 
1738 		niov++;
1739 
1740 		len -= n;
1741 		off = 0;
1742 		vmr++;
1743 	}
1744 
1745 	return (niov);
1746 }
1747 
1748 /*
1749  * vcpu_assert_pic_irq
1750  *
1751  * Injects the specified IRQ on the supplied vcpu/vm
1752  *
1753  * Parameters:
1754  *  vm_id: VM ID to inject to
1755  *  vcpu_id: VCPU ID to inject to
1756  *  irq: IRQ to inject
1757  */
1758 void
1759 vcpu_assert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
1760 {
1761 	int ret;
1762 
1763 	i8259_assert_irq(irq);
1764 
1765 	if (i8259_is_pending()) {
1766 		if (vcpu_pic_intr(vm_id, vcpu_id, 1))
1767 			fatalx("%s: can't assert INTR", __func__);
1768 
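		/*
		 * Wake the VCPU in case it is halted in vcpu_run_loop so
		 * it can service the interrupt.
		 */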
1769 		ret = pthread_mutex_lock(&vcpu_run_mtx[vcpu_id]);
1770 		if (ret)
1771 			fatalx("%s: can't lock vcpu mtx (%d)", __func__, ret);
1772 
1773 		vcpu_hlt[vcpu_id] = 0;
1774 		ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
1775 		if (ret)
1776 			fatalx("%s: can't signal (%d)", __func__, ret);
1777 		ret = pthread_mutex_unlock(&vcpu_run_mtx[vcpu_id]);
1778 		if (ret)
1779 			fatalx("%s: can't unlock vcpu mtx (%d)", __func__, ret);
1780 	}
1781 }
1782 
1783 /*
1784  * vcpu_deassert_pic_irq
1785  *
1786  * Clears the specified IRQ on the supplied vcpu/vm
1787  *
1788  * Parameters:
1789  *  vm_id: VM ID to clear in
1790  *  vcpu_id: VCPU ID to clear in
1791  *  irq: IRQ to clear
1792  */
1793 void
1794 vcpu_deassert_pic_irq(uint32_t vm_id, uint32_t vcpu_id, int irq)
1795 {
1796 	i8259_deassert_irq(irq);
1797 
1798 	if (!i8259_is_pending()) {
1799 		if (vcpu_pic_intr(vm_id, vcpu_id, 0))
1800 			fatalx("%s: can't deassert INTR for vm_id %d, "
1801 			    "vcpu_id %d", __func__, vm_id, vcpu_id);
1802 	}
1803 }
1804 
1805 /*
1806  * fd_hasdata
1807  *
1808  * Determines if data can be read from a file descriptor.
1809  *
1810  * Parameters:
1811  *  fd: the fd to check
1812  *
1813  * Return values:
1814  *  1 if data can be read from an fd, or 0 otherwise.
1815  */
1816 int
1817 fd_hasdata(int fd)
1818 {
1819 	struct pollfd pfd[1];
1820 	int nready, hasdata = 0;
1821 
1822 	pfd[0].fd = fd;
1823 	pfd[0].events = POLLIN;
1824 	nready = poll(pfd, 1, 0);
1825 	if (nready == -1)
1826 		log_warn("checking file descriptor for data failed");
1827 	else if (nready == 1 && pfd[0].revents & POLLIN)
1828 		hasdata = 1;
1829 	return (hasdata);
1830 }
1831 
1832 /*
1833  * mutex_lock
1834  *
1835  * Wrapper function for pthread_mutex_lock that does error checking and that
1836  * exits on failure
1837  */
1838 void
1839 mutex_lock(pthread_mutex_t *m)
1840 {
1841 	int ret;
1842 
1843 	ret = pthread_mutex_lock(m);
1844 	if (ret) {
1845 		errno = ret;
1846 		fatal("could not acquire mutex");
1847 	}
1848 }
1849 
1850 /*
1851  * mutex_unlock
1852  *
1853  * Wrapper function for pthread_mutex_unlock that does error checking and that
1854  * exits on failure
1855  */
1856 void
1857 mutex_unlock(pthread_mutex_t *m)
1858 {
1859 	int ret;
1860 
1861 	ret = pthread_mutex_unlock(m);
1862 	if (ret) {
1863 		errno = ret;
1864 		fatal("could not release mutex");
1865 	}
1866 }
1867 
1868 /*
1869  * set_return_data
1870  *
1871  * Utility function for manipulating register data in vm exit info structs. This
1872  * function ensures that the data is copied to the vei->vei.vei_data field with
1873  * the proper size for the operation being performed.
1874  *
1875  * Parameters:
1876  *  vei: exit information
1877  *  data: return data
1878  */
1879 void
1880 set_return_data(struct vm_exit *vei, uint32_t data)
1881 {
1882 	switch (vei->vei.vei_size) {
1883 	case 1:
1884 		vei->vei.vei_data &= ~0xFF;
1885 		vei->vei.vei_data |= (uint8_t)data;
1886 		break;
1887 	case 2:
1888 		vei->vei.vei_data &= ~0xFFFF;
1889 		vei->vei.vei_data |= (uint16_t)data;
1890 		break;
1891 	case 4:
1892 		vei->vei.vei_data = data;
1893 		break;
1894 	}
1895 }
1896 
1897 /*
1898  * get_input_data
1899  *
1900  * Utility function for manipulating register data in vm exit info
1901  * structs. This function ensures that the data is copied from the
1902  * vei->vei.vei_data field with the proper size for the operation being
1903  * performed.
1904  *
1905  * Parameters:
1906  *  vei: exit information
1907  *  data: location to store the result
1908  */
1909 void
1910 get_input_data(struct vm_exit *vei, uint32_t *data)
1911 {
1912 	switch (vei->vei.vei_size) {
1913 	case 1:
1914 		*data &= 0xFFFFFF00;
1915 		*data |= (uint8_t)vei->vei.vei_data;
1916 		break;
1917 	case 2:
1918 		*data &= 0xFFFF0000;
1919 		*data |= (uint16_t)vei->vei.vei_data;
1920 		break;
1921 	case 4:
1922 		*data = vei->vei.vei_data;
1923 		break;
1924 	default:
1925 		log_warnx("%s: invalid i/o size %d", __func__,
1926 		    vei->vei.vei_size);
1927 	}
1928 
1929 }
1930