xref: /openbsd-src/usr.sbin/vmd/x86_mmio.c (revision 234ee546287049cd0c06003ce677a6226d75654b)
1 /*	$OpenBSD: x86_mmio.c,v 1.1 2024/07/10 10:41:19 dv Exp $	*/
2 /*
3  * Copyright (c) 2022 Dave Voutila <dv@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <errno.h>
19 #include <string.h>
20 
21 #include <sys/types.h>
22 #include <machine/specialreg.h>
23 
24 #include "vmd.h"
25 #include "mmio.h"
26 
27 #define MMIO_DEBUG 0
28 
29 extern char* __progname;
30 
/*
 * Decode state for one guest instruction: a private copy of the raw
 * instruction bytes plus a read cursor advanced by next_byte().
 */
struct x86_decode_state {
	uint8_t	s_bytes[15];	/* Max. x86 instruction length is 15 bytes. */
	size_t	s_len;		/* Number of valid bytes in s_bytes. */
	size_t	s_idx;		/* Current read position within s_bytes. */
};
36 
/* Tri-state result returned by each incremental decode step. */
enum decode_result {
	DECODE_ERROR = 0,	/* Something went wrong. */
	DECODE_DONE,		/* Decode success and no more work needed. */
	DECODE_MORE,		/* Decode success and more work required. */
};
42 
43 static const char *str_cpu_mode(int);
44 static const char *str_decode_res(enum decode_result);
45 static const char *str_opcode(struct x86_opcode *);
46 static const char *str_operand_enc(struct x86_opcode *);
47 static const char *str_reg(int);
48 static const char *str_sreg(int);
49 static int detect_cpu_mode(struct vcpu_reg_state *);
50 
51 static enum decode_result decode_prefix(struct x86_decode_state *,
52     struct x86_insn *);
53 static enum decode_result decode_opcode(struct x86_decode_state *,
54     struct x86_insn *);
55 static enum decode_result decode_modrm(struct x86_decode_state *,
56     struct x86_insn *);
57 static int get_modrm_reg(struct x86_insn *);
58 static int get_modrm_addr(struct x86_insn *, struct vcpu_reg_state *vrs);
59 static enum decode_result decode_disp(struct x86_decode_state *,
60     struct x86_insn *);
61 static enum decode_result decode_sib(struct x86_decode_state *,
62     struct x86_insn *);
63 static enum decode_result decode_imm(struct x86_decode_state *,
64     struct x86_insn *);
65 
66 static enum decode_result peek_byte(struct x86_decode_state *, uint8_t *);
67 static enum decode_result next_byte(struct x86_decode_state *, uint8_t *);
68 static enum decode_result next_value(struct x86_decode_state *, size_t,
69     uint64_t *);
70 static int is_valid_state(struct x86_decode_state *, const char *);
71 
72 static int emulate_mov(struct x86_insn *, struct vm_exit *);
73 static int emulate_movzx(struct x86_insn *, struct vm_exit *);
74 
/*
 * Lookup table for 1-byte opcodes, in opcode alphabetical order.
 *
 * NOTE(review): sized [255], so index 0xFF would be out of range. All
 * initializers used here are <= 0xA5, but [256] would cover the full
 * byte range — TODO confirm no declaration elsewhere depends on [255].
 */
const enum x86_opcode_type x86_1byte_opcode_tbl[255] = {
	/* MOV: 0x88-0x8C register/memory forms, 0xA0-0xA3 moffs forms. */
	[0x88] = OP_MOV,
	[0x89] = OP_MOV,
	[0x8A] = OP_MOV,
	[0x8B] = OP_MOV,
	[0x8C] = OP_MOV,
	[0xA0] = OP_MOV,
	[0xA1] = OP_MOV,
	[0xA2] = OP_MOV,
	[0xA3] = OP_MOV,

	/* MOVS: string moves are not emulated. */
	[0xA4] = OP_UNSUPPORTED,
	[0xA5] = OP_UNSUPPORTED,

	/* 0x0F escape byte introduces a two-byte opcode. */
	[ESCAPE] = OP_TWO_BYTE,
};
94 
/* Lookup table for 1-byte operand encodings, in opcode alphabetical order. */
const enum x86_operand_enc x86_1byte_operand_enc_tbl[255] = {
	/* MOV: MR = ModRM is destination, RM = ModRM is source,
	 * FD/TD = moffs forms to/from the accumulator. */
	[0x88] = OP_ENC_MR,
	[0x89] = OP_ENC_MR,
	[0x8A] = OP_ENC_RM,
	[0x8B] = OP_ENC_RM,
	[0x8C] = OP_ENC_MR,
	[0xA0] = OP_ENC_FD,
	[0xA1] = OP_ENC_FD,
	[0xA2] = OP_ENC_TD,
	[0xA3] = OP_ENC_TD,

	/* MOVS: no explicit operands (zero-operand encoding). */
	[0xA4] = OP_ENC_ZO,
	[0xA5] = OP_ENC_ZO,
};
112 
/* Lookup table for two-byte (0x0F-prefixed) opcodes. */
const enum x86_opcode_type x86_2byte_opcode_tbl[255] = {
	/* MOVZX: 0xB6 = byte source, 0xB7 = word source. */
	[0xB6] = OP_MOVZX,
	[0xB7] = OP_MOVZX,
};
118 
/* Operand encodings for two-byte (0x0F-prefixed) opcodes. */
const enum x86_operand_enc x86_2byte_operand_enc_table[255] = {
	/* MOVZX: register destination, ModRM-selected source. */
	[0xB6] = OP_ENC_RM,
	[0xB7] = OP_ENC_RM,
};
124 
125 /*
126  * peek_byte
127  *
128  * Fetch the next byte fron the instruction bytes without advancing the
129  * position in the stream.
130  *
131  * Return values:
132  *  DECODE_DONE: byte was found and is the last in the stream
133  *  DECODE_MORE: byte was found and there are more remaining to be read
134  *  DECODE_ERROR: state is invalid and not byte was found, *byte left unchanged
135  */
136 static enum decode_result
peek_byte(struct x86_decode_state * state,uint8_t * byte)137 peek_byte(struct x86_decode_state *state, uint8_t *byte)
138 {
139 	enum decode_result res;
140 
141 	if (state == NULL)
142 		return (DECODE_ERROR);
143 
144 	if (state->s_idx == state->s_len)
145 		return (DECODE_ERROR);
146 
147 	if (state->s_idx + 1 == state->s_len)
148 		res = DECODE_DONE;
149 	else
150 		res = DECODE_MORE;
151 
152 	if (byte != NULL)
153 		*byte = state->s_bytes[state->s_idx];
154 	return (res);
155 }
156 
157 /*
158  * next_byte
159  *
160  * Fetch the next byte fron the instruction bytes, advancing the position in the
161  * stream and mutating decode state.
162  *
163  * Return values:
164  *  DECODE_DONE: byte was found and is the last in the stream
165  *  DECODE_MORE: byte was found and there are more remaining to be read
166  *  DECODE_ERROR: state is invalid and not byte was found, *byte left unchanged
167  */
168 static enum decode_result
next_byte(struct x86_decode_state * state,uint8_t * byte)169 next_byte(struct x86_decode_state *state, uint8_t *byte)
170 {
171 	uint8_t next;
172 
173 	/* Cheat and see if we're going to fail. */
174 	if (peek_byte(state, &next) == DECODE_ERROR)
175 		return (DECODE_ERROR);
176 
177 	if (byte != NULL)
178 		*byte = next;
179 	state->s_idx++;
180 
181 	return (state->s_idx < state->s_len ? DECODE_MORE : DECODE_DONE);
182 }
183 
184 /*
185  * Fetch the next `n' bytes as a single uint64_t value.
186  */
187 static enum decode_result
next_value(struct x86_decode_state * state,size_t n,uint64_t * value)188 next_value(struct x86_decode_state *state, size_t n, uint64_t *value)
189 {
190 	uint8_t bytes[8];
191 	size_t i;
192 	enum decode_result res;
193 
194 	if (value == NULL)
195 		return (DECODE_ERROR);
196 
197 	if (n == 0 || n > sizeof(bytes))
198 		return (DECODE_ERROR);
199 
200 	memset(bytes, 0, sizeof(bytes));
201 	for (i = 0; i < n; i++)
202 		if ((res = next_byte(state, &bytes[i])) == DECODE_ERROR)
203 			return (DECODE_ERROR);
204 
205 	*value = *((uint64_t*)bytes);
206 
207 	return (res);
208 }
209 
210 /*
211  * is_valid_state
212  *
213  * Validate the decode state looks viable.
214  *
215  * Returns:
216  *  1: if state is valid
217  *  0: if an invariant is detected
218  */
219 static int
is_valid_state(struct x86_decode_state * state,const char * fn_name)220 is_valid_state(struct x86_decode_state *state, const char *fn_name)
221 {
222 	const char *s = (fn_name != NULL) ? fn_name : __func__;
223 
224 	if (state == NULL) {
225 		log_warnx("%s: null state", s);
226 		return (0);
227 	}
228 	if (state->s_len > sizeof(state->s_bytes)) {
229 		log_warnx("%s: invalid length", s);
230 		return (0);
231 	}
232 	if (state->s_idx + 1 > state->s_len) {
233 		log_warnx("%s: invalid index", s);
234 		return (0);
235 	}
236 
237 	return (1);
238 }
239 
/*
 * NOTE(review): MMIO_DEBUG is #defined (to 0) above, so this #ifdef is
 * always true and the debug helpers are always compiled in; use
 * "#if MMIO_DEBUG" if they are meant to be configurable — TODO confirm.
 */
#ifdef MMIO_DEBUG
/* Log every guest GPR and segment register (debug aid). */
static void
dump_regs(struct vcpu_reg_state *vrs)
{
	size_t i;
	struct vcpu_segment_info *vsi;

	for (i = 0; i < VCPU_REGS_NGPRS; i++)
		log_info("%s: %s 0x%llx", __progname, str_reg(i),
		    vrs->vrs_gprs[i]);

	for (i = 0; i < VCPU_REGS_NSREGS; i++) {
		vsi = &vrs->vrs_sregs[i];
		log_info("%s: %s { sel: 0x%04x, lim: 0x%08x, ar: 0x%08x, "
		    "base: 0x%llx }", __progname, str_sreg(i),
		    vsi->vsi_sel, vsi->vsi_limit, vsi->vsi_ar, vsi->vsi_base);
	}
}

/* Log a one-line summary of a decoded instruction (debug aid). */
static void
dump_insn(struct x86_insn *insn)
{
	log_info("instruction { %s, enc=%s, len=%d, mod=0x%02x, ("
	    "reg=%s, addr=0x%lx) sib=0x%02x }",
	    str_opcode(&insn->insn_opcode),
	    str_operand_enc(&insn->insn_opcode), insn->insn_bytes_len,
	    insn->insn_modrm, str_reg(insn->insn_reg),
	    insn->insn_gva, insn->insn_sib);
}
#endif /* MMIO_DEBUG */
270 
271 static const char *
str_cpu_mode(int mode)272 str_cpu_mode(int mode)
273 {
274 	switch (mode) {
275 	case VMM_CPU_MODE_REAL: return "REAL";
276 	case VMM_CPU_MODE_PROT: return "PROT";
277 	case VMM_CPU_MODE_PROT32: return "PROT32";
278 	case VMM_CPU_MODE_COMPAT: return "COMPAT";
279 	case VMM_CPU_MODE_LONG: return "LONG";
280 	default: return "UKNOWN";
281 	}
282 }
283 
284 __unused static const char *
str_decode_res(enum decode_result res)285 str_decode_res(enum decode_result res) {
286 	switch (res) {
287 	case DECODE_DONE: return "DONE";
288 	case DECODE_MORE: return "MORE";
289 	case DECODE_ERROR: return "ERROR";
290 	default: return "UNKNOWN";
291 	}
292 }
293 
294 static const char *
str_opcode(struct x86_opcode * opcode)295 str_opcode(struct x86_opcode *opcode)
296 {
297 	switch (opcode->op_type) {
298 	case OP_IN: return "IN";
299 	case OP_INS: return "INS";
300 	case OP_MOV: return "MOV";
301 	case OP_MOVZX: return "MOVZX";
302 	case OP_OUT: return "OUT";
303 	case OP_OUTS: return "OUTS";
304 	case OP_UNSUPPORTED: return "UNSUPPORTED";
305 	default: return "UNKNOWN";
306 	}
307 }
308 
309 static const char *
str_operand_enc(struct x86_opcode * opcode)310 str_operand_enc(struct x86_opcode *opcode)
311 {
312 	switch (opcode->op_encoding) {
313 	case OP_ENC_I: return "I";
314 	case OP_ENC_MI: return "MI";
315 	case OP_ENC_MR: return "MR";
316 	case OP_ENC_RM: return "RM";
317 	case OP_ENC_FD: return "FD";
318 	case OP_ENC_TD: return "TD";
319 	case OP_ENC_OI: return "OI";
320 	case OP_ENC_ZO: return "ZO";
321 	default: return "UNKNOWN";
322 	}
323 }
324 
325 static const char *
str_reg(int reg)326 str_reg(int reg) {
327 	switch (reg) {
328 	case VCPU_REGS_RAX: return "RAX";
329 	case VCPU_REGS_RCX: return "RCX";
330 	case VCPU_REGS_RDX: return "RDX";
331 	case VCPU_REGS_RBX: return "RBX";
332 	case VCPU_REGS_RSI: return "RSI";
333 	case VCPU_REGS_RDI: return "RDI";
334 	case VCPU_REGS_R8:  return " R8";
335 	case VCPU_REGS_R9:  return " R9";
336 	case VCPU_REGS_R10: return "R10";
337 	case VCPU_REGS_R11: return "R11";
338 	case VCPU_REGS_R12: return "R12";
339 	case VCPU_REGS_R13: return "R13";
340 	case VCPU_REGS_R14: return "R14";
341 	case VCPU_REGS_R15: return "R15";
342 	case VCPU_REGS_RSP: return "RSP";
343 	case VCPU_REGS_RBP: return "RBP";
344 	case VCPU_REGS_RIP: return "RIP";
345 	case VCPU_REGS_RFLAGS: return "RFLAGS";
346 	default: return "UNKNOWN";
347 	}
348 }
349 
350 static const char *
str_sreg(int sreg)351 str_sreg(int sreg) {
352 	switch (sreg) {
353 	case VCPU_REGS_CS: return "CS";
354 	case VCPU_REGS_DS: return "DS";
355 	case VCPU_REGS_ES: return "ES";
356 	case VCPU_REGS_FS: return "FS";
357 	case VCPU_REGS_GS: return "GS";
358 	case VCPU_REGS_SS: return "GS";
359 	case VCPU_REGS_LDTR: return "LDTR";
360 	case VCPU_REGS_TR: return "TR";
361 	default: return "UKNOWN";
362 	}
363 }
364 
/*
 * detect_cpu_mode
 *
 * Classify the vcpu's operating mode from CR0/CR4, EFER, the CS access
 * rights, and RFLAGS.
 *
 * Returns a VMM_CPU_MODE_* value, or VMM_CPU_MODE_UNKNOWN if the register
 * state does not match a supported mode.
 *
 * NOTE(review): VMM_CPU_MODE_PROT (16-bit protected mode) is never
 * returned here; PE=1 with CS.D=0 outside long mode falls through to
 * UNKNOWN — TODO confirm that is intentional.
 */
static int
detect_cpu_mode(struct vcpu_reg_state *vrs)
{
	uint64_t cr0, cr4, cs, efer, rflags;

	/* Is protected mode enabled? */
	cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
	if (!(cr0 & CR0_PE))
		return (VMM_CPU_MODE_REAL);

	cr4 = vrs->vrs_crs[VCPU_REGS_CR4];
	cs = vrs->vrs_sregs[VCPU_REGS_CS].vsi_ar;	/* CS access rights. */
	efer = vrs->vrs_msrs[VCPU_REGS_EFER];
	rflags = vrs->vrs_gprs[VCPU_REGS_RFLAGS];

	/* Check for Long modes: IA-32e requires LME, PAE, and paging. */
	if ((efer & EFER_LME) && (cr4 & CR4_PAE) && (cr0 & CR0_PG)) {
		if (cs & CS_L) {
			/* Long Modes: CS.L=1 with CS.D=1 is reserved. */
			if (!(cs & CS_D))
				return (VMM_CPU_MODE_LONG);
			log_warnx("%s: invalid cpu mode", __progname);
			return (VMM_CPU_MODE_UNKNOWN);
		} else {
			/* Compatibility Modes */
			if (cs & CS_D) /* XXX Add Compat32 mode */
				return (VMM_CPU_MODE_UNKNOWN);
			return (VMM_CPU_MODE_COMPAT);
		}
	}

	/* Check for 32-bit Protected Mode. */
	if (cs & CS_D)
		return (VMM_CPU_MODE_PROT32);

	/* Check for virtual 8086 mode. */
	if (rflags & EFLAGS_VM) {
		/* XXX add Virtual8086 mode */
		log_warnx("%s: Virtual 8086 mode", __progname);
		return (VMM_CPU_MODE_UNKNOWN);
	}

	/* Can't determine mode. */
	log_warnx("%s: invalid cpu mode", __progname);
	return (VMM_CPU_MODE_UNKNOWN);
}
411 
412 static enum decode_result
decode_prefix(struct x86_decode_state * state,struct x86_insn * insn)413 decode_prefix(struct x86_decode_state *state, struct x86_insn *insn)
414 {
415 	enum decode_result res = DECODE_ERROR;
416 	struct x86_prefix *prefix;
417 	uint8_t byte;
418 
419 	if (!is_valid_state(state, __func__) || insn == NULL)
420 		return (-1);
421 
422 	prefix = &insn->insn_prefix;
423 	memset(prefix, 0, sizeof(*prefix));
424 
425 	/*
426 	 * Decode prefixes. The last of its kind wins. The behavior is undefined
427 	 * in the Intel SDM (see Vol 2, 2.1.1 Instruction Prefixes.)
428 	 */
429 	while ((res = peek_byte(state, &byte)) != DECODE_ERROR) {
430 		switch (byte) {
431 		case LEG_1_LOCK:
432 		case LEG_1_REPNE:
433 		case LEG_1_REP:
434 			prefix->pfx_group1 = byte;
435 			break;
436 		case LEG_2_CS:
437 		case LEG_2_SS:
438 		case LEG_2_DS:
439 		case LEG_2_ES:
440 		case LEG_2_FS:
441 		case LEG_2_GS:
442 			prefix->pfx_group2 = byte;
443 			break;
444 		case LEG_3_OPSZ:
445 			prefix->pfx_group3 = byte;
446 			break;
447 		case LEG_4_ADDRSZ:
448 			prefix->pfx_group4 = byte;
449 			break;
450 		case REX_BASE...REX_BASE + 0x0F:
451 			if (insn->insn_cpu_mode == VMM_CPU_MODE_LONG)
452 				prefix->pfx_rex = byte;
453 			else /* INC encountered */
454 				return (DECODE_ERROR);
455 			break;
456 		case VEX_2_BYTE:
457 		case VEX_3_BYTE:
458 			log_warnx("%s: VEX not supported", __func__);
459 			return (DECODE_ERROR);
460 		default:
461 			/* Something other than a valid prefix. */
462 			return (DECODE_MORE);
463 		}
464 		/* Advance our position. */
465 		next_byte(state, NULL);
466 	}
467 
468 	return (res);
469 }
470 
471 static enum decode_result
decode_modrm(struct x86_decode_state * state,struct x86_insn * insn)472 decode_modrm(struct x86_decode_state *state, struct x86_insn *insn)
473 {
474 	enum decode_result res;
475 	uint8_t byte = 0;
476 
477 	if (!is_valid_state(state, __func__) || insn == NULL)
478 		return (DECODE_ERROR);
479 
480 	insn->insn_modrm_valid = 0;
481 
482 	/* Check the operand encoding to see if we fetch a byte or abort. */
483 	switch (insn->insn_opcode.op_encoding) {
484 	case OP_ENC_MR:
485 	case OP_ENC_RM:
486 	case OP_ENC_MI:
487 		res = next_byte(state, &byte);
488 		if (res == DECODE_ERROR) {
489 			log_warnx("%s: failed to get modrm byte", __func__);
490 			break;
491 		}
492 		insn->insn_modrm = byte;
493 		insn->insn_modrm_valid = 1;
494 		break;
495 
496 	case OP_ENC_I:
497 	case OP_ENC_OI:
498 		log_warnx("%s: instruction does not need memory assist",
499 		    __func__);
500 		res = DECODE_ERROR;
501 		break;
502 
503 	default:
504 		/* Peek to see if we're done decode. */
505 		res = peek_byte(state, NULL);
506 	}
507 
508 	return (res);
509 }
510 
511 static int
get_modrm_reg(struct x86_insn * insn)512 get_modrm_reg(struct x86_insn *insn)
513 {
514 	if (insn == NULL)
515 		return (-1);
516 
517 	if (insn->insn_modrm_valid) {
518 		switch (MODRM_REGOP(insn->insn_modrm)) {
519 		case 0:
520 			insn->insn_reg = VCPU_REGS_RAX;
521 			break;
522 		case 1:
523 			insn->insn_reg = VCPU_REGS_RCX;
524 			break;
525 		case 2:
526 			insn->insn_reg = VCPU_REGS_RDX;
527 			break;
528 		case 3:
529 			insn->insn_reg = VCPU_REGS_RBX;
530 			break;
531 		case 4:
532 			insn->insn_reg = VCPU_REGS_RSP;
533 			break;
534 		case 5:
535 			insn->insn_reg = VCPU_REGS_RBP;
536 			break;
537 		case 6:
538 			insn->insn_reg = VCPU_REGS_RSI;
539 			break;
540 		case 7:
541 			insn->insn_reg = VCPU_REGS_RDI;
542 			break;
543 		}
544 	}
545 
546 	/* REX R bit selects extended registers in LONG mode. */
547 	if (insn->insn_prefix.pfx_rex & REX_R)
548 		insn->insn_reg += 8;
549 
550 	return (0);
551 }
552 
553 static int
get_modrm_addr(struct x86_insn * insn,struct vcpu_reg_state * vrs)554 get_modrm_addr(struct x86_insn *insn, struct vcpu_reg_state *vrs)
555 {
556 	uint8_t mod, rm;
557 	vaddr_t addr = 0x0UL;
558 
559 	if (insn == NULL || vrs == NULL)
560 		return (-1);
561 
562 	if (insn->insn_modrm_valid) {
563 		rm = MODRM_RM(insn->insn_modrm);
564 		mod = MODRM_MOD(insn->insn_modrm);
565 
566 		switch (rm) {
567 		case 0b000:
568 			addr = vrs->vrs_gprs[VCPU_REGS_RAX];
569 			break;
570 		case 0b001:
571 			addr = vrs->vrs_gprs[VCPU_REGS_RCX];
572 			break;
573 		case 0b010:
574 			addr = vrs->vrs_gprs[VCPU_REGS_RDX];
575 			break;
576 		case 0b011:
577 			addr = vrs->vrs_gprs[VCPU_REGS_RBX];
578 			break;
579 		case 0b100:
580 			if (mod == 0b11)
581 				addr = vrs->vrs_gprs[VCPU_REGS_RSP];
582 			break;
583 		case 0b101:
584 			if (mod != 0b00)
585 				addr = vrs->vrs_gprs[VCPU_REGS_RBP];
586 			break;
587 		case 0b110:
588 			addr = vrs->vrs_gprs[VCPU_REGS_RSI];
589 			break;
590 		case 0b111:
591 			addr = vrs->vrs_gprs[VCPU_REGS_RDI];
592 			break;
593 		}
594 
595 		insn->insn_gva = addr;
596 	}
597 
598 	return (0);
599 }
600 
/*
 * decode_disp
 *
 * Consume any displacement bytes implied by the ModRM mod field, storing
 * the size in insn->insn_disp_type and the raw value in insn->insn_disp.
 *
 * NOTE(review): mod == 0b00 with rm == 0b101 normally encodes a 32-bit
 * displacement, but this path records DISP_0 regardless of rm — TODO
 * confirm that form cannot reach the MMIO assist path.
 *
 * Return values:
 *  DECODE_MORE/DECODE_DONE: displacement (possibly none) decoded
 *  DECODE_ERROR: invalid input or not enough bytes in the stream
 */
static enum decode_result
decode_disp(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res = DECODE_ERROR;
	uint64_t disp = 0;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	/* A displacement only exists relative to a ModRM byte. */
	if (!insn->insn_modrm_valid)
		return (DECODE_ERROR);

	switch (MODRM_MOD(insn->insn_modrm)) {
	case 0x00:
		/* No displacement bytes follow. */
		insn->insn_disp_type = DISP_0;
		res = DECODE_MORE;
		break;
	case 0x01:
		/* One byte of displacement. */
		insn->insn_disp_type = DISP_1;
		res = next_value(state, 1, &disp);
		if (res == DECODE_ERROR)
			return (res);
		insn->insn_disp = disp;
		break;
	case 0x02:
		/* 2 bytes with an address-size override prefix, else 4. */
		if (insn->insn_prefix.pfx_group4 == LEG_4_ADDRSZ) {
			insn->insn_disp_type = DISP_2;
			res = next_value(state, 2, &disp);
		} else {
			insn->insn_disp_type = DISP_4;
			res = next_value(state, 4, &disp);
		}
		if (res == DECODE_ERROR)
			return (res);
		insn->insn_disp = disp;
		break;
	default:
		/* mod == 0b11: register operand, no displacement. */
		insn->insn_disp_type = DISP_NONE;
		res = DECODE_MORE;
	}

	return (res);
}
644 
645 static enum decode_result
decode_opcode(struct x86_decode_state * state,struct x86_insn * insn)646 decode_opcode(struct x86_decode_state *state, struct x86_insn *insn)
647 {
648 	enum decode_result res;
649 	enum x86_opcode_type type;
650 	enum x86_operand_enc enc;
651 	struct x86_opcode *opcode = &insn->insn_opcode;
652 	uint8_t byte, byte2;
653 
654 	if (!is_valid_state(state, __func__) || insn == NULL)
655 		return (-1);
656 
657 	memset(opcode, 0, sizeof(*opcode));
658 
659 	res = next_byte(state, &byte);
660 	if (res == DECODE_ERROR)
661 		return (res);
662 
663 	type = x86_1byte_opcode_tbl[byte];
664 	switch(type) {
665 	case OP_UNKNOWN:
666 	case OP_UNSUPPORTED:
667 		log_warnx("%s: unsupported opcode", __func__);
668 		return (DECODE_ERROR);
669 
670 	case OP_TWO_BYTE:
671 		res = next_byte(state, &byte2);
672 		if (res == DECODE_ERROR)
673 			return (res);
674 
675 		type = x86_2byte_opcode_tbl[byte2];
676 		if (type == OP_UNKNOWN || type == OP_UNSUPPORTED) {
677 			log_warnx("%s: unsupported 2-byte opcode", __func__);
678 			return (DECODE_ERROR);
679 		}
680 
681 		opcode->op_bytes[0] = byte;
682 		opcode->op_bytes[1] = byte2;
683 		opcode->op_bytes_len = 2;
684 		enc = x86_2byte_operand_enc_table[byte2];
685 		break;
686 
687 	default:
688 		/* We've potentially got a known 1-byte opcode. */
689 		opcode->op_bytes[0] = byte;
690 		opcode->op_bytes_len = 1;
691 		enc = x86_1byte_operand_enc_tbl[byte];
692 	}
693 
694 	if (enc == OP_ENC_UNKNOWN)
695 		return (DECODE_ERROR);
696 
697 	opcode->op_type = type;
698 	opcode->op_encoding = enc;
699 
700 	return (res);
701 }
702 
703 static enum decode_result
decode_sib(struct x86_decode_state * state,struct x86_insn * insn)704 decode_sib(struct x86_decode_state *state, struct x86_insn *insn)
705 {
706 	enum decode_result res;
707 	uint8_t byte;
708 
709 	if (!is_valid_state(state, __func__) || insn == NULL)
710 		return (-1);
711 
712 	/* SIB is optional, so assume we will be continuing. */
713 	res = DECODE_MORE;
714 
715 	insn->insn_sib_valid = 0;
716 	if (!insn->insn_modrm_valid)
717 		return (res);
718 
719 	/* XXX is SIB valid in all cpu modes? */
720 	if (MODRM_RM(insn->insn_modrm) == 0b100) {
721 		res = next_byte(state, &byte);
722 		if (res != DECODE_ERROR) {
723 			insn->insn_sib_valid = 1;
724 			insn->insn_sib = byte;
725 		}
726 	}
727 
728 	return (res);
729 }
730 
/*
 * decode_imm
 *
 * Consume the immediate operand for MI-encoded instructions, storing the
 * value and its byte length in insn->insn_immediate{,_len}. Other
 * encodings carry no immediate relevant to MMIO assist and return
 * DECODE_DONE immediately.
 *
 * Return values:
 *  DECODE_MORE/DECODE_DONE: immediate (possibly none) decoded
 *  DECODE_ERROR: invalid input, undecodable opcode, or short stream
 */
static enum decode_result
decode_imm(struct x86_decode_state *state, struct x86_insn *insn)
{
	enum decode_result res;
	size_t num_bytes;
	uint64_t value;

	if (!is_valid_state(state, __func__) || insn == NULL)
		return (DECODE_ERROR);

	/* Only handle MI encoded instructions. Others shouldn't need assist. */
	if (insn->insn_opcode.op_encoding != OP_ENC_MI)
		return (DECODE_DONE);

	/* Exceptions related to MOV instructions. */
	if (insn->insn_opcode.op_type == OP_MOV) {
		switch (insn->insn_opcode.op_bytes[0]) {
		case 0xC6:
			/* MOV r/m8, imm8. */
			num_bytes = 1;
			break;
		case 0xC7:
			/* MOV r/m, imm: 16-bit in real mode, else 32-bit. */
			if (insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
				num_bytes = 2;
			else
				num_bytes = 4;
			break;
		default:
			log_warnx("%s: cannot decode immediate bytes for MOV",
			    __func__);
			return (DECODE_ERROR);
		}
	} else {
		/* Fallback to interpreting based on cpu mode and REX. */
		if (insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
			num_bytes = 2;
		else if (insn->insn_prefix.pfx_rex == REX_NONE)
			num_bytes = 4;
		else
			num_bytes = 8;
	}

	res = next_value(state, num_bytes, &value);
	if (res != DECODE_ERROR) {
		insn->insn_immediate = value;
		insn->insn_immediate_len = num_bytes;
	}

	return (res);
}
780 
781 
782 /*
783  * insn_decode
784  *
785  * Decode an x86 instruction from the provided instruction bytes.
786  *
787  * Return values:
788  *  0: successful decode
789  *  Non-zero: an exception occurred during decode
790  */
791 int
insn_decode(struct vm_exit * exit,struct x86_insn * insn)792 insn_decode(struct vm_exit *exit, struct x86_insn *insn)
793 {
794 	enum decode_result res;
795 	struct vcpu_reg_state *vrs = &exit->vrs;
796 	struct x86_decode_state state;
797 	uint8_t *bytes, len;
798 	int mode;
799 
800 	if (exit == NULL || insn == NULL) {
801 		log_warnx("%s: invalid input", __func__);
802 		return (DECODE_ERROR);
803 	}
804 
805 	bytes = exit->vee.vee_insn_bytes;
806 	len = exit->vee.vee_insn_len;
807 
808 	/* 0. Initialize state and instruction objects. */
809 	memset(insn, 0, sizeof(*insn));
810 	memset(&state, 0, sizeof(state));
811 	state.s_len = len;
812 	memcpy(&state.s_bytes, bytes, len);
813 
814 	/* 1. Detect CPU mode. */
815 	mode = detect_cpu_mode(vrs);
816 	if (mode == VMM_CPU_MODE_UNKNOWN) {
817 		log_warnx("%s: failed to identify cpu mode", __func__);
818 #ifdef MMIO_DEBUG
819 		dump_regs(vrs);
820 #endif
821 		return (-1);
822 	}
823 	insn->insn_cpu_mode = mode;
824 
825 #ifdef MMIO_DEBUG
826 	log_info("%s: cpu mode %s detected", __progname, str_cpu_mode(mode));
827 	printf("%s: got bytes: [ ", __progname);
828 	for (int i = 0; i < len; i++) {
829 		printf("%02x ", bytes[i]);
830 	}
831 	printf("]\n");
832 #endif
833 	/* 2. Decode prefixes. */
834 	res = decode_prefix(&state, insn);
835 	if (res == DECODE_ERROR) {
836 		log_warnx("%s: error decoding prefixes", __func__);
837 		goto err;
838 	} else if (res == DECODE_DONE)
839 		goto done;
840 
841 #ifdef MMIO_DEBUG
842 	log_info("%s: prefixes {g1: 0x%02x, g2: 0x%02x, g3: 0x%02x, g4: 0x%02x,"
843 	    " rex: 0x%02x }", __progname, insn->insn_prefix.pfx_group1,
844 	    insn->insn_prefix.pfx_group2, insn->insn_prefix.pfx_group3,
845 	    insn->insn_prefix.pfx_group4, insn->insn_prefix.pfx_rex);
846 #endif
847 
848 	/* 3. Pick apart opcode. Here we can start short-circuiting. */
849 	res = decode_opcode(&state, insn);
850 	if (res == DECODE_ERROR) {
851 		log_warnx("%s: error decoding opcode", __func__);
852 		goto err;
853 	} else if (res == DECODE_DONE)
854 		goto done;
855 
856 #ifdef MMIO_DEBUG
857 	log_info("%s: found opcode %s (operand encoding %s) (%s)", __progname,
858 	    str_opcode(&insn->insn_opcode), str_operand_enc(&insn->insn_opcode),
859 	    str_decode_res(res));
860 #endif
861 
862 	/* Process optional ModR/M byte. */
863 	res = decode_modrm(&state, insn);
864 	if (res == DECODE_ERROR) {
865 		log_warnx("%s: error decoding modrm", __func__);
866 		goto err;
867 	}
868 	if (get_modrm_addr(insn, vrs) != 0)
869 		goto err;
870 	if (get_modrm_reg(insn) != 0)
871 		goto err;
872 	if (res == DECODE_DONE)
873 		goto done;
874 
875 #ifdef MMIO_DEBUG
876 	if (insn->insn_modrm_valid)
877 		log_info("%s: found ModRM 0x%02x (%s)", __progname,
878 		    insn->insn_modrm, str_decode_res(res));
879 #endif
880 
881 	/* Process optional SIB byte. */
882 	res = decode_sib(&state, insn);
883 	if (res == DECODE_ERROR) {
884 		log_warnx("%s: error decoding sib", __func__);
885 		goto err;
886 	} else if (res == DECODE_DONE)
887 		goto done;
888 
889 #ifdef MMIO_DEBUG
890 	if (insn->insn_sib_valid)
891 		log_info("%s: found SIB 0x%02x (%s)", __progname,
892 		    insn->insn_sib, str_decode_res(res));
893 #endif
894 
895 	/* Process any Displacement bytes. */
896 	res = decode_disp(&state, insn);
897 	if (res == DECODE_ERROR) {
898 		log_warnx("%s: error decoding displacement", __func__);
899 		goto err;
900 	} else if (res == DECODE_DONE)
901 		goto done;
902 
903 	/* Process any Immediate data bytes. */
904 	res = decode_imm(&state, insn);
905 	if (res == DECODE_ERROR) {
906 		log_warnx("%s: error decoding immediate bytes", __func__);
907 		goto err;
908 	}
909 
910 done:
911 	insn->insn_bytes_len = state.s_idx;
912 
913 #ifdef MMIO_DEBUG
914 	log_info("%s: final instruction length is %u", __func__,
915 		insn->insn_bytes_len);
916 	dump_insn(insn);
917 	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
918 	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
919 	    MODRM_RM(insn->insn_modrm));
920 	dump_regs(vrs);
921 #endif /* MMIO_DEBUG */
922 	return (0);
923 
924 err:
925 #ifdef MMIO_DEBUG
926 	dump_insn(insn);
927 	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
928 	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
929 	    MODRM_RM(insn->insn_modrm));
930 	dump_regs(vrs);
931 #endif /* MMIO_DEBUG */
932 	return (-1);
933 }
934 
935 static int
emulate_mov(struct x86_insn * insn,struct vm_exit * exit)936 emulate_mov(struct x86_insn *insn, struct vm_exit *exit)
937 {
938 	/* XXX Only supports read to register for now */
939 	if (insn->insn_opcode.op_encoding != OP_ENC_RM)
940 		return (-1);
941 
942 	/* XXX No device emulation yet. Fill with 0xFFs. */
943 	exit->vrs.vrs_gprs[insn->insn_reg] = 0xFFFFFFFFFFFFFFFF;
944 
945 	return (0);
946 }
947 
/*
 * emulate_movzx
 *
 * Emulate a MOVZX (zero-extending load) that faulted on MMIO: determine
 * the source width from the opcode byte and the destination width from
 * the cpu mode and REX prefix, then write the (placeholder) value into
 * the destination register.
 *
 * Returns 0 on success, -1 on an invalid encoding or opcode.
 *
 * NOTE(review): for dst == 4 the mask below preserves the upper 32 bits
 * of the destination register, whereas a hardware 32-bit MOVZX zeroes
 * them — TODO confirm this difference is intentional.
 */
static int
emulate_movzx(struct x86_insn *insn, struct vm_exit *exit)
{
	uint8_t byte, len, src = 1, dst = 2;
	uint64_t value = 0;

	/* Only RM is valid for MOVZX. */
	if (insn->insn_opcode.op_encoding != OP_ENC_RM) {
		log_warnx("invalid op encoding for MOVZX: %d",
		    insn->insn_opcode.op_encoding);
		return (-1);
	}

	len = insn->insn_opcode.op_bytes_len;
	if (len < 1 || len > sizeof(insn->insn_opcode.op_bytes)) {
		log_warnx("invalid opcode byte length: %d", len);
		return (-1);
	}

	/* The last opcode byte (after the 0x0F escape) selects the width. */
	byte = insn->insn_opcode.op_bytes[len - 1];
	switch (byte) {
	case 0xB6:
		/* MOVZX r, r/m8. */
		src = 1;
		if (insn->insn_cpu_mode == VMM_CPU_MODE_PROT
		    || insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
			dst = 2;
		else if (insn->insn_prefix.pfx_rex == REX_NONE)
			dst = 4;
		else // XXX validate CPU mode
			dst = 8;
		break;
	case 0xB7:
		/* MOVZX r, r/m16. */
		src = 2;
		if (insn->insn_prefix.pfx_rex == REX_NONE)
			dst = 4;
		else // XXX validate CPU mode
			dst = 8;
		break;
	default:
		log_warnx("invalid byte in MOVZX opcode: %x", byte);
		return (-1);
	}

	/* Clear the destination before merging in the source bits. */
	if (dst == 4)
		exit->vrs.vrs_gprs[insn->insn_reg] &= 0xFFFFFFFF00000000;
	else
		exit->vrs.vrs_gprs[insn->insn_reg] = 0x0UL;

	/* XXX No device emulation yet. Fill with 0xFFs. */
	switch (src) {
	case 1: value = 0xFF; break;
	case 2: value = 0xFFFF; break;
	case 4: value = 0xFFFFFFFF; break;
	case 8: value = 0xFFFFFFFFFFFFFFFF; break;
	default:
		log_warnx("invalid source size: %d", src);
		return (-1);
	}

	exit->vrs.vrs_gprs[insn->insn_reg] |= value;

	return (0);
}
1011 
/*
 * insn_emulate
 *
 * Dispatch a decoded instruction to its emulation routine and, on
 * success, advance the guest RIP past the instruction.
 *
 * Returns:
 *  0: success
 *  EINVAL: exception occurred
 *  EFAULT: page fault occurred, requires retry
 *  ENOTSUP: an unsupported instruction was provided
 *
 * NOTE(review): emulate_mov()/emulate_movzx() return -1 on failure, not
 * EINVAL, so callers may see -1 despite the contract above — TODO confirm.
 */
int
insn_emulate(struct vm_exit *exit, struct x86_insn *insn)
{
	int res;

	switch (insn->insn_opcode.op_type) {
	case OP_MOV:
		res = emulate_mov(insn, exit);
		break;

	case OP_MOVZX:
		res = emulate_movzx(insn, exit);
		break;

	default:
		log_warnx("%s: emulation not defined for %s", __func__,
		    str_opcode(&insn->insn_opcode));
		res = ENOTSUP;
	}

	/* Skip the guest past the emulated instruction. */
	if (res == 0)
		exit->vrs.vrs_gprs[VCPU_REGS_RIP] += insn->insn_bytes_len;

	return (res);
}
1046