1 /* $NetBSD: nvmm_x86_svm.c,v 1.57 2020/03/14 18:08:39 ad Exp $ */ 2 3 /* 4 * Copyright (c) 2018-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Maxime Villard. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.57 2020/03/14 18:08:39 ad Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/kmem.h> 39 #include <sys/cpu.h> 40 #include <sys/xcall.h> 41 #include <sys/mman.h> 42 43 #include <uvm/uvm.h> 44 #include <uvm/uvm_page.h> 45 46 #include <x86/cputypes.h> 47 #include <x86/specialreg.h> 48 #include <x86/pmap.h> 49 #include <x86/dbregs.h> 50 #include <x86/cpu_counter.h> 51 #include <machine/cpuvar.h> 52 53 #include <dev/nvmm/nvmm.h> 54 #include <dev/nvmm/nvmm_internal.h> 55 #include <dev/nvmm/x86/nvmm_x86.h> 56 57 int svm_vmrun(paddr_t, uint64_t *); 58 59 #define MSR_VM_HSAVE_PA 0xC0010117 60 61 /* -------------------------------------------------------------------------- */ 62 63 #define VMCB_EXITCODE_CR0_READ 0x0000 64 #define VMCB_EXITCODE_CR1_READ 0x0001 65 #define VMCB_EXITCODE_CR2_READ 0x0002 66 #define VMCB_EXITCODE_CR3_READ 0x0003 67 #define VMCB_EXITCODE_CR4_READ 0x0004 68 #define VMCB_EXITCODE_CR5_READ 0x0005 69 #define VMCB_EXITCODE_CR6_READ 0x0006 70 #define VMCB_EXITCODE_CR7_READ 0x0007 71 #define VMCB_EXITCODE_CR8_READ 0x0008 72 #define VMCB_EXITCODE_CR9_READ 0x0009 73 #define VMCB_EXITCODE_CR10_READ 0x000A 74 #define VMCB_EXITCODE_CR11_READ 0x000B 75 #define VMCB_EXITCODE_CR12_READ 0x000C 76 #define VMCB_EXITCODE_CR13_READ 0x000D 77 #define VMCB_EXITCODE_CR14_READ 0x000E 78 #define VMCB_EXITCODE_CR15_READ 0x000F 79 #define VMCB_EXITCODE_CR0_WRITE 0x0010 80 #define VMCB_EXITCODE_CR1_WRITE 0x0011 81 #define VMCB_EXITCODE_CR2_WRITE 0x0012 82 #define VMCB_EXITCODE_CR3_WRITE 0x0013 83 #define VMCB_EXITCODE_CR4_WRITE 0x0014 84 #define VMCB_EXITCODE_CR5_WRITE 0x0015 85 #define VMCB_EXITCODE_CR6_WRITE 0x0016 86 #define VMCB_EXITCODE_CR7_WRITE 0x0017 87 #define VMCB_EXITCODE_CR8_WRITE 0x0018 88 #define VMCB_EXITCODE_CR9_WRITE 0x0019 89 #define VMCB_EXITCODE_CR10_WRITE 0x001A 90 #define VMCB_EXITCODE_CR11_WRITE 0x001B 91 #define VMCB_EXITCODE_CR12_WRITE 0x001C 92 #define VMCB_EXITCODE_CR13_WRITE 0x001D 93 #define VMCB_EXITCODE_CR14_WRITE 0x001E 94 #define VMCB_EXITCODE_CR15_WRITE 0x001F 95 #define VMCB_EXITCODE_DR0_READ 0x0020 96 #define VMCB_EXITCODE_DR1_READ 0x0021 97 #define VMCB_EXITCODE_DR2_READ 0x0022 98 #define VMCB_EXITCODE_DR3_READ 0x0023 99 #define VMCB_EXITCODE_DR4_READ 0x0024 100 #define VMCB_EXITCODE_DR5_READ 0x0025 101 #define VMCB_EXITCODE_DR6_READ 0x0026 102 #define VMCB_EXITCODE_DR7_READ 0x0027 103 #define VMCB_EXITCODE_DR8_READ 0x0028 104 #define VMCB_EXITCODE_DR9_READ 0x0029 105 #define VMCB_EXITCODE_DR10_READ 0x002A 106 #define VMCB_EXITCODE_DR11_READ 0x002B 107 #define VMCB_EXITCODE_DR12_READ 0x002C 108 #define VMCB_EXITCODE_DR13_READ 0x002D 109 #define VMCB_EXITCODE_DR14_READ 0x002E 110 #define VMCB_EXITCODE_DR15_READ 0x002F 111 #define VMCB_EXITCODE_DR0_WRITE 0x0030 112 #define VMCB_EXITCODE_DR1_WRITE 0x0031 113 #define VMCB_EXITCODE_DR2_WRITE 0x0032 114 #define VMCB_EXITCODE_DR3_WRITE 0x0033 115 #define VMCB_EXITCODE_DR4_WRITE 0x0034 116 #define VMCB_EXITCODE_DR5_WRITE 0x0035 117 #define VMCB_EXITCODE_DR6_WRITE 0x0036 118 #define VMCB_EXITCODE_DR7_WRITE 0x0037 119 #define VMCB_EXITCODE_DR8_WRITE 0x0038 120 #define VMCB_EXITCODE_DR9_WRITE 0x0039 121 #define VMCB_EXITCODE_DR10_WRITE 0x003A 122 #define VMCB_EXITCODE_DR11_WRITE 0x003B 123 #define VMCB_EXITCODE_DR12_WRITE 0x003C 124 #define VMCB_EXITCODE_DR13_WRITE 0x003D 125 #define VMCB_EXITCODE_DR14_WRITE 0x003E 126 #define VMCB_EXITCODE_DR15_WRITE 0x003F 127 #define VMCB_EXITCODE_EXCP0 0x0040 128 #define VMCB_EXITCODE_EXCP1 0x0041 129 #define VMCB_EXITCODE_EXCP2 0x0042 130 #define VMCB_EXITCODE_EXCP3 0x0043 131 #define VMCB_EXITCODE_EXCP4 0x0044 132 #define VMCB_EXITCODE_EXCP5 0x0045 133 #define VMCB_EXITCODE_EXCP6 0x0046 134 #define VMCB_EXITCODE_EXCP7 0x0047 135 #define VMCB_EXITCODE_EXCP8 0x0048 136 #define VMCB_EXITCODE_EXCP9 0x0049 137 #define VMCB_EXITCODE_EXCP10 0x004A 138 #define VMCB_EXITCODE_EXCP11 0x004B 139 #define VMCB_EXITCODE_EXCP12 0x004C 140 #define VMCB_EXITCODE_EXCP13 0x004D 141 #define VMCB_EXITCODE_EXCP14 0x004E 142 #define VMCB_EXITCODE_EXCP15 0x004F 143 #define VMCB_EXITCODE_EXCP16 0x0050 144 #define VMCB_EXITCODE_EXCP17 0x0051 145 #define VMCB_EXITCODE_EXCP18 0x0052 146 #define VMCB_EXITCODE_EXCP19 0x0053 147 #define VMCB_EXITCODE_EXCP20 0x0054 148 #define VMCB_EXITCODE_EXCP21 0x0055 149 #define VMCB_EXITCODE_EXCP22 0x0056 150 #define VMCB_EXITCODE_EXCP23 0x0057 151 #define VMCB_EXITCODE_EXCP24 0x0058 152 #define VMCB_EXITCODE_EXCP25 0x0059 153 #define VMCB_EXITCODE_EXCP26 0x005A 154 #define VMCB_EXITCODE_EXCP27 0x005B 155 #define VMCB_EXITCODE_EXCP28 0x005C 156 #define VMCB_EXITCODE_EXCP29 0x005D 157 #define VMCB_EXITCODE_EXCP30 0x005E 158 #define VMCB_EXITCODE_EXCP31 0x005F 159 #define VMCB_EXITCODE_INTR 0x0060 160 #define VMCB_EXITCODE_NMI 0x0061 161 #define VMCB_EXITCODE_SMI 0x0062 162 #define VMCB_EXITCODE_INIT 0x0063 163 #define VMCB_EXITCODE_VINTR 0x0064 164 #define VMCB_EXITCODE_CR0_SEL_WRITE 0x0065 165 #define VMCB_EXITCODE_IDTR_READ 0x0066 166 #define VMCB_EXITCODE_GDTR_READ 0x0067 167 #define VMCB_EXITCODE_LDTR_READ 0x0068 168 #define VMCB_EXITCODE_TR_READ 0x0069 169 #define VMCB_EXITCODE_IDTR_WRITE 0x006A 170 #define VMCB_EXITCODE_GDTR_WRITE 0x006B 171 #define VMCB_EXITCODE_LDTR_WRITE 0x006C 172 #define VMCB_EXITCODE_TR_WRITE 0x006D 173 #define VMCB_EXITCODE_RDTSC 0x006E 174 #define VMCB_EXITCODE_RDPMC 0x006F 175 #define VMCB_EXITCODE_PUSHF 0x0070 176 #define VMCB_EXITCODE_POPF 0x0071 177 #define VMCB_EXITCODE_CPUID 0x0072 178 #define VMCB_EXITCODE_RSM 0x0073 179 #define VMCB_EXITCODE_IRET 0x0074 180 #define VMCB_EXITCODE_SWINT 0x0075 181 #define VMCB_EXITCODE_INVD 0x0076 182 #define VMCB_EXITCODE_PAUSE 0x0077 183 #define VMCB_EXITCODE_HLT 0x0078 184 #define VMCB_EXITCODE_INVLPG 0x0079 185 #define VMCB_EXITCODE_INVLPGA 0x007A 186 #define VMCB_EXITCODE_IOIO 0x007B 187 #define VMCB_EXITCODE_MSR 0x007C 188 #define VMCB_EXITCODE_TASK_SWITCH 0x007D 189 #define VMCB_EXITCODE_FERR_FREEZE 0x007E 190 #define VMCB_EXITCODE_SHUTDOWN 0x007F 191 #define VMCB_EXITCODE_VMRUN 0x0080 192 #define VMCB_EXITCODE_VMMCALL 0x0081 193 #define VMCB_EXITCODE_VMLOAD 0x0082 194 #define VMCB_EXITCODE_VMSAVE 0x0083 195 #define VMCB_EXITCODE_STGI 0x0084 196 #define VMCB_EXITCODE_CLGI 0x0085 197 #define VMCB_EXITCODE_SKINIT 0x0086 198 #define VMCB_EXITCODE_RDTSCP 0x0087 199 #define VMCB_EXITCODE_ICEBP 0x0088 200 #define VMCB_EXITCODE_WBINVD 0x0089 201 #define VMCB_EXITCODE_MONITOR 0x008A 202 #define VMCB_EXITCODE_MWAIT 0x008B 203 #define VMCB_EXITCODE_MWAIT_CONDITIONAL 0x008C 204 #define VMCB_EXITCODE_XSETBV 0x008D 205 #define VMCB_EXITCODE_RDPRU 0x008E 206 #define VMCB_EXITCODE_EFER_WRITE_TRAP 0x008F 207 #define VMCB_EXITCODE_CR0_WRITE_TRAP 0x0090 208 #define VMCB_EXITCODE_CR1_WRITE_TRAP 0x0091 209 #define VMCB_EXITCODE_CR2_WRITE_TRAP 0x0092 210 #define VMCB_EXITCODE_CR3_WRITE_TRAP 0x0093 211 #define VMCB_EXITCODE_CR4_WRITE_TRAP 0x0094 212 #define VMCB_EXITCODE_CR5_WRITE_TRAP 0x0095 213 #define VMCB_EXITCODE_CR6_WRITE_TRAP 0x0096 214 #define VMCB_EXITCODE_CR7_WRITE_TRAP 0x0097 215 #define VMCB_EXITCODE_CR8_WRITE_TRAP 0x0098 216 #define VMCB_EXITCODE_CR9_WRITE_TRAP 0x0099 217 #define VMCB_EXITCODE_CR10_WRITE_TRAP 0x009A 218 #define VMCB_EXITCODE_CR11_WRITE_TRAP 0x009B 219 #define VMCB_EXITCODE_CR12_WRITE_TRAP 0x009C 220 #define VMCB_EXITCODE_CR13_WRITE_TRAP 0x009D 221 #define VMCB_EXITCODE_CR14_WRITE_TRAP 0x009E 222 #define VMCB_EXITCODE_CR15_WRITE_TRAP 0x009F 223 #define VMCB_EXITCODE_MCOMMIT 0x00A3 224 #define VMCB_EXITCODE_NPF 0x0400 225 #define VMCB_EXITCODE_AVIC_INCOMP_IPI 0x0401 226 #define VMCB_EXITCODE_AVIC_NOACCEL 0x0402 227 #define VMCB_EXITCODE_VMGEXIT 0x0403 228 #define VMCB_EXITCODE_INVALID -1 229 230 /* -------------------------------------------------------------------------- */ 231 232 struct vmcb_ctrl { 233 uint32_t intercept_cr; 234 #define VMCB_CTRL_INTERCEPT_RCR(x) __BIT( 0 + x) 235 #define VMCB_CTRL_INTERCEPT_WCR(x) __BIT(16 + x) 236 237 uint32_t intercept_dr; 238 #define VMCB_CTRL_INTERCEPT_RDR(x) __BIT( 0 + x) 239 #define VMCB_CTRL_INTERCEPT_WDR(x) __BIT(16 + x) 240 241 uint32_t intercept_vec; 242 #define VMCB_CTRL_INTERCEPT_VEC(x) __BIT(x) 243 244 uint32_t intercept_misc1; 245 #define VMCB_CTRL_INTERCEPT_INTR __BIT(0) 246 #define VMCB_CTRL_INTERCEPT_NMI __BIT(1) 247 #define VMCB_CTRL_INTERCEPT_SMI __BIT(2) 248 #define VMCB_CTRL_INTERCEPT_INIT __BIT(3) 249 #define VMCB_CTRL_INTERCEPT_VINTR __BIT(4) 250 #define VMCB_CTRL_INTERCEPT_CR0_SPEC __BIT(5) 251 #define VMCB_CTRL_INTERCEPT_RIDTR __BIT(6) 252 #define VMCB_CTRL_INTERCEPT_RGDTR __BIT(7) 253 #define VMCB_CTRL_INTERCEPT_RLDTR __BIT(8) 254 #define VMCB_CTRL_INTERCEPT_RTR __BIT(9) 255 #define VMCB_CTRL_INTERCEPT_WIDTR __BIT(10) 256 #define VMCB_CTRL_INTERCEPT_WGDTR __BIT(11) 257 #define VMCB_CTRL_INTERCEPT_WLDTR __BIT(12) 258 #define VMCB_CTRL_INTERCEPT_WTR __BIT(13) 259 #define VMCB_CTRL_INTERCEPT_RDTSC __BIT(14) 260 #define VMCB_CTRL_INTERCEPT_RDPMC __BIT(15) 261 #define VMCB_CTRL_INTERCEPT_PUSHF __BIT(16) 262 #define VMCB_CTRL_INTERCEPT_POPF __BIT(17) 263 #define VMCB_CTRL_INTERCEPT_CPUID __BIT(18) 264 #define VMCB_CTRL_INTERCEPT_RSM __BIT(19) 265 #define VMCB_CTRL_INTERCEPT_IRET __BIT(20) 266 #define VMCB_CTRL_INTERCEPT_INTN __BIT(21) 267 #define VMCB_CTRL_INTERCEPT_INVD __BIT(22) 268 #define VMCB_CTRL_INTERCEPT_PAUSE __BIT(23) 269 #define VMCB_CTRL_INTERCEPT_HLT __BIT(24) 270 #define VMCB_CTRL_INTERCEPT_INVLPG __BIT(25) 271 #define VMCB_CTRL_INTERCEPT_INVLPGA __BIT(26) 272 #define VMCB_CTRL_INTERCEPT_IOIO_PROT __BIT(27) 273 #define VMCB_CTRL_INTERCEPT_MSR_PROT __BIT(28) 274 #define VMCB_CTRL_INTERCEPT_TASKSW __BIT(29) 275 #define VMCB_CTRL_INTERCEPT_FERR_FREEZE __BIT(30) 276 #define VMCB_CTRL_INTERCEPT_SHUTDOWN __BIT(31) 277 278 uint32_t intercept_misc2; 279 #define VMCB_CTRL_INTERCEPT_VMRUN __BIT(0) 280 #define VMCB_CTRL_INTERCEPT_VMMCALL __BIT(1) 281 #define VMCB_CTRL_INTERCEPT_VMLOAD __BIT(2) 282 #define VMCB_CTRL_INTERCEPT_VMSAVE __BIT(3) 283 #define VMCB_CTRL_INTERCEPT_STGI __BIT(4) 284 #define VMCB_CTRL_INTERCEPT_CLGI __BIT(5) 285 #define VMCB_CTRL_INTERCEPT_SKINIT __BIT(6) 286 #define VMCB_CTRL_INTERCEPT_RDTSCP __BIT(7) 287 #define VMCB_CTRL_INTERCEPT_ICEBP __BIT(8) 288 #define VMCB_CTRL_INTERCEPT_WBINVD __BIT(9) 289 #define VMCB_CTRL_INTERCEPT_MONITOR __BIT(10) 290 #define VMCB_CTRL_INTERCEPT_MWAIT __BIT(11) 291 #define VMCB_CTRL_INTERCEPT_MWAIT_ARMED __BIT(12) 292 #define VMCB_CTRL_INTERCEPT_XSETBV __BIT(13) 293 #define VMCB_CTRL_INTERCEPT_RDPRU __BIT(14) 294 #define VMCB_CTRL_INTERCEPT_EFER_SPEC __BIT(15) 295 #define VMCB_CTRL_INTERCEPT_WCR_SPEC(x) __BIT(16 + x) 296 297 uint32_t intercept_misc3; 298 #define VMCB_CTRL_INTERCEPT_MCOMMIT __BIT(3) 299 300 uint8_t rsvd1[36]; 301 uint16_t pause_filt_thresh; 302 uint16_t pause_filt_cnt; 303 uint64_t iopm_base_pa; 304 uint64_t msrpm_base_pa; 305 uint64_t tsc_offset; 306 uint32_t guest_asid; 307 308 uint32_t tlb_ctrl; 309 #define VMCB_CTRL_TLB_CTRL_FLUSH_ALL 0x01 310 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST 0x03 311 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL 0x07 312 313 uint64_t v; 314 #define VMCB_CTRL_V_TPR __BITS(3,0) 315 #define VMCB_CTRL_V_IRQ __BIT(8) 316 #define VMCB_CTRL_V_VGIF __BIT(9) 317 #define VMCB_CTRL_V_INTR_PRIO __BITS(19,16) 318 #define VMCB_CTRL_V_IGN_TPR __BIT(20) 319 #define VMCB_CTRL_V_INTR_MASKING __BIT(24) 320 #define VMCB_CTRL_V_GUEST_VGIF __BIT(25) 321 #define VMCB_CTRL_V_AVIC_EN __BIT(31) 322 #define VMCB_CTRL_V_INTR_VECTOR __BITS(39,32) 323 324 uint64_t intr; 325 #define VMCB_CTRL_INTR_SHADOW __BIT(0) 326 327 uint64_t exitcode; 328 uint64_t exitinfo1; 329 uint64_t exitinfo2; 330 331 uint64_t exitintinfo; 332 #define VMCB_CTRL_EXITINTINFO_VECTOR __BITS(7,0) 333 #define VMCB_CTRL_EXITINTINFO_TYPE __BITS(10,8) 334 #define VMCB_CTRL_EXITINTINFO_EV __BIT(11) 335 #define VMCB_CTRL_EXITINTINFO_V __BIT(31) 336 #define VMCB_CTRL_EXITINTINFO_ERRORCODE __BITS(63,32) 337 338 uint64_t enable1; 339 #define VMCB_CTRL_ENABLE_NP __BIT(0) 340 #define VMCB_CTRL_ENABLE_SEV __BIT(1) 341 #define VMCB_CTRL_ENABLE_ES_SEV __BIT(2) 342 #define VMCB_CTRL_ENABLE_GMET __BIT(3) 343 #define VMCB_CTRL_ENABLE_VTE __BIT(5) 344 345 uint64_t avic; 346 #define VMCB_CTRL_AVIC_APIC_BAR __BITS(51,0) 347 348 uint64_t ghcb; 349 350 uint64_t eventinj; 351 #define VMCB_CTRL_EVENTINJ_VECTOR __BITS(7,0) 352 #define VMCB_CTRL_EVENTINJ_TYPE __BITS(10,8) 353 #define VMCB_CTRL_EVENTINJ_EV __BIT(11) 354 #define VMCB_CTRL_EVENTINJ_V __BIT(31) 355 #define VMCB_CTRL_EVENTINJ_ERRORCODE __BITS(63,32) 356 357 uint64_t n_cr3; 358 359 uint64_t enable2; 360 #define VMCB_CTRL_ENABLE_LBR __BIT(0) 361 #define VMCB_CTRL_ENABLE_VVMSAVE __BIT(1) 362 363 uint32_t vmcb_clean; 364 #define VMCB_CTRL_VMCB_CLEAN_I __BIT(0) 365 #define VMCB_CTRL_VMCB_CLEAN_IOPM __BIT(1) 366 #define VMCB_CTRL_VMCB_CLEAN_ASID __BIT(2) 367 #define VMCB_CTRL_VMCB_CLEAN_TPR __BIT(3) 368 #define VMCB_CTRL_VMCB_CLEAN_NP __BIT(4) 369 #define VMCB_CTRL_VMCB_CLEAN_CR __BIT(5) 370 #define VMCB_CTRL_VMCB_CLEAN_DR __BIT(6) 371 #define VMCB_CTRL_VMCB_CLEAN_DT __BIT(7) 372 #define VMCB_CTRL_VMCB_CLEAN_SEG __BIT(8) 373 #define VMCB_CTRL_VMCB_CLEAN_CR2 __BIT(9) 374 #define VMCB_CTRL_VMCB_CLEAN_LBR __BIT(10) 375 #define VMCB_CTRL_VMCB_CLEAN_AVIC __BIT(11) 376 377 uint32_t rsvd2; 378 uint64_t nrip; 379 uint8_t inst_len; 380 uint8_t inst_bytes[15]; 381 uint64_t avic_abpp; 382 uint64_t rsvd3; 383 uint64_t avic_ltp; 384 385 uint64_t avic_phys; 386 #define VMCB_CTRL_AVIC_PHYS_TABLE_PTR __BITS(51,12) 387 #define VMCB_CTRL_AVIC_PHYS_MAX_INDEX __BITS(7,0) 388 389 uint64_t rsvd4; 390 uint64_t vmcb_ptr; 391 392 uint8_t pad[752]; 393 } __packed; 394 395 CTASSERT(sizeof(struct vmcb_ctrl) == 1024); 396 397 struct vmcb_segment { 398 uint16_t selector; 399 uint16_t attrib; /* hidden */ 400 uint32_t limit; /* hidden */ 401 uint64_t base; /* hidden */ 402 } __packed; 403 404 CTASSERT(sizeof(struct vmcb_segment) == 16); 405 406 struct vmcb_state { 407 struct vmcb_segment es; 408 struct vmcb_segment cs; 409 struct vmcb_segment ss; 410 struct vmcb_segment ds; 411 struct vmcb_segment fs; 412 struct vmcb_segment gs; 413 struct vmcb_segment gdt; 414 struct vmcb_segment ldt; 415 struct vmcb_segment idt; 416 struct vmcb_segment tr; 417 uint8_t rsvd1[43]; 418 uint8_t cpl; 419 uint8_t rsvd2[4]; 420 uint64_t efer; 421 uint8_t rsvd3[112]; 422 uint64_t cr4; 423 uint64_t cr3; 424 uint64_t cr0; 425 uint64_t dr7; 426 uint64_t dr6; 427 uint64_t rflags; 428 uint64_t rip; 429 uint8_t rsvd4[88]; 430 uint64_t rsp; 431 uint8_t rsvd5[24]; 432 uint64_t rax; 433 uint64_t star; 434 uint64_t lstar; 435 uint64_t cstar; 436 uint64_t sfmask; 437 uint64_t kernelgsbase; 438 uint64_t sysenter_cs; 439 uint64_t sysenter_esp; 440 uint64_t sysenter_eip; 441 uint64_t cr2; 442 uint8_t rsvd6[32]; 443 uint64_t g_pat; 444 uint64_t dbgctl; 445 uint64_t br_from; 446 uint64_t br_to; 447 uint64_t int_from; 448 uint64_t int_to; 449 uint8_t pad[2408]; 450 } __packed; 451 452 CTASSERT(sizeof(struct vmcb_state) == 0xC00); 453 454 struct vmcb { 455 struct vmcb_ctrl ctrl; 456 struct vmcb_state state; 457 } __packed; 458 459 CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); 460 CTASSERT(offsetof(struct vmcb, state) == 0x400); 461 462 /* -------------------------------------------------------------------------- */ 463 464 static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t); 465 static void svm_vcpu_state_commit(struct nvmm_cpu *); 466 467 struct svm_hsave { 468 paddr_t pa; 469 }; 470 471 static struct svm_hsave hsave[MAXCPUS]; 472 473 static uint8_t *svm_asidmap __read_mostly; 474 static uint32_t svm_maxasid __read_mostly; 475 static kmutex_t svm_asidlock __cacheline_aligned; 476 477 static bool svm_decode_assist __read_mostly; 478 static uint32_t svm_ctrl_tlb_flush __read_mostly; 479 480 #define SVM_XCR0_MASK_DEFAULT (XCR0_X87|XCR0_SSE) 481 static uint64_t svm_xcr0_mask __read_mostly; 482 483 #define SVM_NCPUIDS 32 484 485 #define VMCB_NPAGES 1 486 487 #define MSRBM_NPAGES 2 488 #define MSRBM_SIZE (MSRBM_NPAGES * PAGE_SIZE) 489 490 #define IOBM_NPAGES 3 491 #define IOBM_SIZE (IOBM_NPAGES * PAGE_SIZE) 492 493 /* Does not include EFER_LMSLE. */ 494 #define EFER_VALID \ 495 (EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE) 496 497 #define EFER_TLB_FLUSH \ 498 (EFER_NXE|EFER_LMA|EFER_LME) 499 #define CR0_TLB_FLUSH \ 500 (CR0_PG|CR0_WP|CR0_CD|CR0_NW) 501 #define CR4_TLB_FLUSH \ 502 (CR4_PGE|CR4_PAE|CR4_PSE) 503 504 /* -------------------------------------------------------------------------- */ 505 506 struct svm_machdata { 507 volatile uint64_t mach_htlb_gen; 508 }; 509 510 static const size_t svm_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = { 511 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] = 512 sizeof(struct nvmm_vcpu_conf_cpuid), 513 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] = 514 sizeof(struct nvmm_vcpu_conf_tpr) 515 }; 516 517 struct svm_cpudata { 518 /* General */ 519 bool shared_asid; 520 bool gtlb_want_flush; 521 bool gtsc_want_update; 522 uint64_t vcpu_htlb_gen; 523 524 /* VMCB */ 525 struct vmcb *vmcb; 526 paddr_t vmcb_pa; 527 528 /* I/O bitmap */ 529 uint8_t *iobm; 530 paddr_t iobm_pa; 531 532 /* MSR bitmap */ 533 uint8_t *msrbm; 534 paddr_t msrbm_pa; 535 536 /* Host state */ 537 uint64_t hxcr0; 538 uint64_t star; 539 uint64_t lstar; 540 uint64_t cstar; 541 uint64_t sfmask; 542 uint64_t fsbase; 543 uint64_t kernelgsbase; 544 545 /* Intr state */ 546 bool int_window_exit; 547 bool nmi_window_exit; 548 bool evt_pending; 549 550 /* Guest state */ 551 uint64_t gxcr0; 552 uint64_t gprs[NVMM_X64_NGPR]; 553 uint64_t drs[NVMM_X64_NDR]; 554 uint64_t gtsc; 555 struct xsave_header gfpu __aligned(64); 556 557 /* VCPU configuration. */ 558 bool cpuidpresent[SVM_NCPUIDS]; 559 struct nvmm_vcpu_conf_cpuid cpuid[SVM_NCPUIDS]; 560 }; 561 562 static void 563 svm_vmcb_cache_default(struct vmcb *vmcb) 564 { 565 vmcb->ctrl.vmcb_clean = 566 VMCB_CTRL_VMCB_CLEAN_I | 567 VMCB_CTRL_VMCB_CLEAN_IOPM | 568 VMCB_CTRL_VMCB_CLEAN_ASID | 569 VMCB_CTRL_VMCB_CLEAN_TPR | 570 VMCB_CTRL_VMCB_CLEAN_NP | 571 VMCB_CTRL_VMCB_CLEAN_CR | 572 VMCB_CTRL_VMCB_CLEAN_DR | 573 VMCB_CTRL_VMCB_CLEAN_DT | 574 VMCB_CTRL_VMCB_CLEAN_SEG | 575 VMCB_CTRL_VMCB_CLEAN_CR2 | 576 VMCB_CTRL_VMCB_CLEAN_LBR | 577 VMCB_CTRL_VMCB_CLEAN_AVIC; 578 } 579 580 static void 581 svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags) 582 { 583 if (flags & NVMM_X64_STATE_SEGS) { 584 vmcb->ctrl.vmcb_clean &= 585 ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT); 586 } 587 if (flags & NVMM_X64_STATE_CRS) { 588 vmcb->ctrl.vmcb_clean &= 589 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 | 590 VMCB_CTRL_VMCB_CLEAN_TPR); 591 } 592 if (flags & NVMM_X64_STATE_DRS) { 593 vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR; 594 } 595 if (flags & NVMM_X64_STATE_MSRS) { 596 /* CR for EFER, NP for PAT. */ 597 vmcb->ctrl.vmcb_clean &= 598 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP); 599 } 600 } 601 602 static inline void 603 svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags) 604 { 605 vmcb->ctrl.vmcb_clean &= ~flags; 606 } 607 608 static inline void 609 svm_vmcb_cache_flush_all(struct vmcb *vmcb) 610 { 611 vmcb->ctrl.vmcb_clean = 0; 612 } 613 614 #define SVM_EVENT_TYPE_HW_INT 0 615 #define SVM_EVENT_TYPE_NMI 2 616 #define SVM_EVENT_TYPE_EXC 3 617 #define SVM_EVENT_TYPE_SW_INT 4 618 619 static void 620 svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi) 621 { 622 struct svm_cpudata *cpudata = vcpu->cpudata; 623 struct vmcb *vmcb = cpudata->vmcb; 624 625 if (nmi) { 626 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET; 627 cpudata->nmi_window_exit = true; 628 } else { 629 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR; 630 vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 631 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 632 cpudata->int_window_exit = true; 633 } 634 635 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 636 } 637 638 static void 639 svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi) 640 { 641 struct svm_cpudata *cpudata = vcpu->cpudata; 642 struct vmcb *vmcb = cpudata->vmcb; 643 644 if (nmi) { 645 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET; 646 cpudata->nmi_window_exit = false; 647 } else { 648 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR; 649 vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 650 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 651 cpudata->int_window_exit = false; 652 } 653 654 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 655 } 656 657 static inline int 658 svm_event_has_error(uint8_t vector) 659 { 660 switch (vector) { 661 case 8: /* #DF */ 662 case 10: /* #TS */ 663 case 11: /* #NP */ 664 case 12: /* #SS */ 665 case 13: /* #GP */ 666 case 14: /* #PF */ 667 case 17: /* #AC */ 668 case 30: /* #SX */ 669 return 1; 670 default: 671 return 0; 672 } 673 } 674 675 static int 676 svm_vcpu_inject(struct nvmm_cpu *vcpu) 677 { 678 struct nvmm_comm_page *comm = vcpu->comm; 679 struct svm_cpudata *cpudata = vcpu->cpudata; 680 struct vmcb *vmcb = cpudata->vmcb; 681 u_int evtype; 682 uint8_t vector; 683 uint64_t error; 684 int type = 0, err = 0; 685 686 evtype = comm->event.type; 687 vector = comm->event.vector; 688 error = comm->event.u.excp.error; 689 __insn_barrier(); 690 691 switch (evtype) { 692 case NVMM_VCPU_EVENT_EXCP: 693 type = SVM_EVENT_TYPE_EXC; 694 if (vector == 2 || vector >= 32) 695 return EINVAL; 696 if (vector == 3 || vector == 0) 697 return EINVAL; 698 err = svm_event_has_error(vector); 699 break; 700 case NVMM_VCPU_EVENT_INTR: 701 type = SVM_EVENT_TYPE_HW_INT; 702 if (vector == 2) { 703 type = SVM_EVENT_TYPE_NMI; 704 svm_event_waitexit_enable(vcpu, true); 705 } 706 err = 0; 707 break; 708 default: 709 return EINVAL; 710 } 711 712 vmcb->ctrl.eventinj = 713 __SHIFTIN((uint64_t)vector, VMCB_CTRL_EVENTINJ_VECTOR) | 714 __SHIFTIN((uint64_t)type, VMCB_CTRL_EVENTINJ_TYPE) | 715 __SHIFTIN((uint64_t)err, VMCB_CTRL_EVENTINJ_EV) | 716 __SHIFTIN((uint64_t)1, VMCB_CTRL_EVENTINJ_V) | 717 __SHIFTIN((uint64_t)error, VMCB_CTRL_EVENTINJ_ERRORCODE); 718 719 cpudata->evt_pending = true; 720 721 return 0; 722 } 723 724 static void 725 svm_inject_ud(struct nvmm_cpu *vcpu) 726 { 727 struct nvmm_comm_page *comm = vcpu->comm; 728 int ret __diagused; 729 730 comm->event.type = NVMM_VCPU_EVENT_EXCP; 731 comm->event.vector = 6; 732 comm->event.u.excp.error = 0; 733 734 ret = svm_vcpu_inject(vcpu); 735 KASSERT(ret == 0); 736 } 737 738 static void 739 svm_inject_gp(struct nvmm_cpu *vcpu) 740 { 741 struct nvmm_comm_page *comm = vcpu->comm; 742 int ret __diagused; 743 744 comm->event.type = NVMM_VCPU_EVENT_EXCP; 745 comm->event.vector = 13; 746 comm->event.u.excp.error = 0; 747 748 ret = svm_vcpu_inject(vcpu); 749 KASSERT(ret == 0); 750 } 751 752 static inline int 753 svm_vcpu_event_commit(struct nvmm_cpu *vcpu) 754 { 755 if (__predict_true(!vcpu->comm->event_commit)) { 756 return 0; 757 } 758 vcpu->comm->event_commit = false; 759 return svm_vcpu_inject(vcpu); 760 } 761 762 static inline void 763 svm_inkernel_advance(struct vmcb *vmcb) 764 { 765 /* 766 * Maybe we should also apply single-stepping and debug exceptions. 767 * Matters for guest-ring3, because it can execute 'cpuid' under a 768 * debugger. 769 */ 770 vmcb->state.rip = vmcb->ctrl.nrip; 771 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 772 } 773 774 static void 775 svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx) 776 { 777 struct svm_cpudata *cpudata = vcpu->cpudata; 778 uint64_t cr4; 779 780 switch (eax) { 781 case 0x00000001: 782 cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax; 783 784 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID; 785 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid, 786 CPUID_LOCAL_APIC_ID); 787 788 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx; 789 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ; 790 791 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx; 792 793 /* CPUID2_OSXSAVE depends on CR4. */ 794 cr4 = cpudata->vmcb->state.cr4; 795 if (!(cr4 & CR4_OSXSAVE)) { 796 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE; 797 } 798 break; 799 case 0x00000005: 800 case 0x00000006: 801 cpudata->vmcb->state.rax = 0; 802 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 803 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 804 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 805 break; 806 case 0x00000007: 807 cpudata->vmcb->state.rax &= nvmm_cpuid_00000007.eax; 808 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx; 809 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx; 810 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx; 811 break; 812 case 0x0000000D: 813 if (svm_xcr0_mask == 0) { 814 break; 815 } 816 switch (ecx) { 817 case 0: 818 cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF; 819 if (cpudata->gxcr0 & XCR0_SSE) { 820 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct fxsave); 821 } else { 822 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87); 823 } 824 cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */ 825 cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct fxsave) + 64; 826 cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32; 827 break; 828 case 1: 829 cpudata->vmcb->state.rax &= 830 (CPUID_PES1_XSAVEOPT | CPUID_PES1_XSAVEC | 831 CPUID_PES1_XGETBV); 832 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 833 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 834 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 835 break; 836 default: 837 cpudata->vmcb->state.rax = 0; 838 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 839 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 840 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 841 break; 842 } 843 break; 844 case 0x40000000: 845 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 846 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 847 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 848 memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4); 849 memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4); 850 memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); 851 break; 852 case 0x80000001: 853 cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax; 854 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx; 855 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx; 856 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx; 857 break; 858 default: 859 break; 860 } 861 } 862 863 static void 864 svm_exit_insn(struct vmcb *vmcb, struct nvmm_vcpu_exit *exit, uint64_t reason) 865 { 866 exit->u.insn.npc = vmcb->ctrl.nrip; 867 exit->reason = reason; 868 } 869 870 static void 871 svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 872 struct nvmm_vcpu_exit *exit) 873 { 874 struct svm_cpudata *cpudata = vcpu->cpudata; 875 struct nvmm_vcpu_conf_cpuid *cpuid; 876 uint64_t eax, ecx; 877 u_int descs[4]; 878 size_t i; 879 880 eax = cpudata->vmcb->state.rax; 881 ecx = cpudata->gprs[NVMM_X64_GPR_RCX]; 882 x86_cpuid2(eax, ecx, descs); 883 884 cpudata->vmcb->state.rax = descs[0]; 885 cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1]; 886 cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2]; 887 cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3]; 888 889 svm_inkernel_handle_cpuid(vcpu, eax, ecx); 890 891 for (i = 0; i < SVM_NCPUIDS; i++) { 892 if (!cpudata->cpuidpresent[i]) { 893 continue; 894 } 895 cpuid = &cpudata->cpuid[i]; 896 if (cpuid->leaf != eax) { 897 continue; 898 } 899 900 if (cpuid->exit) { 901 svm_exit_insn(cpudata->vmcb, exit, NVMM_VCPU_EXIT_CPUID); 902 return; 903 } 904 KASSERT(cpuid->mask); 905 906 /* del */ 907 cpudata->vmcb->state.rax &= ~cpuid->u.mask.del.eax; 908 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx; 909 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx; 910 cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx; 911 912 /* set */ 913 cpudata->vmcb->state.rax |= cpuid->u.mask.set.eax; 914 cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx; 915 cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx; 916 cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx; 917 918 break; 919 } 920 921 svm_inkernel_advance(cpudata->vmcb); 922 exit->reason = NVMM_VCPU_EXIT_NONE; 923 } 924 925 static void 926 svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 927 struct nvmm_vcpu_exit *exit) 928 { 929 struct svm_cpudata *cpudata = vcpu->cpudata; 930 struct vmcb *vmcb = cpudata->vmcb; 931 932 if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) { 933 svm_event_waitexit_disable(vcpu, false); 934 } 935 936 svm_inkernel_advance(cpudata->vmcb); 937 exit->reason = NVMM_VCPU_EXIT_HALTED; 938 } 939 940 #define SVM_EXIT_IO_PORT __BITS(31,16) 941 #define SVM_EXIT_IO_SEG __BITS(12,10) 942 #define SVM_EXIT_IO_A64 __BIT(9) 943 #define SVM_EXIT_IO_A32 __BIT(8) 944 #define SVM_EXIT_IO_A16 __BIT(7) 945 #define SVM_EXIT_IO_SZ32 __BIT(6) 946 #define SVM_EXIT_IO_SZ16 __BIT(5) 947 #define SVM_EXIT_IO_SZ8 __BIT(4) 948 #define SVM_EXIT_IO_REP __BIT(3) 949 #define SVM_EXIT_IO_STR __BIT(2) 950 #define SVM_EXIT_IO_IN __BIT(0) 951 952 static void 953 svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 954 struct nvmm_vcpu_exit *exit) 955 { 956 struct svm_cpudata *cpudata = vcpu->cpudata; 957 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 958 uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2; 959 960 exit->reason = NVMM_VCPU_EXIT_IO; 961 962 exit->u.io.in = (info & SVM_EXIT_IO_IN) != 0; 963 exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT); 964 965 if (svm_decode_assist) { 966 KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6); 967 exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG); 968 } else { 969 exit->u.io.seg = -1; 970 } 971 972 if (info & SVM_EXIT_IO_A64) { 973 exit->u.io.address_size = 8; 974 } else if (info & SVM_EXIT_IO_A32) { 975 exit->u.io.address_size = 4; 976 } else if (info & SVM_EXIT_IO_A16) { 977 exit->u.io.address_size = 2; 978 } 979 980 if (info & SVM_EXIT_IO_SZ32) { 981 exit->u.io.operand_size = 4; 982 } else if (info & SVM_EXIT_IO_SZ16) { 983 exit->u.io.operand_size = 2; 984 } else if (info & SVM_EXIT_IO_SZ8) { 985 exit->u.io.operand_size = 1; 986 } 987 988 exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0; 989 exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0; 990 exit->u.io.npc = nextpc; 991 992 svm_vcpu_state_provide(vcpu, 993 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 994 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 995 } 996 997 static const uint64_t msr_ignore_list[] = { 998 0xc0010055, /* MSR_CMPHALT */ 999 MSR_DE_CFG, 1000 MSR_IC_CFG, 1001 MSR_UCODE_AMD_PATCHLEVEL 1002 }; 1003 1004 static bool 1005 svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1006 struct nvmm_vcpu_exit *exit) 1007 { 1008 struct svm_cpudata *cpudata = vcpu->cpudata; 1009 struct vmcb *vmcb = cpudata->vmcb; 1010 uint64_t val; 1011 size_t i; 1012 1013 if (exit->reason == NVMM_VCPU_EXIT_RDMSR) { 1014 if (exit->u.rdmsr.msr == MSR_NB_CFG) { 1015 val = NB_CFG_INITAPICCPUIDLO; 1016 vmcb->state.rax = (val & 0xFFFFFFFF); 1017 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1018 goto handled; 1019 } 1020 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1021 if (msr_ignore_list[i] != exit->u.rdmsr.msr) 1022 continue; 1023 val = 0; 1024 vmcb->state.rax = (val & 0xFFFFFFFF); 1025 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1026 goto handled; 1027 } 1028 } else { 1029 if (exit->u.wrmsr.msr == MSR_EFER) { 1030 if (__predict_false(exit->u.wrmsr.val & ~EFER_VALID)) { 1031 goto error; 1032 } 1033 if ((vmcb->state.efer ^ exit->u.wrmsr.val) & 1034 EFER_TLB_FLUSH) { 1035 cpudata->gtlb_want_flush = true; 1036 } 1037 vmcb->state.efer = exit->u.wrmsr.val | EFER_SVME; 1038 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR); 1039 goto handled; 1040 } 1041 if (exit->u.wrmsr.msr == MSR_TSC) { 1042 cpudata->gtsc = exit->u.wrmsr.val; 1043 cpudata->gtsc_want_update = true; 1044 goto handled; 1045 } 1046 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1047 if (msr_ignore_list[i] != exit->u.wrmsr.msr) 1048 continue; 1049 goto handled; 1050 } 1051 } 1052 1053 return false; 1054 1055 handled: 1056 svm_inkernel_advance(cpudata->vmcb); 1057 return true; 1058 1059 error: 1060 svm_inject_gp(vcpu); 1061 return true; 1062 } 1063 1064 static inline void 1065 svm_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1066 struct nvmm_vcpu_exit *exit) 1067 { 1068 struct svm_cpudata *cpudata = vcpu->cpudata; 1069 1070 exit->reason = NVMM_VCPU_EXIT_RDMSR; 1071 exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1072 exit->u.rdmsr.npc = cpudata->vmcb->ctrl.nrip; 1073 1074 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1075 exit->reason = NVMM_VCPU_EXIT_NONE; 1076 return; 1077 } 1078 1079 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1080 } 1081 1082 static inline void 1083 svm_exit_wrmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1084 struct nvmm_vcpu_exit *exit) 1085 { 1086 struct svm_cpudata *cpudata = vcpu->cpudata; 1087 uint64_t rdx, rax; 1088 1089 rdx = cpudata->gprs[NVMM_X64_GPR_RDX]; 1090 rax = cpudata->vmcb->state.rax; 1091 1092 exit->reason = NVMM_VCPU_EXIT_WRMSR; 1093 exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1094 exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF); 1095 exit->u.wrmsr.npc = cpudata->vmcb->ctrl.nrip; 1096 1097 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1098 exit->reason = NVMM_VCPU_EXIT_NONE; 1099 return; 1100 } 1101 1102 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1103 } 1104 1105 static void 1106 svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1107 struct nvmm_vcpu_exit *exit) 1108 { 1109 struct svm_cpudata *cpudata = vcpu->cpudata; 1110 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1111 1112 if (info == 0) { 1113 svm_exit_rdmsr(mach, vcpu, exit); 1114 } else { 1115 svm_exit_wrmsr(mach, vcpu, exit); 1116 } 1117 } 1118 1119 static void 1120 svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1121 struct nvmm_vcpu_exit *exit) 1122 { 1123 struct svm_cpudata *cpudata = vcpu->cpudata; 1124 gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2; 1125 1126 exit->reason = NVMM_VCPU_EXIT_MEMORY; 1127 if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W) 1128 exit->u.mem.prot = PROT_WRITE; 1129 else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_X) 1130 exit->u.mem.prot = PROT_EXEC; 1131 else 1132 exit->u.mem.prot = PROT_READ; 1133 exit->u.mem.gpa = gpa; 1134 exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len; 1135 memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes, 1136 sizeof(exit->u.mem.inst_bytes)); 1137 1138 svm_vcpu_state_provide(vcpu, 1139 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1140 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1141 } 1142 1143 static void 1144 svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1145 struct nvmm_vcpu_exit *exit) 1146 { 1147 struct svm_cpudata *cpudata = vcpu->cpudata; 1148 struct vmcb *vmcb = cpudata->vmcb; 1149 uint64_t val; 1150 1151 exit->reason = NVMM_VCPU_EXIT_NONE; 1152 1153 val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) | 1154 (vmcb->state.rax & 0xFFFFFFFF); 1155 1156 if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) { 1157 goto error; 1158 } else if (__predict_false(vmcb->state.cpl != 0)) { 1159 goto error; 1160 } else if (__predict_false((val & ~svm_xcr0_mask) != 0)) { 1161 goto error; 1162 } else if (__predict_false((val & XCR0_X87) == 0)) { 1163 goto error; 1164 } 1165 1166 cpudata->gxcr0 = val; 1167 if (svm_xcr0_mask != 0) { 1168 wrxcr(0, cpudata->gxcr0); 1169 } 1170 1171 svm_inkernel_advance(cpudata->vmcb); 1172 return; 1173 1174 error: 1175 svm_inject_gp(vcpu); 1176 } 1177 1178 static void 1179 svm_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code) 1180 { 1181 exit->u.inv.hwcode = code; 1182 exit->reason = NVMM_VCPU_EXIT_INVALID; 1183 } 1184 1185 /* -------------------------------------------------------------------------- */ 1186 1187 static void 1188 svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) 1189 { 1190 struct svm_cpudata *cpudata = vcpu->cpudata; 1191 1192 fpu_save(); 1193 fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask); 1194 1195 if (svm_xcr0_mask != 0) { 1196 cpudata->hxcr0 = rdxcr(0); 1197 wrxcr(0, cpudata->gxcr0); 1198 } 1199 } 1200 1201 static void 1202 svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) 1203 { 1204 struct svm_cpudata *cpudata = vcpu->cpudata; 1205 1206 if (svm_xcr0_mask != 0) { 1207 cpudata->gxcr0 = rdxcr(0); 1208 wrxcr(0, cpudata->hxcr0); 1209 } 1210 1211 fpu_area_save(&cpudata->gfpu, svm_xcr0_mask); 1212 } 1213 1214 static void 1215 svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu) 1216 { 1217 struct svm_cpudata *cpudata = vcpu->cpudata; 1218 1219 x86_dbregs_save(curlwp); 1220 1221 ldr7(0); 1222 1223 ldr0(cpudata->drs[NVMM_X64_DR_DR0]); 1224 ldr1(cpudata->drs[NVMM_X64_DR_DR1]); 1225 ldr2(cpudata->drs[NVMM_X64_DR_DR2]); 1226 ldr3(cpudata->drs[NVMM_X64_DR_DR3]); 1227 } 1228 1229 static void 1230 svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu) 1231 { 1232 struct svm_cpudata *cpudata = vcpu->cpudata; 1233 1234 cpudata->drs[NVMM_X64_DR_DR0] = rdr0(); 1235 cpudata->drs[NVMM_X64_DR_DR1] = rdr1(); 1236 cpudata->drs[NVMM_X64_DR_DR2] = rdr2(); 1237 cpudata->drs[NVMM_X64_DR_DR3] = rdr3(); 1238 1239 x86_dbregs_restore(curlwp); 1240 } 1241 1242 static void 1243 svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu) 1244 { 1245 struct svm_cpudata *cpudata = vcpu->cpudata; 1246 1247 cpudata->fsbase = rdmsr(MSR_FSBASE); 1248 cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE); 1249 } 1250 1251 static void 1252 svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu) 1253 { 1254 struct svm_cpudata *cpudata = vcpu->cpudata; 1255 1256 wrmsr(MSR_STAR, cpudata->star); 1257 wrmsr(MSR_LSTAR, cpudata->lstar); 1258 wrmsr(MSR_CSTAR, cpudata->cstar); 1259 wrmsr(MSR_SFMASK, cpudata->sfmask); 1260 wrmsr(MSR_FSBASE, cpudata->fsbase); 1261 wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase); 1262 } 1263 1264 /* -------------------------------------------------------------------------- */ 1265 1266 static inline void 1267 svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1268 { 1269 struct svm_cpudata *cpudata = vcpu->cpudata; 1270 1271 if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) { 1272 cpudata->gtlb_want_flush = true; 1273 } 1274 } 1275 1276 static inline void 1277 svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1278 { 1279 /* 1280 * Nothing to do. If an hTLB flush was needed, either the VCPU was 1281 * executing on this hCPU and the hTLB already got flushed, or it 1282 * was executing on another hCPU in which case the catchup is done 1283 * in svm_gtlb_catchup(). 1284 */ 1285 } 1286 1287 static inline uint64_t 1288 svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata) 1289 { 1290 struct vmcb *vmcb = cpudata->vmcb; 1291 uint64_t machgen; 1292 1293 machgen = machdata->mach_htlb_gen; 1294 if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) { 1295 return machgen; 1296 } 1297 1298 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1299 return machgen; 1300 } 1301 1302 static inline void 1303 svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen) 1304 { 1305 struct vmcb *vmcb = cpudata->vmcb; 1306 1307 if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) { 1308 cpudata->vcpu_htlb_gen = machgen; 1309 } 1310 } 1311 1312 static inline void 1313 svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb) 1314 { 1315 cpudata->evt_pending = false; 1316 1317 if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) { 1318 vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo; 1319 cpudata->evt_pending = true; 1320 } 1321 } 1322 1323 static int 1324 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1325 struct nvmm_vcpu_exit *exit) 1326 { 1327 struct nvmm_comm_page *comm = vcpu->comm; 1328 struct svm_machdata *machdata = mach->machdata; 1329 struct svm_cpudata *cpudata = vcpu->cpudata; 1330 struct vmcb *vmcb = cpudata->vmcb; 1331 uint64_t machgen; 1332 int hcpu, s; 1333 1334 if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) { 1335 return EINVAL; 1336 } 1337 svm_vcpu_state_commit(vcpu); 1338 comm->state_cached = 0; 1339 1340 kpreempt_disable(); 1341 hcpu = cpu_number(); 1342 1343 svm_gtlb_catchup(vcpu, hcpu); 1344 svm_htlb_catchup(vcpu, hcpu); 1345 1346 if (vcpu->hcpu_last != hcpu) { 1347 svm_vmcb_cache_flush_all(vmcb); 1348 cpudata->gtsc_want_update = true; 1349 } 1350 1351 svm_vcpu_guest_dbregs_enter(vcpu); 1352 svm_vcpu_guest_misc_enter(vcpu); 1353 svm_vcpu_guest_fpu_enter(vcpu); 1354 1355 while (1) { 1356 if (cpudata->gtlb_want_flush) { 1357 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1358 } else { 1359 vmcb->ctrl.tlb_ctrl = 0; 1360 } 1361 1362 if (__predict_false(cpudata->gtsc_want_update)) { 1363 vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc(); 1364 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 1365 } 1366 1367 s = splhigh(); 1368 machgen = svm_htlb_flush(machdata, cpudata); 1369 svm_vmrun(cpudata->vmcb_pa, cpudata->gprs); 1370 svm_htlb_flush_ack(cpudata, machgen); 1371 splx(s); 1372 1373 svm_vmcb_cache_default(vmcb); 1374 1375 if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) { 1376 cpudata->gtlb_want_flush = false; 1377 cpudata->gtsc_want_update = false; 1378 vcpu->hcpu_last = hcpu; 1379 } 1380 svm_exit_evt(cpudata, vmcb); 1381 1382 switch (vmcb->ctrl.exitcode) { 1383 case VMCB_EXITCODE_INTR: 1384 case VMCB_EXITCODE_NMI: 1385 exit->reason = NVMM_VCPU_EXIT_NONE; 1386 break; 1387 case VMCB_EXITCODE_VINTR: 1388 svm_event_waitexit_disable(vcpu, false); 1389 exit->reason = NVMM_VCPU_EXIT_INT_READY; 1390 break; 1391 case VMCB_EXITCODE_IRET: 1392 svm_event_waitexit_disable(vcpu, true); 1393 exit->reason = NVMM_VCPU_EXIT_NMI_READY; 1394 break; 1395 case VMCB_EXITCODE_CPUID: 1396 svm_exit_cpuid(mach, vcpu, exit); 1397 break; 1398 case VMCB_EXITCODE_HLT: 1399 svm_exit_hlt(mach, vcpu, exit); 1400 break; 1401 case VMCB_EXITCODE_IOIO: 1402 svm_exit_io(mach, vcpu, exit); 1403 break; 1404 case VMCB_EXITCODE_MSR: 1405 svm_exit_msr(mach, vcpu, exit); 1406 break; 1407 case VMCB_EXITCODE_SHUTDOWN: 1408 exit->reason = NVMM_VCPU_EXIT_SHUTDOWN; 1409 break; 1410 case VMCB_EXITCODE_RDPMC: 1411 case VMCB_EXITCODE_RSM: 1412 case VMCB_EXITCODE_INVLPGA: 1413 case VMCB_EXITCODE_VMRUN: 1414 case VMCB_EXITCODE_VMMCALL: 1415 case VMCB_EXITCODE_VMLOAD: 1416 case VMCB_EXITCODE_VMSAVE: 1417 case VMCB_EXITCODE_STGI: 1418 case VMCB_EXITCODE_CLGI: 1419 case VMCB_EXITCODE_SKINIT: 1420 case VMCB_EXITCODE_RDTSCP: 1421 svm_inject_ud(vcpu); 1422 exit->reason = NVMM_VCPU_EXIT_NONE; 1423 break; 1424 case VMCB_EXITCODE_MONITOR: 1425 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MONITOR); 1426 break; 1427 case VMCB_EXITCODE_MWAIT: 1428 case VMCB_EXITCODE_MWAIT_CONDITIONAL: 1429 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MWAIT); 1430 break; 1431 case VMCB_EXITCODE_XSETBV: 1432 svm_exit_xsetbv(mach, vcpu, exit); 1433 break; 1434 case VMCB_EXITCODE_NPF: 1435 svm_exit_npf(mach, vcpu, exit); 1436 break; 1437 case VMCB_EXITCODE_FERR_FREEZE: /* ? */ 1438 default: 1439 svm_exit_invalid(exit, vmcb->ctrl.exitcode); 1440 break; 1441 } 1442 1443 /* If no reason to return to userland, keep rolling. */ 1444 if (preempt_needed()) { 1445 break; 1446 } 1447 if (curlwp->l_flag & LW_USERRET) { 1448 break; 1449 } 1450 if (exit->reason != NVMM_VCPU_EXIT_NONE) { 1451 break; 1452 } 1453 } 1454 1455 cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset; 1456 1457 svm_vcpu_guest_fpu_leave(vcpu); 1458 svm_vcpu_guest_misc_leave(vcpu); 1459 svm_vcpu_guest_dbregs_leave(vcpu); 1460 1461 kpreempt_enable(); 1462 1463 exit->exitstate.rflags = vmcb->state.rflags; 1464 exit->exitstate.cr8 = __SHIFTOUT(vmcb->ctrl.v, VMCB_CTRL_V_TPR); 1465 exit->exitstate.int_shadow = 1466 ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0); 1467 exit->exitstate.int_window_exiting = cpudata->int_window_exit; 1468 exit->exitstate.nmi_window_exiting = cpudata->nmi_window_exit; 1469 exit->exitstate.evt_pending = cpudata->evt_pending; 1470 1471 return 0; 1472 } 1473 1474 /* -------------------------------------------------------------------------- */ 1475 1476 static int 1477 svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages) 1478 { 1479 struct pglist pglist; 1480 paddr_t _pa; 1481 vaddr_t _va; 1482 size_t i; 1483 int ret; 1484 1485 ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0, 1486 &pglist, 1, 0); 1487 if (ret != 0) 1488 return ENOMEM; 1489 _pa = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist)); 1490 _va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0, 1491 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1492 if (_va == 0) 1493 goto error; 1494 1495 for (i = 0; i < npages; i++) { 1496 pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE, 1497 VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK); 1498 } 1499 pmap_update(pmap_kernel()); 1500 1501 memset((void *)_va, 0, npages * PAGE_SIZE); 1502 1503 *pa = _pa; 1504 *va = _va; 1505 return 0; 1506 1507 error: 1508 for (i = 0; i < npages; i++) { 1509 uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE)); 1510 } 1511 return ENOMEM; 1512 } 1513 1514 static void 1515 svm_memfree(paddr_t pa, vaddr_t va, size_t npages) 1516 { 1517 size_t i; 1518 1519 pmap_kremove(va, npages * PAGE_SIZE); 1520 pmap_update(pmap_kernel()); 1521 uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY); 1522 for (i = 0; i < npages; i++) { 1523 uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE)); 1524 } 1525 } 1526 1527 /* -------------------------------------------------------------------------- */ 1528 1529 #define SVM_MSRBM_READ __BIT(0) 1530 #define SVM_MSRBM_WRITE __BIT(1) 1531 1532 static void 1533 svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write) 1534 { 1535 uint64_t byte; 1536 uint8_t bitoff; 1537 1538 if (msr < 0x00002000) { 1539 /* Range 1 */ 1540 byte = ((msr - 0x00000000) >> 2UL) + 0x0000; 1541 } else if (msr >= 0xC0000000 && msr < 0xC0002000) { 1542 /* Range 2 */ 1543 byte = ((msr - 0xC0000000) >> 2UL) + 0x0800; 1544 } else if (msr >= 0xC0010000 && msr < 0xC0012000) { 1545 /* Range 3 */ 1546 byte = ((msr - 0xC0010000) >> 2UL) + 0x1000; 1547 } else { 1548 panic("%s: wrong range", __func__); 1549 } 1550 1551 bitoff = (msr & 0x3) << 1; 1552 1553 if (read) { 1554 bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff); 1555 } 1556 if (write) { 1557 bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff); 1558 } 1559 } 1560 1561 #define SVM_SEG_ATTRIB_TYPE __BITS(3,0) 1562 #define SVM_SEG_ATTRIB_S __BIT(4) 1563 #define SVM_SEG_ATTRIB_DPL __BITS(6,5) 1564 #define SVM_SEG_ATTRIB_P __BIT(7) 1565 #define SVM_SEG_ATTRIB_AVL __BIT(8) 1566 #define SVM_SEG_ATTRIB_L __BIT(9) 1567 #define SVM_SEG_ATTRIB_DEF __BIT(10) 1568 #define SVM_SEG_ATTRIB_G __BIT(11) 1569 1570 static void 1571 svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg, 1572 struct vmcb_segment *vseg) 1573 { 1574 vseg->selector = seg->selector; 1575 vseg->attrib = 1576 __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) | 1577 __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) | 1578 __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) | 1579 __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) | 1580 __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) | 1581 __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) | 1582 __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) | 1583 __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G); 1584 vseg->limit = seg->limit; 1585 vseg->base = seg->base; 1586 } 1587 1588 static void 1589 svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg) 1590 { 1591 seg->selector = vseg->selector; 1592 seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE); 1593 seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S); 1594 seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL); 1595 seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P); 1596 seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL); 1597 seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L); 1598 seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF); 1599 seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G); 1600 seg->limit = vseg->limit; 1601 seg->base = vseg->base; 1602 } 1603 1604 static inline bool 1605 svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state, 1606 uint64_t flags) 1607 { 1608 if (flags & NVMM_X64_STATE_CRS) { 1609 if ((vmcb->state.cr0 ^ 1610 state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) { 1611 return true; 1612 } 1613 if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) { 1614 return true; 1615 } 1616 if ((vmcb->state.cr4 ^ 1617 state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) { 1618 return true; 1619 } 1620 } 1621 1622 if (flags & NVMM_X64_STATE_MSRS) { 1623 if ((vmcb->state.efer ^ 1624 state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) { 1625 return true; 1626 } 1627 } 1628 1629 return false; 1630 } 1631 1632 static void 1633 svm_vcpu_setstate(struct nvmm_cpu *vcpu) 1634 { 1635 struct nvmm_comm_page *comm = vcpu->comm; 1636 const struct nvmm_x64_state *state = &comm->state; 1637 struct svm_cpudata *cpudata = vcpu->cpudata; 1638 struct vmcb *vmcb = cpudata->vmcb; 1639 struct fxsave *fpustate; 1640 uint64_t flags; 1641 1642 flags = comm->state_wanted; 1643 1644 if (svm_state_tlb_flush(vmcb, state, flags)) { 1645 cpudata->gtlb_want_flush = true; 1646 } 1647 1648 if (flags & NVMM_X64_STATE_SEGS) { 1649 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS], 1650 &vmcb->state.cs); 1651 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS], 1652 &vmcb->state.ds); 1653 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES], 1654 &vmcb->state.es); 1655 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS], 1656 &vmcb->state.fs); 1657 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS], 1658 &vmcb->state.gs); 1659 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS], 1660 &vmcb->state.ss); 1661 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT], 1662 &vmcb->state.gdt); 1663 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT], 1664 &vmcb->state.idt); 1665 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT], 1666 &vmcb->state.ldt); 1667 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR], 1668 &vmcb->state.tr); 1669 1670 vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl; 1671 } 1672 1673 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 1674 if (flags & NVMM_X64_STATE_GPRS) { 1675 memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs)); 1676 1677 vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP]; 1678 vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP]; 1679 vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX]; 1680 vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS]; 1681 } 1682 1683 if (flags & NVMM_X64_STATE_CRS) { 1684 vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0]; 1685 vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2]; 1686 vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3]; 1687 vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4]; 1688 1689 vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR; 1690 vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8], 1691 VMCB_CTRL_V_TPR); 1692 1693 if (svm_xcr0_mask != 0) { 1694 /* Clear illegal XCR0 bits, set mandatory X87 bit. */ 1695 cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0]; 1696 cpudata->gxcr0 &= svm_xcr0_mask; 1697 cpudata->gxcr0 |= XCR0_X87; 1698 } 1699 } 1700 1701 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 1702 if (flags & NVMM_X64_STATE_DRS) { 1703 memcpy(cpudata->drs, state->drs, sizeof(state->drs)); 1704 1705 vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6]; 1706 vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7]; 1707 } 1708 1709 if (flags & NVMM_X64_STATE_MSRS) { 1710 /* 1711 * EFER_SVME is mandatory. 1712 */ 1713 vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME; 1714 vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR]; 1715 vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR]; 1716 vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR]; 1717 vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK]; 1718 vmcb->state.kernelgsbase = 1719 state->msrs[NVMM_X64_MSR_KERNELGSBASE]; 1720 vmcb->state.sysenter_cs = 1721 state->msrs[NVMM_X64_MSR_SYSENTER_CS]; 1722 vmcb->state.sysenter_esp = 1723 state->msrs[NVMM_X64_MSR_SYSENTER_ESP]; 1724 vmcb->state.sysenter_eip = 1725 state->msrs[NVMM_X64_MSR_SYSENTER_EIP]; 1726 vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT]; 1727 1728 cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC]; 1729 cpudata->gtsc_want_update = true; 1730 } 1731 1732 if (flags & NVMM_X64_STATE_INTR) { 1733 if (state->intr.int_shadow) { 1734 vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW; 1735 } else { 1736 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 1737 } 1738 1739 if (state->intr.int_window_exiting) { 1740 svm_event_waitexit_enable(vcpu, false); 1741 } else { 1742 svm_event_waitexit_disable(vcpu, false); 1743 } 1744 1745 if (state->intr.nmi_window_exiting) { 1746 svm_event_waitexit_enable(vcpu, true); 1747 } else { 1748 svm_event_waitexit_disable(vcpu, true); 1749 } 1750 } 1751 1752 CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu)); 1753 if (flags & NVMM_X64_STATE_FPU) { 1754 memcpy(cpudata->gfpu.xsh_fxsave, &state->fpu, 1755 sizeof(state->fpu)); 1756 1757 fpustate = (struct fxsave *)cpudata->gfpu.xsh_fxsave; 1758 fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask; 1759 fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask; 1760 1761 if (svm_xcr0_mask != 0) { 1762 /* Reset XSTATE_BV, to force a reload. */ 1763 cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask; 1764 } 1765 } 1766 1767 svm_vmcb_cache_update(vmcb, flags); 1768 1769 comm->state_wanted = 0; 1770 comm->state_cached |= flags; 1771 } 1772 1773 static void 1774 svm_vcpu_getstate(struct nvmm_cpu *vcpu) 1775 { 1776 struct nvmm_comm_page *comm = vcpu->comm; 1777 struct nvmm_x64_state *state = &comm->state; 1778 struct svm_cpudata *cpudata = vcpu->cpudata; 1779 struct vmcb *vmcb = cpudata->vmcb; 1780 uint64_t flags; 1781 1782 flags = comm->state_wanted; 1783 1784 if (flags & NVMM_X64_STATE_SEGS) { 1785 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS], 1786 &vmcb->state.cs); 1787 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS], 1788 &vmcb->state.ds); 1789 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES], 1790 &vmcb->state.es); 1791 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS], 1792 &vmcb->state.fs); 1793 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS], 1794 &vmcb->state.gs); 1795 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS], 1796 &vmcb->state.ss); 1797 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT], 1798 &vmcb->state.gdt); 1799 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT], 1800 &vmcb->state.idt); 1801 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT], 1802 &vmcb->state.ldt); 1803 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR], 1804 &vmcb->state.tr); 1805 1806 state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl; 1807 } 1808 1809 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 1810 if (flags & NVMM_X64_STATE_GPRS) { 1811 memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs)); 1812 1813 state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip; 1814 state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp; 1815 state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax; 1816 state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags; 1817 } 1818 1819 if (flags & NVMM_X64_STATE_CRS) { 1820 state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0; 1821 state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2; 1822 state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3; 1823 state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4; 1824 state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v, 1825 VMCB_CTRL_V_TPR); 1826 state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0; 1827 } 1828 1829 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 1830 if (flags & NVMM_X64_STATE_DRS) { 1831 memcpy(state->drs, cpudata->drs, sizeof(state->drs)); 1832 1833 state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6; 1834 state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7; 1835 } 1836 1837 if (flags & NVMM_X64_STATE_MSRS) { 1838 state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer; 1839 state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star; 1840 state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar; 1841 state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar; 1842 state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask; 1843 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = 1844 vmcb->state.kernelgsbase; 1845 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = 1846 vmcb->state.sysenter_cs; 1847 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = 1848 vmcb->state.sysenter_esp; 1849 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = 1850 vmcb->state.sysenter_eip; 1851 state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat; 1852 state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc; 1853 1854 /* Hide SVME. */ 1855 state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME; 1856 } 1857 1858 if (flags & NVMM_X64_STATE_INTR) { 1859 state->intr.int_shadow = 1860 (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0; 1861 state->intr.int_window_exiting = cpudata->int_window_exit; 1862 state->intr.nmi_window_exiting = cpudata->nmi_window_exit; 1863 state->intr.evt_pending = cpudata->evt_pending; 1864 } 1865 1866 CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu)); 1867 if (flags & NVMM_X64_STATE_FPU) { 1868 memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave, 1869 sizeof(state->fpu)); 1870 } 1871 1872 comm->state_wanted = 0; 1873 comm->state_cached |= flags; 1874 } 1875 1876 static void 1877 svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags) 1878 { 1879 vcpu->comm->state_wanted = flags; 1880 svm_vcpu_getstate(vcpu); 1881 } 1882 1883 static void 1884 svm_vcpu_state_commit(struct nvmm_cpu *vcpu) 1885 { 1886 vcpu->comm->state_wanted = vcpu->comm->state_commit; 1887 vcpu->comm->state_commit = 0; 1888 svm_vcpu_setstate(vcpu); 1889 } 1890 1891 /* -------------------------------------------------------------------------- */ 1892 1893 static void 1894 svm_asid_alloc(struct nvmm_cpu *vcpu) 1895 { 1896 struct svm_cpudata *cpudata = vcpu->cpudata; 1897 struct vmcb *vmcb = cpudata->vmcb; 1898 size_t i, oct, bit; 1899 1900 mutex_enter(&svm_asidlock); 1901 1902 for (i = 0; i < svm_maxasid; i++) { 1903 oct = i / 8; 1904 bit = i % 8; 1905 1906 if (svm_asidmap[oct] & __BIT(bit)) { 1907 continue; 1908 } 1909 1910 svm_asidmap[oct] |= __BIT(bit); 1911 vmcb->ctrl.guest_asid = i; 1912 mutex_exit(&svm_asidlock); 1913 return; 1914 } 1915 1916 /* 1917 * No free ASID. Use the last one, which is shared and requires 1918 * special TLB handling. 1919 */ 1920 cpudata->shared_asid = true; 1921 vmcb->ctrl.guest_asid = svm_maxasid - 1; 1922 mutex_exit(&svm_asidlock); 1923 } 1924 1925 static void 1926 svm_asid_free(struct nvmm_cpu *vcpu) 1927 { 1928 struct svm_cpudata *cpudata = vcpu->cpudata; 1929 struct vmcb *vmcb = cpudata->vmcb; 1930 size_t oct, bit; 1931 1932 if (cpudata->shared_asid) { 1933 return; 1934 } 1935 1936 oct = vmcb->ctrl.guest_asid / 8; 1937 bit = vmcb->ctrl.guest_asid % 8; 1938 1939 mutex_enter(&svm_asidlock); 1940 svm_asidmap[oct] &= ~__BIT(bit); 1941 mutex_exit(&svm_asidlock); 1942 } 1943 1944 static void 1945 svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 1946 { 1947 struct svm_cpudata *cpudata = vcpu->cpudata; 1948 struct vmcb *vmcb = cpudata->vmcb; 1949 1950 /* Allow reads/writes of Control Registers. */ 1951 vmcb->ctrl.intercept_cr = 0; 1952 1953 /* Allow reads/writes of Debug Registers. */ 1954 vmcb->ctrl.intercept_dr = 0; 1955 1956 /* Allow exceptions 0 to 31. */ 1957 vmcb->ctrl.intercept_vec = 0; 1958 1959 /* 1960 * Allow: 1961 * - SMI [smm interrupts] 1962 * - VINTR [virtual interrupts] 1963 * - CR0_SPEC [CR0 writes changing other fields than CR0.TS or CR0.MP] 1964 * - RIDTR [reads of IDTR] 1965 * - RGDTR [reads of GDTR] 1966 * - RLDTR [reads of LDTR] 1967 * - RTR [reads of TR] 1968 * - WIDTR [writes of IDTR] 1969 * - WGDTR [writes of GDTR] 1970 * - WLDTR [writes of LDTR] 1971 * - WTR [writes of TR] 1972 * - RDTSC [rdtsc instruction] 1973 * - PUSHF [pushf instruction] 1974 * - POPF [popf instruction] 1975 * - IRET [iret instruction] 1976 * - INTN [int $n instructions] 1977 * - INVD [invd instruction] 1978 * - PAUSE [pause instruction] 1979 * - INVLPG [invplg instruction] 1980 * - TASKSW [task switches] 1981 * 1982 * Intercept the rest below. 1983 */ 1984 vmcb->ctrl.intercept_misc1 = 1985 VMCB_CTRL_INTERCEPT_INTR | 1986 VMCB_CTRL_INTERCEPT_NMI | 1987 VMCB_CTRL_INTERCEPT_INIT | 1988 VMCB_CTRL_INTERCEPT_RDPMC | 1989 VMCB_CTRL_INTERCEPT_CPUID | 1990 VMCB_CTRL_INTERCEPT_RSM | 1991 VMCB_CTRL_INTERCEPT_HLT | 1992 VMCB_CTRL_INTERCEPT_INVLPGA | 1993 VMCB_CTRL_INTERCEPT_IOIO_PROT | 1994 VMCB_CTRL_INTERCEPT_MSR_PROT | 1995 VMCB_CTRL_INTERCEPT_FERR_FREEZE | 1996 VMCB_CTRL_INTERCEPT_SHUTDOWN; 1997 1998 /* 1999 * Allow: 2000 * - ICEBP [icebp instruction] 2001 * - WBINVD [wbinvd instruction] 2002 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction] 2003 * 2004 * Intercept the rest below. 2005 */ 2006 vmcb->ctrl.intercept_misc2 = 2007 VMCB_CTRL_INTERCEPT_VMRUN | 2008 VMCB_CTRL_INTERCEPT_VMMCALL | 2009 VMCB_CTRL_INTERCEPT_VMLOAD | 2010 VMCB_CTRL_INTERCEPT_VMSAVE | 2011 VMCB_CTRL_INTERCEPT_STGI | 2012 VMCB_CTRL_INTERCEPT_CLGI | 2013 VMCB_CTRL_INTERCEPT_SKINIT | 2014 VMCB_CTRL_INTERCEPT_RDTSCP | 2015 VMCB_CTRL_INTERCEPT_MONITOR | 2016 VMCB_CTRL_INTERCEPT_MWAIT | 2017 VMCB_CTRL_INTERCEPT_XSETBV; 2018 2019 /* Intercept all I/O accesses. */ 2020 memset(cpudata->iobm, 0xFF, IOBM_SIZE); 2021 vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa; 2022 2023 /* Allow direct access to certain MSRs. */ 2024 memset(cpudata->msrbm, 0xFF, MSRBM_SIZE); 2025 svm_vcpu_msr_allow(cpudata->msrbm, MSR_EFER, true, false); 2026 svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true); 2027 svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true); 2028 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true); 2029 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true); 2030 svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true); 2031 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true); 2032 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true); 2033 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true); 2034 svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true); 2035 svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true); 2036 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true); 2037 svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false); 2038 vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa; 2039 2040 /* Generate ASID. */ 2041 svm_asid_alloc(vcpu); 2042 2043 /* Virtual TPR. */ 2044 vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING; 2045 2046 /* Enable Nested Paging. */ 2047 vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP; 2048 vmcb->ctrl.n_cr3 = mach->vm->vm_map.pmap->pm_pdirpa[0]; 2049 2050 /* Init XSAVE header. */ 2051 cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask; 2052 cpudata->gfpu.xsh_xcomp_bv = 0; 2053 2054 /* These MSRs are static. */ 2055 cpudata->star = rdmsr(MSR_STAR); 2056 cpudata->lstar = rdmsr(MSR_LSTAR); 2057 cpudata->cstar = rdmsr(MSR_CSTAR); 2058 cpudata->sfmask = rdmsr(MSR_SFMASK); 2059 2060 /* Install the RESET state. */ 2061 memcpy(&vcpu->comm->state, &nvmm_x86_reset_state, 2062 sizeof(nvmm_x86_reset_state)); 2063 vcpu->comm->state_wanted = NVMM_X64_STATE_ALL; 2064 vcpu->comm->state_cached = 0; 2065 svm_vcpu_setstate(vcpu); 2066 } 2067 2068 static int 2069 svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2070 { 2071 struct svm_cpudata *cpudata; 2072 int error; 2073 2074 /* Allocate the SVM cpudata. */ 2075 cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map, 2076 roundup(sizeof(*cpudata), PAGE_SIZE), 0, 2077 UVM_KMF_WIRED|UVM_KMF_ZERO); 2078 vcpu->cpudata = cpudata; 2079 2080 /* VMCB */ 2081 error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb, 2082 VMCB_NPAGES); 2083 if (error) 2084 goto error; 2085 2086 /* I/O Bitmap */ 2087 error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm, 2088 IOBM_NPAGES); 2089 if (error) 2090 goto error; 2091 2092 /* MSR Bitmap */ 2093 error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm, 2094 MSRBM_NPAGES); 2095 if (error) 2096 goto error; 2097 2098 /* Init the VCPU info. */ 2099 svm_vcpu_init(mach, vcpu); 2100 2101 return 0; 2102 2103 error: 2104 if (cpudata->vmcb_pa) { 2105 svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, 2106 VMCB_NPAGES); 2107 } 2108 if (cpudata->iobm_pa) { 2109 svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, 2110 IOBM_NPAGES); 2111 } 2112 if (cpudata->msrbm_pa) { 2113 svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, 2114 MSRBM_NPAGES); 2115 } 2116 uvm_km_free(kernel_map, (vaddr_t)cpudata, 2117 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 2118 return error; 2119 } 2120 2121 static void 2122 svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2123 { 2124 struct svm_cpudata *cpudata = vcpu->cpudata; 2125 2126 svm_asid_free(vcpu); 2127 2128 svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES); 2129 svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES); 2130 svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES); 2131 2132 uvm_km_free(kernel_map, (vaddr_t)cpudata, 2133 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 2134 } 2135 2136 /* -------------------------------------------------------------------------- */ 2137 2138 static int 2139 svm_vcpu_configure_cpuid(struct svm_cpudata *cpudata, void *data) 2140 { 2141 struct nvmm_vcpu_conf_cpuid *cpuid = data; 2142 size_t i; 2143 2144 if (__predict_false(cpuid->mask && cpuid->exit)) { 2145 return EINVAL; 2146 } 2147 if (__predict_false(cpuid->mask && 2148 ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) || 2149 (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) || 2150 (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) || 2151 (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) { 2152 return EINVAL; 2153 } 2154 2155 /* If unset, delete, to restore the default behavior. */ 2156 if (!cpuid->mask && !cpuid->exit) { 2157 for (i = 0; i < SVM_NCPUIDS; i++) { 2158 if (!cpudata->cpuidpresent[i]) { 2159 continue; 2160 } 2161 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2162 cpudata->cpuidpresent[i] = false; 2163 } 2164 } 2165 return 0; 2166 } 2167 2168 /* If already here, replace. */ 2169 for (i = 0; i < SVM_NCPUIDS; i++) { 2170 if (!cpudata->cpuidpresent[i]) { 2171 continue; 2172 } 2173 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2174 memcpy(&cpudata->cpuid[i], cpuid, 2175 sizeof(struct nvmm_vcpu_conf_cpuid)); 2176 return 0; 2177 } 2178 } 2179 2180 /* Not here, insert. */ 2181 for (i = 0; i < SVM_NCPUIDS; i++) { 2182 if (!cpudata->cpuidpresent[i]) { 2183 cpudata->cpuidpresent[i] = true; 2184 memcpy(&cpudata->cpuid[i], cpuid, 2185 sizeof(struct nvmm_vcpu_conf_cpuid)); 2186 return 0; 2187 } 2188 } 2189 2190 return ENOBUFS; 2191 } 2192 2193 static int 2194 svm_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data) 2195 { 2196 struct svm_cpudata *cpudata = vcpu->cpudata; 2197 2198 switch (op) { 2199 case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID): 2200 return svm_vcpu_configure_cpuid(cpudata, data); 2201 default: 2202 return EINVAL; 2203 } 2204 } 2205 2206 /* -------------------------------------------------------------------------- */ 2207 2208 static void 2209 svm_tlb_flush(struct pmap *pm) 2210 { 2211 struct nvmm_machine *mach = pm->pm_data; 2212 struct svm_machdata *machdata = mach->machdata; 2213 2214 atomic_inc_64(&machdata->mach_htlb_gen); 2215 2216 /* Generates IPIs, which cause #VMEXITs. */ 2217 pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE); 2218 } 2219 2220 static void 2221 svm_machine_create(struct nvmm_machine *mach) 2222 { 2223 struct svm_machdata *machdata; 2224 2225 /* Fill in pmap info. */ 2226 mach->vm->vm_map.pmap->pm_data = (void *)mach; 2227 mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush; 2228 2229 machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP); 2230 mach->machdata = machdata; 2231 2232 /* Start with an hTLB flush everywhere. */ 2233 machdata->mach_htlb_gen = 1; 2234 } 2235 2236 static void 2237 svm_machine_destroy(struct nvmm_machine *mach) 2238 { 2239 kmem_free(mach->machdata, sizeof(struct svm_machdata)); 2240 } 2241 2242 static int 2243 svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data) 2244 { 2245 panic("%s: impossible", __func__); 2246 } 2247 2248 /* -------------------------------------------------------------------------- */ 2249 2250 static bool 2251 svm_ident(void) 2252 { 2253 u_int descs[4]; 2254 uint64_t msr; 2255 2256 if (cpu_vendor != CPUVENDOR_AMD) { 2257 return false; 2258 } 2259 if (!(cpu_feature[3] & CPUID_SVM)) { 2260 return false; 2261 } 2262 2263 if (curcpu()->ci_max_ext_cpuid < 0x8000000a) { 2264 return false; 2265 } 2266 x86_cpuid(0x8000000a, descs); 2267 2268 /* Want Nested Paging. */ 2269 if (!(descs[3] & CPUID_AMD_SVM_NP)) { 2270 return false; 2271 } 2272 2273 /* Want nRIP. */ 2274 if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) { 2275 return false; 2276 } 2277 2278 svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0; 2279 2280 msr = rdmsr(MSR_VMCR); 2281 if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) { 2282 return false; 2283 } 2284 2285 return true; 2286 } 2287 2288 static void 2289 svm_init_asid(uint32_t maxasid) 2290 { 2291 size_t i, j, allocsz; 2292 2293 mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE); 2294 2295 /* Arbitrarily limit. */ 2296 maxasid = uimin(maxasid, 8192); 2297 2298 svm_maxasid = maxasid; 2299 allocsz = roundup(maxasid, 8) / 8; 2300 svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP); 2301 2302 /* ASID 0 is reserved for the host. */ 2303 svm_asidmap[0] |= __BIT(0); 2304 2305 /* ASID n-1 is special, we share it. */ 2306 i = (maxasid - 1) / 8; 2307 j = (maxasid - 1) % 8; 2308 svm_asidmap[i] |= __BIT(j); 2309 } 2310 2311 static void 2312 svm_change_cpu(void *arg1, void *arg2) 2313 { 2314 bool enable = arg1 != NULL; 2315 uint64_t msr; 2316 2317 msr = rdmsr(MSR_VMCR); 2318 if (msr & VMCR_SVMED) { 2319 wrmsr(MSR_VMCR, msr & ~VMCR_SVMED); 2320 } 2321 2322 if (!enable) { 2323 wrmsr(MSR_VM_HSAVE_PA, 0); 2324 } 2325 2326 msr = rdmsr(MSR_EFER); 2327 if (enable) { 2328 msr |= EFER_SVME; 2329 } else { 2330 msr &= ~EFER_SVME; 2331 } 2332 wrmsr(MSR_EFER, msr); 2333 2334 if (enable) { 2335 wrmsr(MSR_VM_HSAVE_PA, hsave[cpu_index(curcpu())].pa); 2336 } 2337 } 2338 2339 static void 2340 svm_init(void) 2341 { 2342 CPU_INFO_ITERATOR cii; 2343 struct cpu_info *ci; 2344 struct vm_page *pg; 2345 u_int descs[4]; 2346 uint64_t xc; 2347 2348 x86_cpuid(0x8000000a, descs); 2349 2350 /* The guest TLB flush command. */ 2351 if (descs[3] & CPUID_AMD_SVM_FlushByASID) { 2352 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST; 2353 } else { 2354 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL; 2355 } 2356 2357 /* Init the ASID. */ 2358 svm_init_asid(descs[1]); 2359 2360 /* Init the XCR0 mask. */ 2361 svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features; 2362 2363 memset(hsave, 0, sizeof(hsave)); 2364 for (CPU_INFO_FOREACH(cii, ci)) { 2365 pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); 2366 hsave[cpu_index(ci)].pa = VM_PAGE_TO_PHYS(pg); 2367 } 2368 2369 xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL); 2370 xc_wait(xc); 2371 } 2372 2373 static void 2374 svm_fini_asid(void) 2375 { 2376 size_t allocsz; 2377 2378 allocsz = roundup(svm_maxasid, 8) / 8; 2379 kmem_free(svm_asidmap, allocsz); 2380 2381 mutex_destroy(&svm_asidlock); 2382 } 2383 2384 static void 2385 svm_fini(void) 2386 { 2387 uint64_t xc; 2388 size_t i; 2389 2390 xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL); 2391 xc_wait(xc); 2392 2393 for (i = 0; i < MAXCPUS; i++) { 2394 if (hsave[i].pa != 0) 2395 uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa)); 2396 } 2397 2398 svm_fini_asid(); 2399 } 2400 2401 static void 2402 svm_capability(struct nvmm_capability *cap) 2403 { 2404 cap->arch.mach_conf_support = 0; 2405 cap->arch.vcpu_conf_support = 2406 NVMM_CAP_ARCH_VCPU_CONF_CPUID; 2407 cap->arch.xcr0_mask = svm_xcr0_mask; 2408 cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask; 2409 cap->arch.conf_cpuid_maxops = SVM_NCPUIDS; 2410 } 2411 2412 const struct nvmm_impl nvmm_x86_svm = { 2413 .ident = svm_ident, 2414 .init = svm_init, 2415 .fini = svm_fini, 2416 .capability = svm_capability, 2417 .mach_conf_max = NVMM_X86_MACH_NCONF, 2418 .mach_conf_sizes = NULL, 2419 .vcpu_conf_max = NVMM_X86_VCPU_NCONF, 2420 .vcpu_conf_sizes = svm_vcpu_conf_sizes, 2421 .state_size = sizeof(struct nvmm_x64_state), 2422 .machine_create = svm_machine_create, 2423 .machine_destroy = svm_machine_destroy, 2424 .machine_configure = svm_machine_configure, 2425 .vcpu_create = svm_vcpu_create, 2426 .vcpu_destroy = svm_vcpu_destroy, 2427 .vcpu_configure = svm_vcpu_configure, 2428 .vcpu_setstate = svm_vcpu_setstate, 2429 .vcpu_getstate = svm_vcpu_getstate, 2430 .vcpu_inject = svm_vcpu_inject, 2431 .vcpu_run = svm_vcpu_run 2432 }; 2433