/*	$NetBSD: cpufunc.S,v 1.68 2024/07/16 22:44:38 riastradh Exp $	*/

/*
 * Copyright (c) 1998, 2007, 2008, 2020, 2023 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/errno.h>

#include <machine/asm.h>
#include <machine/frameasm.h>
#include <machine/specialreg.h>
#include <machine/segments.h>

#include "opt_dtrace.h"
#include "opt_xen.h"
#include "opt_svs.h"

#include "assym.h"

/* Small and slow, so align less. */
#undef _ALIGN_TEXT
#define	_ALIGN_TEXT	.align 8

ENTRY(x86_lfence)
	lfence
	ret
END(x86_lfence)

ENTRY(x86_sfence)
	sfence
	ret
END(x86_sfence)

ENTRY(x86_mfence)
	mfence
	ret
END(x86_mfence)

#ifdef XEN
ENTRY(xen_mb)
	/*
	 * Store-before-load ordering with respect to matching logic
	 * on the hypervisor side.
	 *
	 * This is the same as membar_sync, but without hotpatching
	 * away the LOCK prefix on uniprocessor boots -- because under
	 * Xen, we still have to coordinate with a `device' backed by a
	 * hypervisor that is potentially on another physical CPU even
	 * if we observe only one virtual CPU as the guest.
	 *
	 * See common/lib/libc/arch/x86_64/atomic/atomic.S for
	 * rationale and keep this in sync with the implementation
	 * of membar_sync there.
	 */
	lock
	addq	$0,-8(%rsp)
	ret
END(xen_mb)
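
/*
 * Illustrative sketch only (not from the original source): the locked
 * no-op add above acts as a full fence, so a guest-side sequence such as
 *
 *	store	request into the shared page
 *	xen_mb()
 *	load	the backend's event flag from the shared page
 *
 * cannot have the load satisfied before the store is visible to the
 * hypervisor side.  "Shared page" and "event flag" are generic
 * placeholders here, not references to specific structures.
 */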
#endif	/* XEN */

#ifndef XENPV
ENTRY(invlpg)
#ifdef SVS
	movb	_C_LABEL(svs_pcid),%al
	testb	%al,%al
	jz	1f
	pushq	%rdi
	pushq	$PMAP_PCID_USER
	movq	$INVPCID_ADDRESS,%rax
	invpcid	(%rsp),%rax
	addq	$16,%rsp
1:	/* FALLTHROUGH */
#endif
	invlpg	(%rdi)
	ret
END(invlpg)

ENTRY(lgdt)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch queue. */
	jmp	1f
	nop
1:	jmp	_C_LABEL(lgdt_finish)
END(lgdt)

ENTRY(lidt)
	lidt	(%rdi)
	ret
END(lidt)

ENTRY(lldt)
	cmpl	%edi, CPUVAR(CURLDT)
	jne	1f
	ret
1:
	movl	%edi, CPUVAR(CURLDT)
	lldt	%di
	ret
END(lldt)

ENTRY(ltr)
	ltr	%di
	ret
END(ltr)

ENTRY(tlbflushg)
	movq	%cr4, %rax
	testq	$CR4_PGE, %rax
	jz	tlbflush
	movq	%rax, %rdx
	andq	$~CR4_PGE, %rdx
	movq	%rdx, %cr4
	movq	%rax, %cr4
	ret
END(tlbflushg)

ENTRY(tlbflush)
#ifdef SVS
	movb	_C_LABEL(svs_pcid),%al
	testb	%al,%al
	jz	1f
	xorq	%rax,%rax
	pushq	%rax
	pushq	%rax
	movq	$INVPCID_ALL_NONGLOBAL,%rax
	invpcid	(%rsp),%rax
	addq	$16,%rsp
	ret
#endif
1:	movq	%cr3, %rax
	movq	%rax, %cr3
	ret
END(tlbflush)

ENTRY(wbinvd)
	wbinvd
	ret
END(wbinvd)

ENTRY(setusergs)
	CLI(ax)
	swapgs
	movw	%di, %gs
	swapgs
	STI(ax)
	ret
END(setusergs)

ENTRY(x86_read_flags)
	pushfq
	popq	%rax
	KMSAN_INIT_RET(8)
	ret
END(x86_read_flags)

STRONG_ALIAS(x86_read_psl,x86_read_flags)

ENTRY(x86_write_flags)
	pushq	%rdi
	popfq
	ret
END(x86_write_flags)

STRONG_ALIAS(x86_write_psl,x86_write_flags)

ENTRY(smap_enable)
	SMAP_ENABLE
	ret
END(smap_enable)

ENTRY(smap_disable)
	SMAP_DISABLE
	ret
END(smap_disable)

#ifdef KDTRACE_HOOKS
ENTRY(dtrace_smap_enable)
	SMAP_ENABLE
	ret
END(dtrace_smap_enable)

ENTRY(dtrace_smap_disable)
	SMAP_DISABLE
	ret
END(dtrace_smap_disable)
#endif

/*
 * %rdi = name
 * %rsi = sel
 */
ENTRY(x86_hotpatch)
	/* save RFLAGS, and disable intrs */
	pushfq
	cli

	/* save CR0, and disable WP */
	movq	%cr0,%rcx
	pushq	%rcx
	andq	$~CR0_WP,%rcx
	movq	%rcx,%cr0

	callq	_C_LABEL(x86_hotpatch_apply)

	/* write back and invalidate cache */
	wbinvd

	/* restore CR0 */
	popq	%rcx
	movq	%rcx,%cr0

	/* flush instruction pipeline */
	pushq	%rax
	callq	x86_flush
	popq	%rax

	/* clean up */
	movq	%rax,%rdi
	callq	_C_LABEL(x86_hotpatch_cleanup)

	/* restore RFLAGS */
	popfq
	ret
END(x86_hotpatch)
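
/*
 * Summary (added for clarity, not part of the original source):
 * x86_hotpatch runs with interrupts off and CR0.WP temporarily cleared
 * so that x86_hotpatch_apply can rewrite kernel text in place; the
 * wbinvd and the far return through x86_flush ensure no stale bytes
 * remain in the caches or the instruction pipeline before interrupts
 * are re-enabled.  A caller passes a patch name and selector roughly as
 *
 *	x86_hotpatch(name, sel);
 *
 * where the actual constants and the C prototype live outside this file.
 */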
#endif /* !XENPV */

/*
 * cpu_counter and cpu_counter32 could be exactly the same, but KMSAN needs
 * to know the correct size of the return value.
 */
#define SERIALIZE_lfence	lfence
#define SERIALIZE_mfence	mfence

#define ADD_counter32	addl	CPUVAR(CC_SKEW), %eax
#define ADD_counter	shlq	$32, %rdx		;\
			orq	%rdx, %rax		;\
			addq	CPUVAR(CC_SKEW), %rax

#define RSIZE_counter32	4
#define RSIZE_counter	8

#define CPU_COUNTER_FENCE(counter, fence)	\
ENTRY(cpu_ ## counter ## _ ## fence)		;\
	movq	CPUVAR(CURLWP), %rcx		;\
	leaq	L_RU+RU_NIVCSW(%rcx), %rcx	;\
1:						;\
	movq	(%rcx), %rdi			;\
	SERIALIZE_ ## fence			;\
	rdtsc					;\
	ADD_ ## counter				;\
	cmpq	%rdi, (%rcx)			;\
	jne	2f				;\
	KMSAN_INIT_RET(RSIZE_ ## counter)	;\
	ret					;\
2:						;\
	jmp	1b				;\
END(cpu_ ## counter ## _ ## fence)

CPU_COUNTER_FENCE(counter, lfence)
CPU_COUNTER_FENCE(counter, mfence)
CPU_COUNTER_FENCE(counter32, lfence)
CPU_COUNTER_FENCE(counter32, mfence)

#define CPU_COUNTER_CPUID(counter)		\
ENTRY(cpu_ ## counter ## _cpuid)		;\
	movq	%rbx, %r9			;\
	movq	CPUVAR(CURLWP), %r8		;\
	leaq	L_RU+RU_NIVCSW(%r8), %r8	;\
1:						;\
	movq	(%r8), %rdi			;\
	xor	%eax, %eax			;\
	cpuid					;\
	rdtsc					;\
	ADD_ ## counter				;\
	cmpq	%rdi, (%r8)			;\
	jne	2f				;\
	movq	%r9, %rbx			;\
	KMSAN_INIT_RET(RSIZE_ ## counter)	;\
	ret					;\
2:						;\
	jmp	1b				;\
END(cpu_ ## counter ## _cpuid)

CPU_COUNTER_CPUID(counter)
CPU_COUNTER_CPUID(counter32)

ENTRY(rdmsr_safe)
	movq	CPUVAR(CURLWP), %r8
	movq	L_PCB(%r8), %r8
	movq	$_C_LABEL(msr_onfault), PCB_ONFAULT(%r8)

	movl	%edi, %ecx
	rdmsr
	salq	$32, %rdx
	movl	%eax, %eax	/* zero-extend %eax -> %rax */
	orq	%rdx, %rax
	movq	%rax, (%rsi)

	xorq	%rax, %rax
	movq	%rax, PCB_ONFAULT(%r8)
#ifdef KMSAN
	movq	%rsi,%rdi
	movq	$8,%rsi
	xorq	%rdx,%rdx
	callq	_C_LABEL(kmsan_mark)
#endif
	KMSAN_INIT_RET(4)
	ret
END(rdmsr_safe)

ENTRY(msr_onfault)
	movq	CPUVAR(CURLWP), %r8
	movq	L_PCB(%r8), %r8
	movq	$0, PCB_ONFAULT(%r8)
	movl	$EFAULT, %eax
	ret
END(msr_onfault)

ENTRY(breakpoint)
	pushq	%rbp
	movq	%rsp, %rbp
	int	$0x03		/* paranoid, not 'int3' */
	leave
	ret
END(breakpoint)

ENTRY(x86_curcpu)
	movq	%gs:(CPU_INFO_SELF), %rax
	KMSAN_INIT_RET(8)
	ret
END(x86_curcpu)

ENTRY(x86_curlwp)
	movq	%gs:(CPU_INFO_CURLWP), %rax
	KMSAN_INIT_RET(8)
	ret
END(x86_curlwp)

ENTRY(__byte_swap_u32_variable)
	movl	%edi, %eax
	bswapl	%eax
	KMSAN_INIT_RET(4)
	ret
END(__byte_swap_u32_variable)

ENTRY(__byte_swap_u16_variable)
	movl	%edi, %eax
	xchgb	%al, %ah
	KMSAN_INIT_RET(2)
	ret
END(__byte_swap_u16_variable)

/*
 * Reload segments after a GDT change.
 */
ENTRY(lgdt_finish)
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	jmp	_C_LABEL(x86_flush)
END(lgdt_finish)

/*
 * Flush instruction pipelines by doing an intersegment (far) return.
 */
ENTRY(x86_flush)
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	lretq
END(x86_flush)

/* Waits - set up stack frame. */
ENTRY(x86_hlt)
	pushq	%rbp
	movq	%rsp, %rbp
	hlt
	leave
	ret
END(x86_hlt)

/* Waits - set up stack frame. */
ENTRY(x86_stihlt)
	pushq	%rbp
	movq	%rsp, %rbp
	sti
	hlt
	leave
	ret
END(x86_stihlt)

ENTRY(x86_monitor)
	movq	%rdi, %rax
	movq	%rsi, %rcx
	monitor	%rax, %rcx, %rdx
	ret
END(x86_monitor)
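
/*
 * Illustrative sketch only (not from the original source): x86_monitor
 * (above) and x86_mwait (below) are normally used as a pair, e.g. in an
 * idle loop along the lines of
 *
 *	x86_monitor(&wakeup_word, 0, 0);
 *	if (nothing_pending())
 *		x86_mwait(hints, 0);
 *
 * "wakeup_word", "nothing_pending" and "hints" are placeholders for the
 * caller's state; they are not defined in this file.
 */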

/* Waits - set up stack frame. */
ENTRY(x86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rdi, %rax
	movq	%rsi, %rcx
	mwait	%rax, %rcx
	leave
	ret
END(x86_mwait)

ENTRY(stts)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0
	ret
END(stts)

ENTRY(fldummy)
	ffree	%st(7)
	fldz
	ret
END(fldummy)

ENTRY(inb)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inb	%dx, %al
	KMSAN_INIT_RET(1)
	ret
END(inb)

ENTRY(inw)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inw	%dx, %ax
	KMSAN_INIT_RET(2)
	ret
END(inw)

ENTRY(inl)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inl	%dx, %eax
	KMSAN_INIT_RET(4)
	ret
END(inl)

ENTRY(outb)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outb	%al, %dx
	ret
END(outb)

ENTRY(outw)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outw	%ax, %dx
	ret
END(outw)

ENTRY(outl)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outl	%eax, %dx
	ret
END(outl)

/*
 * Used by SVS only, to make an atomic but fast copy.  Doesn't have
 * sanitizer instrumentation, but sanitizers disable SVS, so no problem.
 */
ENTRY(svs_quad_copy)
	movq	%rdx,%rcx
	rep
	movsq
	ret
END(svs_quad_copy)
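
/*
 * Note (added for clarity, not part of the original source): per the
 * standard amd64 calling convention, %rdi is the destination, %rsi the
 * source, and %rdx the count; since the string op is movsq, the count
 * is in 8-byte quadwords, not bytes.
 */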