/*	$NetBSD: cpufunc.S,v 1.68 2024/07/16 22:44:38 riastradh Exp $	*/

/*
 * Copyright (c) 1998, 2007, 2008, 2020, 2023 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/errno.h>

#include <machine/asm.h>
#include <machine/frameasm.h>
#include <machine/specialreg.h>
#include <machine/segments.h>

#include "opt_dtrace.h"
#include "opt_xen.h"
#include "opt_svs.h"

#include "assym.h"

/* Small and slow, so align less. */
#undef _ALIGN_TEXT
#define	_ALIGN_TEXT	.align 8

ENTRY(x86_lfence)
	lfence
	ret
END(x86_lfence)

ENTRY(x86_sfence)
	sfence
	ret
END(x86_sfence)

ENTRY(x86_mfence)
	mfence
	ret
END(x86_mfence)

#ifdef XEN
ENTRY(xen_mb)
	/*
	 * Store-before-load ordering with respect to matching logic
	 * on the hypervisor side.
	 *
	 * This is the same as membar_sync, but without hotpatching
	 * away the LOCK prefix on uniprocessor boots -- because under
	 * Xen, we still have to coordinate with a `device' backed by a
	 * hypervisor that is potentially on another physical CPU even
	 * if we observe only one virtual CPU as the guest.
	 *
	 * See common/lib/libc/arch/x86_64/atomic/atomic.S for
	 * rationale and keep this in sync with the implementation
	 * of membar_sync there.
	 */
	lock
	addq	$0,-8(%rsp)
	ret
END(xen_mb)
#endif	/* XEN */

#ifndef XENPV
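/*
 * invlpg(va): invalidate the TLB entry for a single virtual address.
 * Under SVS with PCIDs in use, the two pushes below build the 16-byte
 * INVPCID descriptor on the stack (PCID in the low quadword, linear
 * address in the high quadword), and INVPCID_ADDRESS drops that one
 * translation from the user PCID; the plain INVLPG that follows takes
 * care of the current (kernel) PCID.
 */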
ENTRY(invlpg)
#ifdef SVS
	movb	_C_LABEL(svs_pcid),%al
	testb	%al,%al
	jz	1f
	pushq	%rdi
	pushq	$PMAP_PCID_USER
	movq	$INVPCID_ADDRESS,%rax
	invpcid	(%rsp),%rax
	addq	$16,%rsp
1:	/* FALLTHROUGH */
#endif
	invlpg	(%rdi)
	ret
END(invlpg)

ENTRY(lgdt)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch queue. */
	jmp	1f
	nop
1:	jmp	_C_LABEL(lgdt_finish)
END(lgdt)

ENTRY(lidt)
	lidt	(%rdi)
	ret
END(lidt)

ENTRY(lldt)
	cmpl	%edi, CPUVAR(CURLDT)
	jne	1f
	ret
1:
	movl	%edi, CPUVAR(CURLDT)
	lldt	%di
	ret
END(lldt)

ENTRY(ltr)
	ltr	%di
	ret
END(ltr)

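/*
 * tlbflushg: flush the TLB including global (PG_G) entries.  If
 * CR4.PGE is enabled, toggling it off and back on invalidates every
 * translation, globals included; otherwise fall back to tlbflush.
 */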
ENTRY(tlbflushg)
	movq	%cr4, %rax
	testq	$CR4_PGE, %rax
	jz	tlbflush
	movq	%rax, %rdx
	andq	$~CR4_PGE, %rdx
	movq	%rdx, %cr4
	movq	%rax, %cr4
	ret
END(tlbflushg)

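/*
 * tlbflush: flush the TLB, leaving global entries intact.  With SVS
 * PCIDs active, an all-zero descriptor and INVPCID_ALL_NONGLOBAL drop
 * the non-global translations of every PCID; otherwise reloading %cr3
 * achieves the same for the current address space.
 */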
ENTRY(tlbflush)
#ifdef SVS
	movb	_C_LABEL(svs_pcid),%al
	testb	%al,%al
	jz	1f
	xorq	%rax,%rax
	pushq	%rax
	pushq	%rax
	movq	$INVPCID_ALL_NONGLOBAL,%rax
	invpcid	(%rsp),%rax
	addq	$16,%rsp
	ret
#endif
1:	movq	%cr3, %rax
	movq	%rax, %cr3
	ret
END(tlbflush)

ENTRY(wbinvd)
	wbinvd
	ret
END(wbinvd)

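/*
 * setusergs(sel): load the user %gs selector.  Writing %gs replaces
 * the hidden GS base, so the load is bracketed by SWAPGS: the first
 * swap parks the kernel GS base (curcpu) out of harm's way so that
 * only the user copy is clobbered, and the second swap restores it.
 * Interrupts are kept off across the window.
 */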
ENTRY(setusergs)
	CLI(ax)
	swapgs
	movw	%di, %gs
	swapgs
	STI(ax)
	ret
END(setusergs)

ENTRY(x86_read_flags)
	pushfq
	popq	%rax
	KMSAN_INIT_RET(8)
	ret
END(x86_read_flags)

STRONG_ALIAS(x86_read_psl,x86_read_flags)

ENTRY(x86_write_flags)
	pushq	%rdi
	popfq
	ret
END(x86_write_flags)

STRONG_ALIAS(x86_write_psl,x86_write_flags)

ENTRY(smap_enable)
	SMAP_ENABLE
	ret
END(smap_enable)

ENTRY(smap_disable)
	SMAP_DISABLE
	ret
END(smap_disable)

#ifdef KDTRACE_HOOKS
ENTRY(dtrace_smap_enable)
	SMAP_ENABLE
	ret
END(dtrace_smap_enable)

ENTRY(dtrace_smap_disable)
	SMAP_DISABLE
	ret
END(dtrace_smap_disable)
#endif

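/*
 * Patch kernel text at run time.  With interrupts off and CR0.WP
 * cleared so that read-only text can be written, the name/sel
 * arguments are handed to x86_hotpatch_apply(); the caches are then
 * written back, the pipeline is flushed with a far return (x86_flush),
 * and x86_hotpatch_cleanup() is called with the value returned by the
 * apply step.
 */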
/*
 * %rdi = name
 * %rsi = sel
 */
ENTRY(x86_hotpatch)
	/* save RFLAGS, and disable intrs */
	pushfq
	cli

	/* save CR0, and disable WP */
	movq	%cr0,%rcx
	pushq	%rcx
	andq	$~CR0_WP,%rcx
	movq	%rcx,%cr0

	callq	_C_LABEL(x86_hotpatch_apply)

	/* write back and invalidate cache */
	wbinvd

	/* restore CR0 */
	popq	%rcx
	movq	%rcx,%cr0

	/* flush instruction pipeline */
	pushq	%rax
	callq	x86_flush
	popq	%rax

	/* clean up */
	movq	%rax,%rdi
	callq	_C_LABEL(x86_hotpatch_cleanup)

	/* restore RFLAGS */
	popfq
	ret
END(x86_hotpatch)
#endif /* !XENPV */

/*
 * cpu_counter and cpu_counter32 could be exactly the same, but KMSAN
 * needs to know the correct size of the return value.
 */
#define SERIALIZE_lfence	lfence
#define SERIALIZE_mfence	mfence

#define ADD_counter32	addl	CPUVAR(CC_SKEW), %eax
#define ADD_counter	shlq	$32, %rdx		;\
			orq	%rdx, %rax		;\
			addq	CPUVAR(CC_SKEW), %rax

#define RSIZE_counter32	4
#define RSIZE_counter	8

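/*
 * CPU_COUNTER_FENCE(counter, fence) generates one flavour of
 * cpu_counter: the fence keeps RDTSC from executing early, ADD_counter
 * folds %edx:%eax into one 64-bit value and applies the per-CPU skew
 * (CC_SKEW), and the whole read is retried if curlwp's involuntary
 * context switch count (l_ru.ru_nivcsw) changed under us, i.e. if we
 * may have migrated to another CPU mid-read.
 *
 * Roughly, CPU_COUNTER_FENCE(counter, lfence) expands to:
 *
 *	ENTRY(cpu_counter_lfence)
 *		movq	CPUVAR(CURLWP), %rcx
 *		leaq	L_RU+RU_NIVCSW(%rcx), %rcx
 *	1:	movq	(%rcx), %rdi		# sample nivcsw
 *		lfence
 *		rdtsc
 *		shlq	$32, %rdx		# ADD_counter
 *		orq	%rdx, %rax
 *		addq	CPUVAR(CC_SKEW), %rax
 *		cmpq	%rdi, (%rcx)		# retry if it changed
 *		jne	1b
 *		KMSAN_INIT_RET(8)
 *		ret
 *	END(cpu_counter_lfence)
 */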
#define CPU_COUNTER_FENCE(counter, fence)	\
ENTRY(cpu_ ## counter ## _ ## fence)		;\
	movq	CPUVAR(CURLWP), %rcx		;\
	leaq	L_RU+RU_NIVCSW(%rcx), %rcx	;\
1:						;\
	movq	(%rcx), %rdi			;\
	SERIALIZE_ ## fence			;\
	rdtsc					;\
	ADD_ ## counter				;\
	cmpq	%rdi, (%rcx)			;\
	jne	2f				;\
	KMSAN_INIT_RET(RSIZE_ ## counter)	;\
	ret					;\
2:						;\
	jmp	1b				;\
END(cpu_ ## counter ## _ ## fence)

CPU_COUNTER_FENCE(counter, lfence)
CPU_COUNTER_FENCE(counter, mfence)
CPU_COUNTER_FENCE(counter32, lfence)
CPU_COUNTER_FENCE(counter32, mfence)

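/*
 * The _cpuid variants serialize RDTSC with CPUID (leaf 0) instead of a
 * fence, for CPUs where LFENCE/MFENCE are not dispatch-serializing.
 * CPUID clobbers %rbx, which is callee-saved, so it is stashed in %r9
 * for the duration.
 */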
#define CPU_COUNTER_CPUID(counter)		\
ENTRY(cpu_ ## counter ## _cpuid)		;\
	movq	%rbx, %r9			;\
	movq	CPUVAR(CURLWP), %r8		;\
	leaq	L_RU+RU_NIVCSW(%r8), %r8	;\
1:						;\
	movq	(%r8), %rdi			;\
	xor	%eax, %eax			;\
	cpuid					;\
	rdtsc					;\
	ADD_ ## counter				;\
	cmpq	%rdi, (%r8)			;\
	jne	2f				;\
	movq	%r9, %rbx			;\
	KMSAN_INIT_RET(RSIZE_ ## counter)	;\
	ret					;\
2:						;\
	jmp	1b				;\
END(cpu_ ## counter ## _cpuid)

CPU_COUNTER_CPUID(counter)
CPU_COUNTER_CPUID(counter32)

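/*
 * rdmsr_safe(msr, valuep): read an MSR that might not exist
 * (%edi = MSR number, %rsi = where to store the result).  The read
 * runs with pcb_onfault pointing at msr_onfault, so a #GP raised by
 * RDMSR lands there and EFAULT is returned instead of panicking; on
 * success the 64-bit value is stored through %rsi and 0 is returned.
 */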
ENTRY(rdmsr_safe)
	movq	CPUVAR(CURLWP), %r8
	movq	L_PCB(%r8), %r8
	movq	$_C_LABEL(msr_onfault), PCB_ONFAULT(%r8)

	movl	%edi, %ecx
	rdmsr
	salq	$32, %rdx
	movl	%eax, %eax	/* zero-extend %eax -> %rax */
	orq	%rdx, %rax
	movq	%rax, (%rsi)

	xorq	%rax, %rax
	movq	%rax, PCB_ONFAULT(%r8)
#ifdef KMSAN
	movq	%rsi,%rdi
	movq	$8,%rsi
	xorq	%rdx,%rdx
	callq	_C_LABEL(kmsan_mark)
#endif
	KMSAN_INIT_RET(4)
	ret
END(rdmsr_safe)

ENTRY(msr_onfault)
	movq	CPUVAR(CURLWP), %r8
	movq	L_PCB(%r8), %r8
	movq	$0, PCB_ONFAULT(%r8)
	movl	$EFAULT, %eax
	ret
END(msr_onfault)

ENTRY(breakpoint)
	pushq	%rbp
	movq	%rsp, %rbp
	int	$0x03		/* paranoid, not 'int3' */
	leave
	ret
END(breakpoint)

ENTRY(x86_curcpu)
	movq	%gs:(CPU_INFO_SELF), %rax
	KMSAN_INIT_RET(8)
	ret
END(x86_curcpu)

ENTRY(x86_curlwp)
	movq	%gs:(CPU_INFO_CURLWP), %rax
	KMSAN_INIT_RET(8)
	ret
END(x86_curlwp)

ENTRY(__byte_swap_u32_variable)
	movl	%edi, %eax
	bswapl	%eax
	KMSAN_INIT_RET(4)
	ret
END(__byte_swap_u32_variable)

ENTRY(__byte_swap_u16_variable)
	movl	%edi, %eax
	xchgb	%al, %ah
	KMSAN_INIT_RET(2)
	ret
END(__byte_swap_u16_variable)

/*
 * Reload segments after a GDT change.
 */
ENTRY(lgdt_finish)
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	jmp	_C_LABEL(x86_flush)
END(lgdt_finish)

/*
 * Flush instruction pipelines by doing an intersegment (far) return.
 */
ENTRY(x86_flush)
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	lretq
END(x86_flush)

/* Waits - set up stack frame. */
ENTRY(x86_hlt)
	pushq	%rbp
	movq	%rsp, %rbp
	hlt
	leave
	ret
END(x86_hlt)

/* Waits - set up stack frame. */
ENTRY(x86_stihlt)
	pushq	%rbp
	movq	%rsp, %rbp
	sti
	hlt
	leave
	ret
END(x86_stihlt)

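/*
 * x86_monitor: arm address monitoring on the cache line given by the
 * first argument (MONITOR takes the linear address in %rax, extensions
 * in %rcx, hints in %rdx).  x86_mwait below then waits until that line
 * is written or another wakeup event (e.g. an interrupt) occurs; MWAIT
 * takes hints in %rax and extensions in %rcx.
 */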
ENTRY(x86_monitor)
	movq	%rdi, %rax
	movq	%rsi, %rcx
	monitor	%rax, %rcx, %rdx
	ret
END(x86_monitor)

/* Waits - set up stack frame. */
ENTRY(x86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rdi, %rax
	movq	%rsi, %rcx
	mwait	%rax, %rcx
	leave
	ret
END(x86_mwait)

ENTRY(stts)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0
	ret
END(stts)

ENTRY(fldummy)
	ffree	%st(7)
	fldz
	ret
END(fldummy)

ENTRY(inb)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inb	%dx, %al
	KMSAN_INIT_RET(1)
	ret
END(inb)

ENTRY(inw)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inw	%dx, %ax
	KMSAN_INIT_RET(2)
	ret
END(inw)

ENTRY(inl)
	movq	%rdi, %rdx
	xorq	%rax, %rax
	inl	%dx, %eax
	KMSAN_INIT_RET(4)
	ret
END(inl)

ENTRY(outb)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outb	%al, %dx
	ret
END(outb)

ENTRY(outw)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outw	%ax, %dx
	ret
END(outw)

ENTRY(outl)
	movq	%rdi, %rdx
	movq	%rsi, %rax
	outl	%eax, %dx
	ret
END(outl)

/*
 * Used by SVS only, to make an atomic but fast copy. Doesn't have
 * sanitizer instrumentation, but sanitizers disable SVS, so no problem.
 */
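/* %rdi = dst, %rsi = src, %rdx = number of quadwords to copy. */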
ENTRY(svs_quad_copy)
	movq	%rdx,%rcx
	rep
	movsq
	ret
END(svs_quad_copy)
506