xref: /openbsd-src/sys/arch/amd64/include/fpu.h (revision c0f33c9875c4ab47e986b698610630b6cbf21c6c)
/*	$OpenBSD: fpu.h,v 1.20 2024/04/14 09:59:04 kettenis Exp $	*/
/*	$NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $	*/
3f5df1827Smickey 
42fa72412Spirofti #ifndef	_MACHINE_FPU_H_
52fa72412Spirofti #define	_MACHINE_FPU_H_
6f5df1827Smickey 
7f5df1827Smickey #include <sys/types.h>
8f5df1827Smickey 
/*
 * If the CPU supports xsave/xrstor then we use them so that we can provide
 * AVX support.  Otherwise we require fxsave/fxrstor, as the SSE registers
 * are part of the ABI for passing floating point values.
 * While fxsave/fxrstor only require 16-byte alignment for the save area,
 * xsave/xrstor require the save area to have 64-byte alignment.
 */
16f5df1827Smickey 
/*
 * Memory image of the fxsave/fxrstor save area: 512 bytes, packed.
 * The number at the start of each comment is the field's byte offset.
 * NOTE(review): the field descriptions follow the FXSAVE area layout in
 * the Intel SDM, which the field names mirror -- confirm against the SDM.
 */
struct fxsave64 {
	u_int16_t  fx_fcw;		/*   0: x87 control word */
	u_int16_t  fx_fsw;		/*   2: x87 status word */
	u_int8_t   fx_ftw;		/*   4: x87 tag word (abridged) */
	u_int8_t   fx_unused1;		/*   5 */
	u_int16_t  fx_fop;		/*   6: x87 opcode */
	u_int64_t  fx_rip;		/*   8: x87 instruction pointer */
	u_int64_t  fx_rdp;		/*  16: x87 data pointer */
	u_int32_t  fx_mxcsr;		/*  24 */
	u_int32_t  fx_mxcsr_mask;	/*  28 */
	u_int64_t  fx_st[8][2];		/*  32: 8 normal FP regs, 16 bytes each */
	u_int64_t  fx_xmm[16][2];	/* 160: 16 SSE2 registers, 16 bytes each */
	u_int8_t   fx_unused3[96];	/* 416: pads the area out to 512 bytes */
} __packed;
31f5df1827Smickey 
/*
 * 64-byte header; struct savefpu places it directly after the 512-byte
 * fxsave region.
 * NOTE(review): xstate_bv / xstate_xcomp_bv presumably correspond to the
 * XSTATE_BV and XCOMP_BV fields of the XSAVE header described in the
 * Intel SDM -- confirm.
 */
struct xstate_hdr {
	uint64_t	xstate_bv;		/*  0 */
	uint64_t	xstate_xcomp_bv;	/*  8 */
	uint8_t		xstate_rsrv0[8];	/* 16: reserved */
	uint8_t		xstate_rsrv[40];	/* 24: reserved */
} __packed;
38b03cf8e0Skettenis 
39f5df1827Smickey struct savefpu {
40f5df1827Smickey 	struct fxsave64 fp_fxsave;	/* see above */
41b03cf8e0Skettenis 	struct xstate_hdr fp_xstate;
42b03cf8e0Skettenis 	u_int64_t fp_ymm[16][2];
43*c0f33c98Skettenis 	u_int8_t fp_components[1856];	/* enough for AVX-512 */
44f5df1827Smickey };
45f5df1827Smickey 
/*
 * The i387 defaults to Intel extended precision mode and round to nearest,
 * with all exceptions masked.
 *
 * NOTE(review): 0x1f80 and 0xffbf match the MXCSR reset value and the
 * default MXCSR_MASK given in the Intel SDM -- confirm before changing.
 */
#define	__INITIAL_NPXCW__	0x037f
#define	__INITIAL_MXCSR__	0x1f80
#define	__INITIAL_MXCSR_MASK__	0xffbf
53f5df1827Smickey 
54f5df1827Smickey #ifdef _KERNEL
/*
 * XXX
 */
struct trapframe;
struct cpu_info;

/*
 * FPU configuration variables, defined elsewhere in the kernel.
 * NOTE(review): the names suggest the save-area length in use, the valid
 * MXCSR bits, the xsave component mask and whether xsaves is used --
 * confirm against the definitions.
 */
extern size_t	fpu_save_len;
extern uint32_t	fpu_mxcsr_mask;
extern uint64_t	xsave_mask;
extern int cpu_use_xsaves;

/* FPU entry points implemented elsewhere in the kernel. */
void fpuinit(struct cpu_info *);
int fputrap(int _type);
void fpusave(struct savefpu *);
void fpusavereset(struct savefpu *);
void fpu_kernel_enter(void);
void fpu_kernel_exit(void);
72f5df1827Smickey 
/* pointer to fxsave/xsave/xsaves data with everything reset */
#define	fpu_cleandata	(&proc0.p_addr->u_pcb.pcb_savefpu)

/*
 * Restore helpers implemented elsewhere in the kernel.  xrstor_user()
 * and xsetbv_user() return an int status; xrstor_kern() returns nothing.
 */
int	xrstor_user(struct savefpu *_addr, uint64_t _mask);
void	xrstor_kern(struct savefpu *_addr, uint64_t _mask);
/* Reload the FPU from the clean save area, for every bit in xsave_mask. */
#define	fpureset() \
	xrstor_kern(fpu_cleandata, xsave_mask)
int	xsetbv_user(uint32_t _reg, uint64_t _mask);
81c9de630fSguenther 
/*
 * One-instruction wrappers.  The macros taking an address dereference it
 * to form the asm memory operand, so pass a pointer to the object; the
 * argument is parenthesized so that any pointer expression is accepted.
 */
#define fninit()		__asm("fninit")
#define fwait()			__asm("fwait")
/* should be fxsave64, but where we use this it doesn't matter */
#define fxsave(addr)		__asm("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*(addr)))
#define fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
88c86bb406Smlarkin 
/*
 * Execute xsave64 on the save area at addr.  The 64-bit mask is handed to
 * the instruction split across two registers: the low 32 bits in EAX and
 * the high 32 bits in EDX.
 */
static inline void
xsave(struct savefpu *addr, uint64_t mask)
{
	uint32_t lo, hi;

	lo = mask;
	hi = mask >> 32;
	/* "+m": the save area is declared as both read and written */
	__asm volatile("xsave64 %0" : "+m" (*addr) : "a" (lo), "d" (hi));
}
9855fdb5faSguenther 
/*
 * Execute xrstors64 on the save area at addr, which is only read.  The
 * 64-bit mask is handed to the instruction split across two registers:
 * the low 32 bits in EAX and the high 32 bits in EDX.
 */
static inline void
xrstors(const struct savefpu *addr, uint64_t mask)
{
	uint32_t lo, hi;

	lo = mask;
	hi = mask >> 32;
	__asm volatile("xrstors64 %0" : : "m" (*addr), "a" (lo), "d" (hi));
}
108c86bb406Smlarkin 
109f5df1827Smickey #endif
110f5df1827Smickey 
1112fa72412Spirofti #endif /* _MACHINE_FPU_H_ */
112