xref: /plan9-contrib/sys/src/9k/k10/fpu.c (revision 9ef1f84b659abcb917c5c090acbce0772e494f21)
1 /*
2  * SIMD Floating Point.
3  * Assembler support to get at the individual instructions
4  * is in l64fpu.s.
5  * There are opportunities to be lazier about saving and
6  * restoring the state and allocating the storage needed.
7  */
8 #include "u.h"
9 #include "../port/lib.h"
10 #include "mem.h"
11 #include "dat.h"
12 #include "fns.h"
13 
14 #include "amd64.h"
15 #include "ureg.h"
16 
/*
 * Exception bits.  The same six positions are used for the
 * exception flags in FSW and MXCSR and for the exception
 * masks in FCW.
 */
enum {
	I		= 1<<0,			/* Invalid-Operation */
	D		= 1<<1,			/* Denormalized-Operand */
	Z		= 1<<2,			/* Zero-Divide */
	O		= 1<<3,			/* Overflow */
	U		= 1<<4,			/* Underflow */
	P		= 1<<5,			/* Precision */
};
25 
/*
 * FCW fields: precision control occupies bits 8-9,
 * rounding control bits 10-11.
 */
enum {
	PCs		= 0<<8,			/* Precision Control -Single */
	PCd		= 2<<8,			/* -Double */
	PCde		= 3<<8,			/* -Double Extended */
	RCn		= 0<<10,		/* Rounding Control -Nearest */
	RCd		= 1<<10,		/* -Down */
	RCu		= 2<<10,		/* -Up */
	RCz		= 3<<10,		/* -Toward Zero */
};
35 
/*
 * FSW bits above the six exception flags.
 * The flag named after each condition code is the one
 * it lands in after FSTSW AX; SAHF.
 */
enum {
	Sff		= 1<<6,			/* Stack Fault Flag */
	Es		= 1<<7,			/* Error Summary Status */
	C0		= 1<<8,			/* Condition Code Bits: CF */
	C1		= 1<<9,			/* O/U# */
	C2		= 1<<10,		/* PF */
	C3		= 1<<14,		/* ZF */
	B		= 1<<15,		/* Busy */
};
45 
/*
 * MXCSR bits above the six exception flags:
 * the exception masks sit 7 bits above their flags,
 * rounding control occupies bits 13-14.
 */
enum {
	Daz		= 1<<6,			/* Denormals are Zeros */
	Im		= 1<<7,			/* I Mask */
	Dm		= 1<<8,			/* D Mask */
	Zm		= 1<<9,			/* Z Mask */
	Om		= 1<<10,		/* O Mask */
	Um		= 1<<11,		/* U Mask */
	Pm		= 1<<12,		/* P Mask */
	Rn		= 0<<13,		/* Round to Nearest */
	Rd		= 1<<13,		/* Round Down */
	Ru		= 2<<13,		/* Round Up */
	Rz		= 3<<13,		/* Round toward Zero */
	Fz		= 1<<15,		/* Flush to Zero for Um */
};
60 
/*
 * PFPU.state.
 * Init, Busy and Idle are mutually exclusive values;
 * Hold is a flag bit or'ed on top of them by fpunotify()
 * and removed again by fpunoted().
 */
enum {
	Init		= 0,			/* The FPU has not been used */
	Busy,					/* The FPU is being used */
	Idle,					/* The FPU has been used */

	Hold		= 1<<2,			/* Handling an FPU note */
};
68 
69 extern void _clts(void);
70 extern void _fldcw(u16int);
71 extern void _fnclex(void);
72 extern void _fninit(void);
73 extern void _fxrstor(Fxsave*);
74 extern void _fxsave(Fxsave*);
75 extern void _fwait(void);
76 extern void _ldmxcsr(u32int);
77 extern void _stts(void);
78 
79 int
fpudevprocio(Proc * proc,void * a,long n,uintptr offset,int write)80 fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
81 {
82 	uchar *p;
83 
84 	/*
85 	 * Called from procdevtab.read and procdevtab.write
86 	 * allow user process access to the FPU registers.
87 	 * This is the only FPU routine which is called directly
88 	 * from the port code; it would be nice to have dynamic
89 	 * creation of entries in the device file trees...
90 	 */
91 	if(offset >= sizeof(Fxsave))
92 		return 0;
93 	if((p = proc->fpusave) == nil)
94 		return 0;
95 	switch(write){
96 	default:
97 		if(offset+n > sizeof(Fxsave))
98 			n = sizeof(Fxsave) - offset;
99 		memmove(p+offset, a, n);
100 		break;
101 	case 0:
102 		if(offset+n > sizeof(Fxsave))
103 			n = sizeof(Fxsave) - offset;
104 		memmove(a, p+offset, n);
105 		break;
106 	}
107 
108 	return n;
109 }
110 
111 void
fpunotify(Ureg *)112 fpunotify(Ureg*)
113 {
114 	/*
115 	 * Called when a note is about to be delivered to a
116 	 * user process, usually at the end of a system call.
117 	 * Note handlers are not allowed to use the FPU so
118 	 * the state is marked (after saving if necessary) and
119 	 * checked in the Device Not Available handler.
120 	 */
121 	if(up->fpustate == Busy){
122 		_fxsave(up->fpusave);
123 		_stts();
124 		up->fpustate = Idle;
125 	}
126 	up->fpustate |= Hold;
127 }
128 
129 void
fpunoted(void)130 fpunoted(void)
131 {
132 	/*
133 	 * Called from sysnoted() via the machine-dependent
134 	 * noted() routine.
135 	 * Clear the flag set above in fpunotify().
136 	 */
137 	up->fpustate &= ~Hold;
138 }
139 
140 void
fpusysrfork(Ureg *)141 fpusysrfork(Ureg*)
142 {
143 	/*
144 	 * Called early in the non-interruptible path of
145 	 * sysrfork() via the machine-dependent syscall() routine.
146 	 * Save the state so that it can be easily copied
147 	 * to the child process later.
148 	 */
149 	if(up->fpustate != Busy)
150 		return;
151 
152 	_fxsave(up->fpusave);
153 	_stts();
154 	up->fpustate = Idle;
155 }
156 
157 void
fpusysrforkchild(Proc * child,Proc * parent)158 fpusysrforkchild(Proc* child, Proc* parent)
159 {
160 	/*
161 	 * Called later in sysrfork() via the machine-dependent
162 	 * sysrforkchild() routine.
163 	 * Copy the parent FPU state to the child.
164 	 */
165 	child->fpustate = parent->fpustate;
166 	child->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
167 	if(child->fpustate == Init)
168 		return;
169 
170 	memmove(child->fpusave, parent->fpusave, sizeof(Fxsave));
171 }
172 
173 void
fpuprocsave(Proc * p)174 fpuprocsave(Proc* p)
175 {
176 	/*
177 	 * Called from sched() and sleep() via the machine-dependent
178 	 * procsave() routine.
179 	 * About to go in to the scheduler.
180 	 * If the process wasn't using the FPU
181 	 * there's nothing to do.
182 	 */
183 	if(p->fpustate != Busy)
184 		return;
185 
186 	/*
187 	 * The process is dead so clear and disable the FPU
188 	 * and set the state for whoever gets this proc struct
189 	 * next.
190 	 */
191 	if(p->state == Moribund){
192 		_clts();
193 		_fnclex();
194 		_stts();
195 		p->fpustate = Init;
196 		return;
197 	}
198 
199 	/*
200 	 * Save the FPU state without handling pending
201 	 * unmasked exceptions and disable. Postnote() can't
202 	 * be called here as sleep() already has up->rlock,
203 	 * so the handling of pending exceptions is delayed
204 	 * until the process runs again and generates a
205 	 * Device Not Available exception fault to activate
206 	 * the FPU.
207 	 */
208 	_fxsave(p->fpusave);
209 	_stts();
210 	p->fpustate = Idle;
211 }
212 
213 void
fpuprocrestore(Proc * p)214 fpuprocrestore(Proc* p)
215 {
216 	/*
217 	 * The process has been rescheduled and is about to run.
218 	 * Nothing to do here right now. If the process tries to use
219 	 * the FPU again it will cause a Device Not Available
220 	 * exception and the state will then be restored.
221 	 */
222 	USED(p);
223 }
224 
225 void
fpusysprocsetup(Proc * p)226 fpusysprocsetup(Proc* p)
227 {
228 	/*
229 	 * Disable the FPU.
230 	 * Called from sysexec() via sysprocsetup() to
231 	 * set the FPU for the new process.
232 	 */
233 	if(p->fpustate != Init){
234 		_clts();
235 		_fnclex();
236 		_stts();
237 		p->fpustate = Init;
238 	}
239 }
240 
241 static void
fpupostnote(void)242 fpupostnote(void)
243 {
244 	ushort fsw;
245 	Fxsave *fpusave;
246 	char *m, n[ERRMAX];
247 
248 	/*
249 	 * The Sff bit is sticky, meaning it should be explicitly
250 	 * cleared or there's no way to tell if the exception was an
251 	 * invalid operation or a stack fault.
252 	 */
253 	fpusave = up->fpusave;
254 	fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
255 	if(fsw & I){
256 		if(fsw & Sff){
257 			if(fsw & C1)
258 				m = "Stack Overflow";
259 			else
260 				m = "Stack Underflow";
261 		}
262 		else
263 			m = "Invalid Operation";
264 	}
265 	else if(fsw & D)
266 		m = "Denormal Operand";
267 	else if(fsw & Z)
268 		m = "Divide-By-Zero";
269 	else if(fsw & O)
270 		m = "Numeric Overflow";
271 	else if(fsw & U)
272 		m = "Numeric Underflow";
273 	else if(fsw & P)
274 		m = "Precision";
275 	else
276 		m =  "Unknown";
277 
278 	snprint(n, sizeof(n), "sys: fp: %s Exception ipo=%#llux fsw=%#ux",
279 		m, fpusave->rip, fsw);
280 	postnote(up, 1, n, NDebug);
281 }
282 
283 static void
fpuxf(Ureg * ureg,void *)284 fpuxf(Ureg* ureg, void*)
285 {
286 	u32int mxcsr;
287 	Fxsave *fpusave;
288 	char *m, n[ERRMAX];
289 
290 	/*
291 	 * #XF - SIMD Floating Point Exception (Vector 18).
292 	 */
293 
294 	/*
295 	 * Save FPU state to check out the error.
296 	 */
297 	fpusave = up->fpusave;
298 	_fxsave(fpusave);
299 	_stts();
300 	up->fpustate = Idle;
301 
302 	if(ureg->ip & KZERO)
303 		panic("#MF: ip=%#p", ureg->ip);
304 
305 	/*
306 	 * Notify the user process.
307 	 * The path here is similar to the x87 path described
308 	 * in fpupostnote above but without the fpupostnote()
309 	 * call.
310 	 */
311 	mxcsr = fpusave->mxcsr;
312 	if((mxcsr & (Im|I)) == I)
313 		m = "Invalid Operation";
314 	else if((mxcsr & (Dm|D)) == D)
315 		m = "Denormal Operand";
316 	else if((mxcsr & (Zm|Z)) == Z)
317 		m = "Divide-By-Zero";
318 	else if((mxcsr & (Om|O)) == O)
319 		m = "Numeric Overflow";
320 	else if((mxcsr & (Um|U)) == U)
321 		m = "Numeric Underflow";
322 	else if((mxcsr & (Pm|P)) == P)
323 		m = "Precision";
324 	else
325 		m =  "Unknown";
326 
327 	snprint(n, sizeof(n), "sys: fp: %s Exception mxcsr=%#ux", m, mxcsr);
328 	postnote(up, 1, n, NDebug);
329 }
330 
331 static void
fpumf(Ureg * ureg,void *)332 fpumf(Ureg* ureg, void*)
333 {
334 	Fxsave *fpusave;
335 
336 	/*
337 	 * #MF - x87 Floating Point Exception Pending (Vector 16).
338 	 */
339 
340 	/*
341 	 * Save FPU state to check out the error.
342 	 */
343 	fpusave = up->fpusave;
344 	_fxsave(fpusave);
345 	_stts();
346 	up->fpustate = Idle;
347 
348 	if(ureg->ip & KZERO)
349 		panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
350 
351 	/*
352 	 * Notify the user process.
353 	 * The path here is
354 	 *	call trap->fpumf->fpupostnote->postnote
355 	 *	return ->fpupostnote->fpumf->trap
356 	 *	call notify->fpunotify
357 	 *	return ->notify
358 	 * then either
359 	 *	call pexit
360 	 * or
361 	 *	return ->trap
362 	 *	return ->user note handler
363 	 */
364 	fpupostnote();
365 }
366 
367 static void
fpunm(Ureg * ureg,void *)368 fpunm(Ureg* ureg, void*)
369 {
370 	Fxsave *fpusave;
371 
372 	/*
373 	 * #NM - Device Not Available (Vector 7).
374 	 */
375 	if(up == nil)
376 		panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);
377 
378 	/*
379 	 * Someone tried to use the FPU in a note handler.
380 	 * That's a no-no.
381 	 */
382 	if(up->fpustate & Hold){
383 		postnote(up, 1, "sys: floating point in note handler", NDebug);
384 		return;
385 	}
386 	if(ureg->ip & KZERO)
387 		panic("#NM: proc %d %s state %d ip %#p\n",
388 			up->pid, up->text, up->fpustate, ureg->ip);
389 
390 	switch(up->fpustate){
391 	case Busy:
392 	default:
393 		panic("#NM: state %d ip %#p\n", up->fpustate, ureg->ip);
394 		break;
395 	case Init:
396 		/*
397 		 * A process tries to use the FPU for the
398 		 * first time and generates a 'device not available'
399 		 * exception.
400 		 * Turn the FPU on and initialise it for use.
401 		 * Set the precision and mask the exceptions
402 		 * we don't care about from the generic Mach value.
403 		 */
404 		_clts();
405 		_fninit();
406 		_fwait();
407 		_fldcw(m->fcw);
408 		_ldmxcsr(m->mxcsr);
409 		up->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
410 		up->fpustate = Busy;
411 		break;
412 	case Idle:
413 		/*
414 		 * Before restoring the state, check for any pending
415 		 * exceptions, there's no way to restore the state without
416 		 * generating an unmasked exception.
417 		 */
418 		fpusave = up->fpusave;
419 		if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I)){
420 			fpupostnote();
421 			break;
422 		}
423 
424 		/*
425 		 * Sff is sticky.
426 		 */
427 		fpusave->fcw &= ~Sff;
428 		_clts();
429 		_fxrstor(fpusave);
430 		up->fpustate = Busy;
431 		break;
432 	}
433 }
434 
435 void
fpuinit(void)436 fpuinit(void)
437 {
438 	u64int r;
439 	Fxsave *fxsave;
440 	uchar buf[sizeof(Fxsave)+15];
441 
442 	/*
443 	 * It's assumed there is an integrated FPU, so Em is cleared;
444 	 */
445 	r = cr0get();
446 	r &= ~(Ts|Em);
447 	r |= Ne|Mp;
448 	cr0put(r);
449 
450 	r = cr4get();
451 	r |= Osxmmexcpt|Osfxsr;
452 	cr4put(r);
453 
454 	_fninit();
455 	fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
456 	memset(fxsave, 0, sizeof(Fxsave));
457 	_fxsave(fxsave);
458 	m->fcw = RCn|PCd|P|U|D;
459 	if(fxsave->mxcsrmask == 0)
460 		m->mxcsrmask = 0x0000FFBF;
461 	else
462 		m->mxcsrmask = fxsave->mxcsrmask;
463 	m->mxcsr = (Rn|Pm|Um|Dm) & m->mxcsrmask;
464 	_stts();
465 
466 	if(m->machno != 0)
467 		return;
468 
469 	/*
470 	 * Set up the exception handlers.
471 	 */
472 	trapenable(IdtNM, fpunm, 0, "#NM");
473 	trapenable(IdtMF, fpumf, 0, "#MF");
474 	trapenable(IdtXF, fpuxf, 0, "#XF");
475 }
476