1 /*
2 * SIMD Floating Point.
3 * Assembler support to get at the individual instructions
4 * is in l64fpu.s.
5 * There are opportunities to be lazier about saving and
6 * restoring the state and allocating the storage needed.
7 */
8 #include "u.h"
9 #include "../port/lib.h"
10 #include "mem.h"
11 #include "dat.h"
12 #include "fns.h"
13
14 #include "amd64.h"
15 #include "ureg.h"
16
/*
 * Exception flag bits.
 * The same six bit positions are used in FCW, FSW and MXCSR.
 */
enum {
	I		= 1<<0,			/* Invalid-Operation */
	D		= 1<<1,			/* Denormalized-Operand */
	Z		= 1<<2,			/* Zero-Divide */
	O		= 1<<3,			/* Overflow */
	U		= 1<<4,			/* Underflow */
	P		= 1<<5,			/* Precision */
};
25
/*
 * FCW fields: a two-bit Precision Control field starting at bit 8
 * and a two-bit Rounding Control field starting at bit 10.
 */
enum {
	PCs		= 0<<8,			/* Precision Control -Single */
	PCd		= 2<<8,			/* -Double */
	PCde		= 3<<8,			/* -Double Extended */
	RCn		= 0<<10,		/* Rounding Control -Nearest */
	RCd		= 1<<10,		/* -Down */
	RCu		= 2<<10,		/* -Up */
	RCz		= 3<<10,		/* -Toward Zero */
};
35
/*
 * FSW bits above the six exception flags.
 */
enum {
	Sff		= 1<<6,			/* Stack Fault Flag */
	Es		= 1<<7,			/* Error Summary Status */
	C0		= 1<<8,			/* ZF - Condition Code Bits */
	C1		= 1<<9,			/* O/U# */
	C2		= 1<<10,		/* PF */
	C3		= 1<<14,		/* ZF */
	B		= 1<<15,		/* Busy */
};
45
/*
 * MXCSR bits above the six exception flags: the exception
 * masks, a two-bit rounding field starting at bit 13, and
 * the denormal/flush controls.
 */
enum {
	Daz		= 1<<6,			/* Denormals are Zeros */
	Im		= 1<<7,			/* I Mask */
	Dm		= 1<<8,			/* D Mask */
	Zm		= 1<<9,			/* Z Mask */
	Om		= 1<<10,		/* O Mask */
	Um		= 1<<11,		/* U Mask */
	Pm		= 1<<12,		/* P Mask */
	Rn		= 0<<13,		/* Round to Nearest */
	Rd		= 1<<13,		/* Round Down */
	Ru		= 2<<13,		/* Round Up */
	Rz		= 3<<13,		/* Round toward Zero */
	Fz		= 1<<15,		/* Flush to Zero for Um */
};
60
/*
 * PFPU.state.
 * Init, Busy and Idle are the basic states; Hold is a flag
 * or'ed in by fpunotify() and removed again by fpunoted().
 */
enum {
	Init		= 0,			/* The FPU has not been used */
	Busy		= 1,			/* The FPU is being used */
	Idle		= 2,			/* The FPU has been used */

	Hold		= 1<<2,			/* Handling an FPU note */
};
68
69 extern void _clts(void);
70 extern void _fldcw(u16int);
71 extern void _fnclex(void);
72 extern void _fninit(void);
73 extern void _fxrstor(Fxsave*);
74 extern void _fxsave(Fxsave*);
75 extern void _fwait(void);
76 extern void _ldmxcsr(u32int);
77 extern void _stts(void);
78
79 int
fpudevprocio(Proc * proc,void * a,long n,uintptr offset,int write)80 fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write)
81 {
82 uchar *p;
83
84 /*
85 * Called from procdevtab.read and procdevtab.write
86 * allow user process access to the FPU registers.
87 * This is the only FPU routine which is called directly
88 * from the port code; it would be nice to have dynamic
89 * creation of entries in the device file trees...
90 */
91 if(offset >= sizeof(Fxsave))
92 return 0;
93 if((p = proc->fpusave) == nil)
94 return 0;
95 switch(write){
96 default:
97 if(offset+n > sizeof(Fxsave))
98 n = sizeof(Fxsave) - offset;
99 memmove(p+offset, a, n);
100 break;
101 case 0:
102 if(offset+n > sizeof(Fxsave))
103 n = sizeof(Fxsave) - offset;
104 memmove(a, p+offset, n);
105 break;
106 }
107
108 return n;
109 }
110
111 void
fpunotify(Ureg *)112 fpunotify(Ureg*)
113 {
114 /*
115 * Called when a note is about to be delivered to a
116 * user process, usually at the end of a system call.
117 * Note handlers are not allowed to use the FPU so
118 * the state is marked (after saving if necessary) and
119 * checked in the Device Not Available handler.
120 */
121 if(up->fpustate == Busy){
122 _fxsave(up->fpusave);
123 _stts();
124 up->fpustate = Idle;
125 }
126 up->fpustate |= Hold;
127 }
128
129 void
fpunoted(void)130 fpunoted(void)
131 {
132 /*
133 * Called from sysnoted() via the machine-dependent
134 * noted() routine.
135 * Clear the flag set above in fpunotify().
136 */
137 up->fpustate &= ~Hold;
138 }
139
140 void
fpusysrfork(Ureg *)141 fpusysrfork(Ureg*)
142 {
143 /*
144 * Called early in the non-interruptible path of
145 * sysrfork() via the machine-dependent syscall() routine.
146 * Save the state so that it can be easily copied
147 * to the child process later.
148 */
149 if(up->fpustate != Busy)
150 return;
151
152 _fxsave(up->fpusave);
153 _stts();
154 up->fpustate = Idle;
155 }
156
157 void
fpusysrforkchild(Proc * child,Proc * parent)158 fpusysrforkchild(Proc* child, Proc* parent)
159 {
160 /*
161 * Called later in sysrfork() via the machine-dependent
162 * sysrforkchild() routine.
163 * Copy the parent FPU state to the child.
164 */
165 child->fpustate = parent->fpustate;
166 child->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
167 if(child->fpustate == Init)
168 return;
169
170 memmove(child->fpusave, parent->fpusave, sizeof(Fxsave));
171 }
172
173 void
fpuprocsave(Proc * p)174 fpuprocsave(Proc* p)
175 {
176 /*
177 * Called from sched() and sleep() via the machine-dependent
178 * procsave() routine.
179 * About to go in to the scheduler.
180 * If the process wasn't using the FPU
181 * there's nothing to do.
182 */
183 if(p->fpustate != Busy)
184 return;
185
186 /*
187 * The process is dead so clear and disable the FPU
188 * and set the state for whoever gets this proc struct
189 * next.
190 */
191 if(p->state == Moribund){
192 _clts();
193 _fnclex();
194 _stts();
195 p->fpustate = Init;
196 return;
197 }
198
199 /*
200 * Save the FPU state without handling pending
201 * unmasked exceptions and disable. Postnote() can't
202 * be called here as sleep() already has up->rlock,
203 * so the handling of pending exceptions is delayed
204 * until the process runs again and generates a
205 * Device Not Available exception fault to activate
206 * the FPU.
207 */
208 _fxsave(p->fpusave);
209 _stts();
210 p->fpustate = Idle;
211 }
212
213 void
fpuprocrestore(Proc * p)214 fpuprocrestore(Proc* p)
215 {
216 /*
217 * The process has been rescheduled and is about to run.
218 * Nothing to do here right now. If the process tries to use
219 * the FPU again it will cause a Device Not Available
220 * exception and the state will then be restored.
221 */
222 USED(p);
223 }
224
225 void
fpusysprocsetup(Proc * p)226 fpusysprocsetup(Proc* p)
227 {
228 /*
229 * Disable the FPU.
230 * Called from sysexec() via sysprocsetup() to
231 * set the FPU for the new process.
232 */
233 if(p->fpustate != Init){
234 _clts();
235 _fnclex();
236 _stts();
237 p->fpustate = Init;
238 }
239 }
240
241 static void
fpupostnote(void)242 fpupostnote(void)
243 {
244 ushort fsw;
245 Fxsave *fpusave;
246 char *m, n[ERRMAX];
247
248 /*
249 * The Sff bit is sticky, meaning it should be explicitly
250 * cleared or there's no way to tell if the exception was an
251 * invalid operation or a stack fault.
252 */
253 fpusave = up->fpusave;
254 fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I);
255 if(fsw & I){
256 if(fsw & Sff){
257 if(fsw & C1)
258 m = "Stack Overflow";
259 else
260 m = "Stack Underflow";
261 }
262 else
263 m = "Invalid Operation";
264 }
265 else if(fsw & D)
266 m = "Denormal Operand";
267 else if(fsw & Z)
268 m = "Divide-By-Zero";
269 else if(fsw & O)
270 m = "Numeric Overflow";
271 else if(fsw & U)
272 m = "Numeric Underflow";
273 else if(fsw & P)
274 m = "Precision";
275 else
276 m = "Unknown";
277
278 snprint(n, sizeof(n), "sys: fp: %s Exception ipo=%#llux fsw=%#ux",
279 m, fpusave->rip, fsw);
280 postnote(up, 1, n, NDebug);
281 }
282
283 static void
fpuxf(Ureg * ureg,void *)284 fpuxf(Ureg* ureg, void*)
285 {
286 u32int mxcsr;
287 Fxsave *fpusave;
288 char *m, n[ERRMAX];
289
290 /*
291 * #XF - SIMD Floating Point Exception (Vector 18).
292 */
293
294 /*
295 * Save FPU state to check out the error.
296 */
297 fpusave = up->fpusave;
298 _fxsave(fpusave);
299 _stts();
300 up->fpustate = Idle;
301
302 if(ureg->ip & KZERO)
303 panic("#MF: ip=%#p", ureg->ip);
304
305 /*
306 * Notify the user process.
307 * The path here is similar to the x87 path described
308 * in fpupostnote above but without the fpupostnote()
309 * call.
310 */
311 mxcsr = fpusave->mxcsr;
312 if((mxcsr & (Im|I)) == I)
313 m = "Invalid Operation";
314 else if((mxcsr & (Dm|D)) == D)
315 m = "Denormal Operand";
316 else if((mxcsr & (Zm|Z)) == Z)
317 m = "Divide-By-Zero";
318 else if((mxcsr & (Om|O)) == O)
319 m = "Numeric Overflow";
320 else if((mxcsr & (Um|U)) == U)
321 m = "Numeric Underflow";
322 else if((mxcsr & (Pm|P)) == P)
323 m = "Precision";
324 else
325 m = "Unknown";
326
327 snprint(n, sizeof(n), "sys: fp: %s Exception mxcsr=%#ux", m, mxcsr);
328 postnote(up, 1, n, NDebug);
329 }
330
331 static void
fpumf(Ureg * ureg,void *)332 fpumf(Ureg* ureg, void*)
333 {
334 Fxsave *fpusave;
335
336 /*
337 * #MF - x87 Floating Point Exception Pending (Vector 16).
338 */
339
340 /*
341 * Save FPU state to check out the error.
342 */
343 fpusave = up->fpusave;
344 _fxsave(fpusave);
345 _stts();
346 up->fpustate = Idle;
347
348 if(ureg->ip & KZERO)
349 panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip);
350
351 /*
352 * Notify the user process.
353 * The path here is
354 * call trap->fpumf->fpupostnote->postnote
355 * return ->fpupostnote->fpumf->trap
356 * call notify->fpunotify
357 * return ->notify
358 * then either
359 * call pexit
360 * or
361 * return ->trap
362 * return ->user note handler
363 */
364 fpupostnote();
365 }
366
367 static void
fpunm(Ureg * ureg,void *)368 fpunm(Ureg* ureg, void*)
369 {
370 Fxsave *fpusave;
371
372 /*
373 * #NM - Device Not Available (Vector 7).
374 */
375 if(up == nil)
376 panic("#NM: fpu in kernel: ip %#p\n", ureg->ip);
377
378 /*
379 * Someone tried to use the FPU in a note handler.
380 * That's a no-no.
381 */
382 if(up->fpustate & Hold){
383 postnote(up, 1, "sys: floating point in note handler", NDebug);
384 return;
385 }
386 if(ureg->ip & KZERO)
387 panic("#NM: proc %d %s state %d ip %#p\n",
388 up->pid, up->text, up->fpustate, ureg->ip);
389
390 switch(up->fpustate){
391 case Busy:
392 default:
393 panic("#NM: state %d ip %#p\n", up->fpustate, ureg->ip);
394 break;
395 case Init:
396 /*
397 * A process tries to use the FPU for the
398 * first time and generates a 'device not available'
399 * exception.
400 * Turn the FPU on and initialise it for use.
401 * Set the precision and mask the exceptions
402 * we don't care about from the generic Mach value.
403 */
404 _clts();
405 _fninit();
406 _fwait();
407 _fldcw(m->fcw);
408 _ldmxcsr(m->mxcsr);
409 up->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15);
410 up->fpustate = Busy;
411 break;
412 case Idle:
413 /*
414 * Before restoring the state, check for any pending
415 * exceptions, there's no way to restore the state without
416 * generating an unmasked exception.
417 */
418 fpusave = up->fpusave;
419 if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I)){
420 fpupostnote();
421 break;
422 }
423
424 /*
425 * Sff is sticky.
426 */
427 fpusave->fcw &= ~Sff;
428 _clts();
429 _fxrstor(fpusave);
430 up->fpustate = Busy;
431 break;
432 }
433 }
434
435 void
fpuinit(void)436 fpuinit(void)
437 {
438 u64int r;
439 Fxsave *fxsave;
440 uchar buf[sizeof(Fxsave)+15];
441
442 /*
443 * It's assumed there is an integrated FPU, so Em is cleared;
444 */
445 r = cr0get();
446 r &= ~(Ts|Em);
447 r |= Ne|Mp;
448 cr0put(r);
449
450 r = cr4get();
451 r |= Osxmmexcpt|Osfxsr;
452 cr4put(r);
453
454 _fninit();
455 fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15);
456 memset(fxsave, 0, sizeof(Fxsave));
457 _fxsave(fxsave);
458 m->fcw = RCn|PCd|P|U|D;
459 if(fxsave->mxcsrmask == 0)
460 m->mxcsrmask = 0x0000FFBF;
461 else
462 m->mxcsrmask = fxsave->mxcsrmask;
463 m->mxcsr = (Rn|Pm|Um|Dm) & m->mxcsrmask;
464 _stts();
465
466 if(m->machno != 0)
467 return;
468
469 /*
470 * Set up the exception handlers.
471 */
472 trapenable(IdtNM, fpunm, 0, "#NM");
473 trapenable(IdtMF, fpumf, 0, "#MF");
474 trapenable(IdtXF, fpuxf, 0, "#XF");
475 }
476