/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __SSE_MATH__
#include "cpuid.h"
#endif

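/* Decide at run time whether SSE state (the MXCSR register) must be
   managed alongside the x87 state.  When libgfortran is compiled with
   SSE math (__SSE_MATH__), SSE support can be assumed; otherwise query
   CPUID leaf 1 and test the SSE feature bit.  */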
static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}

/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01
#define _FPU_MASK_DM  0x02
#define _FPU_MASK_ZM  0x04
#define _FPU_MASK_OM  0x08
#define _FPU_MASK_UM  0x10
#define _FPU_MASK_PM  0x20
#define _FPU_MASK_ALL 0x3f

#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)


/* This structure corresponds to the layout of the block
   written by FSTENV.  */
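/* The 28-byte FNSTENV image ends at __unused5; the trailing __mxcsr
   field is extra storage used by get_fpu_state/set_fpu_state to save
   and restore the SSE control/status register in the same buffer.  */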
typedef struct
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");


/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

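/* Invalid, divide-by-zero and inexact are raised by executing an actual
   SSE or x87 arithmetic operation that triggers them (0/0, 1/0, 1/3);
   denormal, overflow and underflow have no such arithmetic shortcut, so
   they are raised by setting the corresponding status-word bit through
   fnstenv/fldenv and forcing delivery with fwait.  */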
static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_DM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_OM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}

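/* In both the x87 control word and (shifted left by 7 bits) the MXCSR,
   a set mask bit means the exception is masked.  Trapping is therefore
   enabled by clearing bits for TRAP and disabled by setting bits for
   NOTRAP.  Pending exception flags are also cleared (fnclex, and the
   low MXCSR bits) so that newly enabled traps do not fire on stale
   flags.  */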
void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  cw |= exc_clr;
  cw &= ~exc_set;

  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}

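/* Report which exceptions currently trap.  When SSE is available the
   two mask sets are OR-ed together before inverting, so an exception is
   reported as trapping only if it is unmasked in both the x87 control
   word and the MXCSR.  */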
int
get_fpu_trap_exceptions (void)
{
  unsigned short cw;
  int mask;
  int res = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  mask = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      mask |= (cw_sse >> 7);
    }

  mask = ~mask & _FPU_MASK_ALL;

  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}

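/* Unlike the mask bits handled above, this queries the sticky exception
   flags: the low bits of the x87 status word (fnstsw) and of the MXCSR
   share the same bit layout and are OR-ed together.  */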
int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}

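/* Flags requested in CLEAR are dropped by rewriting the x87 environment
   and the MXCSR with those bits removed; flags requested in SET are
   raised through local_feraiseexcept, so any corresponding unmasked
   trap fires as a side effect.  */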
void
set_fpu_except_flags (int set, int clear)
{
  my_fenv_t temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags.  This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */

  if (has_sse())
  {
    unsigned int cw_sse;

    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
    cw_sse &= ~exc_clr;
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
  }

  local_feraiseexcept (exc_set);
}

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}

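/* The rounding mode must be set consistently in both units: bits 10-11
   of the x87 control word and bits 13-14 of the MXCSR use the same
   encoding (_FPU_RC_*), so the same value is written to both.  */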
void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

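/* Only one unit is queried, on the assumption that both were set
   consistently: the MXCSR when compiled for SSE math, the x87 control
   word otherwise.  */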
int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  round_mode &= _FPU_RC_MASK;

  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}

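/* The state buffer holds the 28-byte fnstenv image, with the MXCSR
   appended in the __mxcsr field when SSE is available.  */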
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}

void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}

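/* Gradual-underflow control is only available through the SSE
   flush-to-zero bit; the x87 unit always underflows gradually.  It is
   therefore supported only for kinds 4 and 8 (the SSE single and double
   types), and only when SSE is present.  */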
int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  return (kind == 4 || kind == 8) ? 1 : 0;
}


int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}


void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}
