xref: /netbsd-src/external/gpl3/gcc/dist/libgfortran/config/fpu-387.h (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* FPU-related code for x86 and x86_64 processors.
2    Copyright (C) 2005-2022 Free Software Foundation, Inc.
3    Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
4 
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6 
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
11 
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
26 #ifndef __SSE_MATH__
27 #include "cpuid.h"
28 #endif
29 
/* Report whether the processor supports SSE.  When the file is
   compiled with SSE math enabled the answer is known at compile time;
   otherwise query CPUID leaf 1 and test the SSE feature bit in EDX.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* The compiler already emits SSE math, so SSE must be present.  */
  return 1;
#else
  unsigned int regs[4];

  if (!__get_cpuid (1, &regs[0], &regs[1], &regs[2], &regs[3]))
    return 0;

  return regs[3] & bit_SSE;
#endif
}
44 
/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Divide by zero.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six exception mask bits.  */

/* The same six bit positions, viewed as exception status flags.  */
#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0	/* Round to nearest.  */
#define _FPU_RC_DOWN    0x1	/* Round toward -infinity.  */
#define _FPU_RC_UP      0x2	/* Round toward +infinity.  */
#define _FPU_RC_ZERO    0x3	/* Round toward zero (truncate).  */

#define _FPU_RC_MASK    0x3	/* Mask covering the two RC bits.  */

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
69 
/* This structure corresponds to the layout of the block
   written by FSTENV.  */
struct fenv
{
  unsigned short int __control_word;	/* Exception masks and rounding control.  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* Accrued exception flags.  */
  unsigned short int __unused2;
  unsigned short int __tags;		/* Register stack tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;			/* Last instruction pointer.  */
  unsigned short int __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;		/* Last operand pointer.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* Not written by FSTENV; filled in
					   separately when SSE is present.  */
} __attribute__ ((gcc_struct));

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93 
/* Force X / Y to be evaluated at run time so that the division raises
   the corresponding hardware exception.  The first (empty) asm pins X
   in an SSE ("x") or x87 ("t") register so the compiler cannot fold
   the division; the second consumes the quotient so it cannot be
   optimized away.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y)					\
  do {									\
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y));	\
  } while (0)
#else
# define __math_force_eval_div(x, y)					\
  do {									\
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y));	\
  } while (0)
#endif
105 
106 /* Raise the supported floating-point exceptions from EXCEPTS.  Other
107    bits in EXCEPTS are ignored.  Code originally borrowed from
108    libatomic/config/x86/fenv.c.  */
109 
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  EXCEPTS uses the _FPU_MASK_* bit
   layout.  Exceptions that arithmetic can raise are raised by a real
   division; the others are injected by editing the saved x87 status
   word and reloading it.  */
static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* 0.0f / 0.0f raises an invalid-operation exception.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* No simple arithmetic raises only DENORMAL, so set the flag
	 directly in the saved environment, reload it, and let fwait
	 report the now-pending exception.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1.0f / 0.0f raises a divide-by-zero exception.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: same status-word injection as for DENORMAL.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: same status-word injection as for DENORMAL.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1.0f / 3.0f is inexact, raising the precision exception.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
152 
153 
154 void
set_fpu_trap_exceptions(int trap,int notrap)155 set_fpu_trap_exceptions (int trap, int notrap)
156 {
157   int exc_set = 0, exc_clr = 0;
158   unsigned short cw;
159 
160   if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
161   if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
162   if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
163   if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
164   if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
165   if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
166 
167   if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
168   if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
169   if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
170   if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
171   if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
172   if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
173 
174   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
175 
176   cw |= exc_clr;
177   cw &= ~exc_set;
178 
179   __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
180 
181   if (has_sse())
182     {
183       unsigned int cw_sse;
184 
185       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
186 
187       /* The SSE exception masks are shifted by 7 bits.  */
188       cw_sse |= (exc_clr << 7);
189       cw_sse &= ~(exc_set << 7);
190 
191       /* Clear stalled exception flags.  */
192       cw_sse &= ~_FPU_EX_ALL;
193 
194       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
195     }
196 }
197 
/* Apply the exception-trapping configuration requested at program
   startup (the -ffpe-trap option, stored in options.fpe).  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
203 
204 int
get_fpu_trap_exceptions(void)205 get_fpu_trap_exceptions (void)
206 {
207   unsigned short cw;
208   int mask;
209   int res = 0;
210 
211   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
212   mask = cw;
213 
214   if (has_sse())
215     {
216       unsigned int cw_sse;
217 
218       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
219 
220       /* The SSE exception masks are shifted by 7 bits.  */
221       mask |= (cw_sse >> 7);
222     }
223 
224   mask = ~mask & _FPU_MASK_ALL;
225 
226   if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
227   if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
228   if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
229   if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
230   if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
231   if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
232 
233   return res;
234 }
235 
/* Trapping is available for every supported exception flag on this
   target, so always report success.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
241 
242 int
get_fpu_except_flags(void)243 get_fpu_except_flags (void)
244 {
245   unsigned short cw;
246   int excepts;
247   int res = 0;
248 
249   __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
250   excepts = cw;
251 
252   if (has_sse())
253     {
254       unsigned int cw_sse;
255 
256       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
257       excepts |= cw_sse;
258     }
259 
260   excepts &= _FPU_EX_ALL;
261 
262   if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
263   if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
264   if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
265   if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
266   if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
267   if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
268 
269   return res;
270 }
271 
272 void
set_fpu_except_flags(int set,int clear)273 set_fpu_except_flags (int set, int clear)
274 {
275   struct fenv temp;
276   int exc_set = 0, exc_clr = 0;
277 
278   /* Translate from GFC_PE_* values to _FPU_MASK_* values.  */
279   if (set & GFC_FPE_INVALID)
280     exc_set |= _FPU_MASK_IM;
281   if (clear & GFC_FPE_INVALID)
282     exc_clr |= _FPU_MASK_IM;
283 
284   if (set & GFC_FPE_DENORMAL)
285     exc_set |= _FPU_MASK_DM;
286   if (clear & GFC_FPE_DENORMAL)
287     exc_clr |= _FPU_MASK_DM;
288 
289   if (set & GFC_FPE_ZERO)
290     exc_set |= _FPU_MASK_ZM;
291   if (clear & GFC_FPE_ZERO)
292     exc_clr |= _FPU_MASK_ZM;
293 
294   if (set & GFC_FPE_OVERFLOW)
295     exc_set |= _FPU_MASK_OM;
296   if (clear & GFC_FPE_OVERFLOW)
297     exc_clr |= _FPU_MASK_OM;
298 
299   if (set & GFC_FPE_UNDERFLOW)
300     exc_set |= _FPU_MASK_UM;
301   if (clear & GFC_FPE_UNDERFLOW)
302     exc_clr |= _FPU_MASK_UM;
303 
304   if (set & GFC_FPE_INEXACT)
305     exc_set |= _FPU_MASK_PM;
306   if (clear & GFC_FPE_INEXACT)
307     exc_clr |= _FPU_MASK_PM;
308 
309 
310   /* Change the flags. This is tricky on 387 (unlike SSE), because we have
311      FNSTSW but no FLDSW instruction.  */
312   __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
313   temp.__status_word &= ~exc_clr;
314   __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
315 
316   /* Change the flags on SSE.  */
317 
318   if (has_sse())
319   {
320     unsigned int cw_sse;
321 
322     __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
323     cw_sse &= ~exc_clr;
324     __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
325   }
326 
327   local_feraiseexcept (exc_set);
328 }
329 
/* Every supported exception flag can be queried and modified on this
   target, so always report success.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
335 
336 void
set_fpu_rounding_mode(int round)337 set_fpu_rounding_mode (int round)
338 {
339   int round_mode;
340   unsigned short cw;
341 
342   switch (round)
343     {
344     case GFC_FPE_TONEAREST:
345       round_mode = _FPU_RC_NEAREST;
346       break;
347     case GFC_FPE_UPWARD:
348       round_mode = _FPU_RC_UP;
349       break;
350     case GFC_FPE_DOWNWARD:
351       round_mode = _FPU_RC_DOWN;
352       break;
353     case GFC_FPE_TOWARDZERO:
354       round_mode = _FPU_RC_ZERO;
355       break;
356     default:
357       return; /* Should be unreachable.  */
358     }
359 
360   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
361 
362   /* The x87 round control bits are shifted by 10 bits.  */
363   cw &= ~(_FPU_RC_MASK << 10);
364   cw |= round_mode << 10;
365 
366   __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
367 
368   if (has_sse())
369     {
370       unsigned int cw_sse;
371 
372       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
373 
374       /* The SSE round control bits are shifted by 13 bits.  */
375       cw_sse &= ~(_FPU_RC_MASK << 13);
376       cw_sse |= round_mode << 13;
377 
378       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
379     }
380 }
381 
382 int
get_fpu_rounding_mode(void)383 get_fpu_rounding_mode (void)
384 {
385   int round_mode;
386 
387 #ifdef __SSE_MATH__
388   unsigned int cw;
389 
390   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
391 
392   /* The SSE round control bits are shifted by 13 bits.  */
393   round_mode = cw >> 13;
394 #else
395   unsigned short cw;
396 
397   __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
398 
399   /* The x87 round control bits are shifted by 10 bits.  */
400   round_mode = cw >> 10;
401 #endif
402 
403   round_mode &= _FPU_RC_MASK;
404 
405   switch (round_mode)
406     {
407     case _FPU_RC_NEAREST:
408       return GFC_FPE_TONEAREST;
409     case _FPU_RC_UP:
410       return GFC_FPE_UPWARD;
411     case _FPU_RC_DOWN:
412       return GFC_FPE_DOWNWARD;
413     case _FPU_RC_ZERO:
414       return GFC_FPE_TOWARDZERO;
415     default:
416       return 0; /* Should be unreachable.  */
417     }
418 }
419 
/* All four IEEE rounding modes are available on this target, so
   always report success.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
425 
/* Save the current FPU environment into the caller-provided STATE
   buffer (laid out as struct fenv): the x87 environment via fnstenv,
   plus MXCSR when SSE is available.  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
440 
/* Restore an FPU environment previously saved by get_fpu_state from
   the STATE buffer (laid out as struct fenv).  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
453 
454 
/* Report whether underflow control is available for real kind KIND.
   It relies on the MXCSR flush-to-zero bit, so SSE is required, and
   only the SSE-handled kinds 4 and 8 are supported.  */
int
support_fpu_underflow_control (int kind)
{
  if (!has_sse ())
    return 0;

  return kind == 4 || kind == 8;
}
463 
464 
/* Query the current underflow mode.  Returns 0 for abrupt underflow
   (MXCSR flush-to-zero set) and 1 for gradual underflow.  Without
   SSE there is no FTZ bit, so gradual underflow is reported.  */
int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}
478 
479 
480 void
set_fpu_underflow_mode(int gradual)481 set_fpu_underflow_mode (int gradual)
482 {
483   unsigned int cw_sse;
484 
485   if (!has_sse())
486     return;
487 
488   __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
489 
490   if (gradual)
491     cw_sse &= ~MXCSR_FTZ;
492   else
493     cw_sse |= MXCSR_FTZ;
494 
495   __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
496 }
497 
498