/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __SSE_MATH__
#include "cpuid.h"
#endif
29
/* Return nonzero if the processor supports the SSE instruction set,
   so that the MXCSR register can be read and written.  */

static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* Compiled with SSE math enabled: SSE is guaranteed present.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1 reports SSE support in EDX (bit_SSE).  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  return 0;
#endif
}
44
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   The same bit positions serve as exception *mask* bits in the x87
   control word and as exception *flag* bits in the x87 status word.  */
#define _FPU_MASK_IM 0x01	/* Invalid operation.  */
#define _FPU_MASK_DM 0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM 0x04	/* Division by zero.  */
#define _FPU_MASK_OM 0x08	/* Overflow.  */
#define _FPU_MASK_UM 0x10	/* Underflow.  */
#define _FPU_MASK_PM 0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f

/* All exception flag bits, for clearing stale flags at once.  */
#define _FPU_EX_ALL 0x3f

/* i387 rounding modes (2-bit RC field of the control word).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN 0x1
#define _FPU_RC_UP 0x2
#define _FPU_RC_ZERO 0x3

#define _FPU_RC_MASK 0x3

/* Enable flush to zero mode (FTZ bit of MXCSR).  */

#define MXCSR_FTZ (1 << 15)
68
69
/* This structure corresponds to the layout of the block
   written by FSTENV, extended with the SSE MXCSR register, which
   get_fpu_state/set_fpu_state save and restore after the x87 part.  */
typedef struct
{
  unsigned short int __control_word;	/* x87 control word (masks, RC).  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* x87 status word (exception flags).  */
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE control/status; not written by FSTENV.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93
94
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  /* Invalid operation: raised by computing 0.0 / 0.0.  */
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Denormal operand cannot easily be produced arithmetically: set the
     flag directly in the saved x87 environment, reload it, and use
     fwait to deliver the (possibly unmasked) exception.  */
  if (excepts & _FPU_MASK_DM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Division by zero: raised by computing 1.0 / 0.0.  */
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  /* Overflow: same environment-patching technique as denormal.  */
  if (excepts & _FPU_MASK_OM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Underflow: same environment-patching technique as denormal.  */
  if (excepts & _FPU_MASK_UM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  /* Inexact: raised by computing 1.0 / 3.0.  */
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
157
158
159 void
set_fpu_trap_exceptions(int trap,int notrap)160 set_fpu_trap_exceptions (int trap, int notrap)
161 {
162 int exc_set = 0, exc_clr = 0;
163 unsigned short cw;
164
165 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
166 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
167 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
168 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
169 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
170 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
171
172 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
173 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
174 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
175 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
176 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
177 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
178
179 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
180
181 cw |= exc_clr;
182 cw &= ~exc_set;
183
184 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
185
186 if (has_sse())
187 {
188 unsigned int cw_sse;
189
190 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
191
192 /* The SSE exception masks are shifted by 7 bits. */
193 cw_sse |= (exc_clr << 7);
194 cw_sse &= ~(exc_set << 7);
195
196 /* Clear stalled exception flags. */
197 cw_sse &= ~_FPU_EX_ALL;
198
199 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
200 }
201 }
202
/* Initialize the FPU at program startup: enable trapping for the
   exceptions selected by the run-time option flags (options.fpe).  */

void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
208
209 int
get_fpu_trap_exceptions(void)210 get_fpu_trap_exceptions (void)
211 {
212 unsigned short cw;
213 int mask;
214 int res = 0;
215
216 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
217 mask = cw;
218
219 if (has_sse())
220 {
221 unsigned int cw_sse;
222
223 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
224
225 /* The SSE exception masks are shifted by 7 bits. */
226 mask |= (cw_sse >> 7);
227 }
228
229 mask = ~mask & _FPU_MASK_ALL;
230
231 if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
232 if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
233 if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
234 if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
235 if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
236 if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
237
238 return res;
239 }
240
/* Report whether trapping of exception FLAG is supported; on x87/SSE
   every GFC_FPE_* exception can be trapped.  */

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
246
247 int
get_fpu_except_flags(void)248 get_fpu_except_flags (void)
249 {
250 unsigned short cw;
251 int excepts;
252 int res = 0;
253
254 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
255 excepts = cw;
256
257 if (has_sse())
258 {
259 unsigned int cw_sse;
260
261 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
262 excepts |= cw_sse;
263 }
264
265 excepts &= _FPU_EX_ALL;
266
267 if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
268 if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
269 if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
270 if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
271 if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
272 if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
273
274 return res;
275 }
276
277 void
set_fpu_except_flags(int set,int clear)278 set_fpu_except_flags (int set, int clear)
279 {
280 my_fenv_t temp;
281 int exc_set = 0, exc_clr = 0;
282
283 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
284 if (set & GFC_FPE_INVALID)
285 exc_set |= _FPU_MASK_IM;
286 if (clear & GFC_FPE_INVALID)
287 exc_clr |= _FPU_MASK_IM;
288
289 if (set & GFC_FPE_DENORMAL)
290 exc_set |= _FPU_MASK_DM;
291 if (clear & GFC_FPE_DENORMAL)
292 exc_clr |= _FPU_MASK_DM;
293
294 if (set & GFC_FPE_ZERO)
295 exc_set |= _FPU_MASK_ZM;
296 if (clear & GFC_FPE_ZERO)
297 exc_clr |= _FPU_MASK_ZM;
298
299 if (set & GFC_FPE_OVERFLOW)
300 exc_set |= _FPU_MASK_OM;
301 if (clear & GFC_FPE_OVERFLOW)
302 exc_clr |= _FPU_MASK_OM;
303
304 if (set & GFC_FPE_UNDERFLOW)
305 exc_set |= _FPU_MASK_UM;
306 if (clear & GFC_FPE_UNDERFLOW)
307 exc_clr |= _FPU_MASK_UM;
308
309 if (set & GFC_FPE_INEXACT)
310 exc_set |= _FPU_MASK_PM;
311 if (clear & GFC_FPE_INEXACT)
312 exc_clr |= _FPU_MASK_PM;
313
314
315 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
316 FNSTSW but no FLDSW instruction. */
317 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
318 temp.__status_word &= ~exc_clr;
319 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
320
321 /* Change the flags on SSE. */
322
323 if (has_sse())
324 {
325 unsigned int cw_sse;
326
327 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
328 cw_sse &= ~exc_clr;
329 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
330 }
331
332 local_feraiseexcept (exc_set);
333 }
334
/* Report whether the exception flag FLAG can be queried and set; all
   GFC_FPE_* flags are supported on x87/SSE.  */

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
340
341 void
set_fpu_rounding_mode(int round)342 set_fpu_rounding_mode (int round)
343 {
344 int round_mode;
345 unsigned short cw;
346
347 switch (round)
348 {
349 case GFC_FPE_TONEAREST:
350 round_mode = _FPU_RC_NEAREST;
351 break;
352 case GFC_FPE_UPWARD:
353 round_mode = _FPU_RC_UP;
354 break;
355 case GFC_FPE_DOWNWARD:
356 round_mode = _FPU_RC_DOWN;
357 break;
358 case GFC_FPE_TOWARDZERO:
359 round_mode = _FPU_RC_ZERO;
360 break;
361 default:
362 return; /* Should be unreachable. */
363 }
364
365 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
366
367 /* The x87 round control bits are shifted by 10 bits. */
368 cw &= ~(_FPU_RC_MASK << 10);
369 cw |= round_mode << 10;
370
371 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
372
373 if (has_sse())
374 {
375 unsigned int cw_sse;
376
377 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
378
379 /* The SSE round control bits are shifted by 13 bits. */
380 cw_sse &= ~(_FPU_RC_MASK << 13);
381 cw_sse |= round_mode << 13;
382
383 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
384 }
385 }
386
387 int
get_fpu_rounding_mode(void)388 get_fpu_rounding_mode (void)
389 {
390 int round_mode;
391
392 #ifdef __SSE_MATH__
393 unsigned int cw;
394
395 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
396
397 /* The SSE round control bits are shifted by 13 bits. */
398 round_mode = cw >> 13;
399 #else
400 unsigned short cw;
401
402 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
403
404 /* The x87 round control bits are shifted by 10 bits. */
405 round_mode = cw >> 10;
406 #endif
407
408 round_mode &= _FPU_RC_MASK;
409
410 switch (round_mode)
411 {
412 case _FPU_RC_NEAREST:
413 return GFC_FPE_TONEAREST;
414 case _FPU_RC_UP:
415 return GFC_FPE_UPWARD;
416 case _FPU_RC_DOWN:
417 return GFC_FPE_DOWNWARD;
418 case _FPU_RC_ZERO:
419 return GFC_FPE_TOWARDZERO;
420 default:
421 return 0; /* Should be unreachable. */
422 }
423 }
424
/* Report whether rounding mode MODE is supported; all four GFC_FPE_*
   rounding modes map onto x87/SSE RC values.  */

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
430
/* Save the full FPU state (x87 environment plus, when SSE is present,
   the MXCSR register) into the caller-provided STATE buffer, which must
   hold at least sizeof (my_fenv_t) bytes.  */

void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
445
/* Restore an FPU state previously saved by get_fpu_state from the
   STATE buffer (x87 environment, plus MXCSR when SSE is present).  */

void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
458
459
/* Report whether underflow control (abrupt vs. gradual) is available
   for real kind KIND.  It requires the SSE flush-to-zero bit, and is
   provided only for kinds 4 and 8.  */

int
support_fpu_underflow_control (int kind)
{
  if (!has_sse ())
    return 0;

  return kind == 4 || kind == 8;
}
468
469
470 int
get_fpu_underflow_mode(void)471 get_fpu_underflow_mode (void)
472 {
473 unsigned int cw_sse;
474
475 if (!has_sse())
476 return 1;
477
478 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
479
480 /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
481 return (cw_sse & MXCSR_FTZ) ? 0 : 1;
482 }
483
484
485 void
set_fpu_underflow_mode(int gradual)486 set_fpu_underflow_mode (int gradual)
487 {
488 unsigned int cw_sse;
489
490 if (!has_sse())
491 return;
492
493 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
494
495 if (gradual)
496 cw_sse &= ~MXCSR_FTZ;
497 else
498 cw_sse |= MXCSR_FTZ;
499
500 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
501 }
502
503