/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __SSE_MATH__
#include "cpuid.h"
#endif

/* Return nonzero if the processor supports SSE.  When the file is
   compiled for SSE math (__SSE_MATH__), SSE is present by definition;
   otherwise query CPUID leaf 1 for the SSE feature bit.  */

static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}

/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01
#define _FPU_MASK_DM  0x02
#define _FPU_MASK_ZM  0x04
#define _FPU_MASK_OM  0x08
#define _FPU_MASK_UM  0x10
#define _FPU_MASK_PM  0x20
#define _FPU_MASK_ALL 0x3f

#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)


/* This structure corresponds to the layout of the block
   written by FSTENV.  */
typedef struct
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
                "GFC_FPE_STATE_BUFFER_SIZE is too small");


/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      /* Raise "invalid" by computing 0.0f / 0.0f.  */
      float f = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* "Denormal operand" cannot be raised arithmetically here; set the
	 flag in the saved x87 status word and reload the environment,
	 then fwait to deliver the pending exception.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* Raise "division by zero" by computing 1.0f / 0.0f.  */
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Same status-word technique as for the denormal exception.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Same status-word technique as for the denormal exception.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* Raise "inexact" by computing 1.0f / 3.0f.  */
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}


/* Enable trapping of the exceptions given by TRAP and disable trapping
   of those given by NOTRAP (both GFC_FPE_* bit masks), on the x87 unit
   and, if present, the SSE unit.  A set mask bit means the exception is
   masked, i.e. does not trap, so trapping clears bits and not-trapping
   sets them.  */

void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  /* Translate GFC_FPE_* bits into i387 control-word mask bits.  */
  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  cw |= exc_clr;
  cw &= ~exc_set;

  /* Clear pending exceptions before unmasking, so stale flags do not
     trap immediately when the new control word is loaded.  */
  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}


/* Set up trapping according to the runtime's -ffpe-trap options.  */

void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}


/* Return the set of exceptions that currently trap, as GFC_FPE_* bits,
   merged from the x87 control word and (if present) MXCSR.  */

int
get_fpu_trap_exceptions (void)
{
  unsigned short cw;
  int mask;
  int res = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  mask = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      mask |= (cw_sse >> 7);
    }

  /* A cleared mask bit means the exception traps.  */
  mask = ~mask & _FPU_MASK_ALL;

  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}


/* All exceptions are supported for trapping on this target.  */

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}


/* Return the currently raised exception flags, as GFC_FPE_* bits,
   merged from the x87 status word and (if present) MXCSR.  */

int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      /* The MXCSR exception flags occupy the same low six bits as the
	 x87 status word, so they can be OR'ed in directly.  */
      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}


/* Raise the exception flags in SET and clear those in CLEAR (both
   GFC_FPE_* bit masks), on the x87 unit and (if present) the SSE
   unit.  */

void
set_fpu_except_flags (int set, int clear)
{
  my_fenv_t temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags.  This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      cw_sse &= ~exc_clr;
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }

  local_feraiseexcept (exc_set);
}


/* All exception flags are supported on this target.  */

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}


/* Set the rounding mode given as a GFC_FPE_* rounding value, on the
   x87 unit and (if present) the SSE unit.  Unknown values are silently
   ignored.  */

void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}


/* Return the current rounding mode as a GFC_FPE_* value, read from
   MXCSR when compiled for SSE math, from the x87 control word
   otherwise.  */

int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  round_mode &= _FPU_RC_MASK;

  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}


/* All rounding modes are supported on this target.  */

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}


/* Save the FPU state (x87 environment plus MXCSR, if present) into the
   caller-provided buffer STATE, which must hold at least
   GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the static assert on
   my_fenv_t above).  */

void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}


/* Restore an FPU state previously saved by get_fpu_state.  */

void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}


/* Underflow control requires the MXCSR flush-to-zero bit, so it is
   only supported with SSE, and only for the real kinds handled in SSE
   registers (4 and 8).  */

int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  return (kind == 4 || kind == 8) ? 1 : 0;
}


/* Return the current underflow mode: 0 for abrupt underflow (flush to
   zero), 1 for gradual underflow.  Without SSE, gradual underflow is
   the only mode.  */

int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}


/* Select gradual (GRADUAL nonzero) or abrupt/flush-to-zero underflow
   by toggling the MXCSR FTZ bit.  No effect without SSE.  */

void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}