/*
 * Copyright (C) 2005-2013 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 3, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Under Section 7 of GPL version 3, you are granted additional
 * permissions described in the GCC Runtime Library Exception, version
 * 3.1, as published by the Free Software Foundation.
 *
 * You should have received a copy of the GNU General Public License and
 * a copy of the GCC Runtime Library Exception along with this program;
 * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#define MXCSR_DAZ (1 << 6)      /* Enable denormals are zero mode */
#define MXCSR_FTZ (1 << 15)     /* Enable flush to zero mode */

#ifndef __x86_64__
/* All 64-bit targets have SSE and DAZ;
   only check them explicitly for 32-bit ones. */
#include "cpuid.h"
#endif

#if !defined __x86_64__ && defined __sun__ && defined __svr4__
#include <signal.h>
#include <ucontext.h>

/* Set to 1 by sigill_hdlr when the SSE probe instruction executed by
   set_fast_math raised SIGILL.  */
static volatile sig_atomic_t sigill_caught;

/* SIGILL handler installed (with SA_SIGINFO) around the SSE probe in
   set_fast_math.  Records that the signal was caught and resumes
   execution just past the faulting instruction.

   SIG and SIP are unused.  CTX is the interrupted context; the
   three-argument handler form receives it as a void *, so it is
   converted to ucontext_t * here.  */
static void
sigill_hdlr (int sig __attribute__((unused)),
             siginfo_t *sip __attribute__((unused)),
             void *ctx)
{
  ucontext_t *ucp = ctx;

  sigill_caught = 1;
  /* Set PC to the instruction after the faulting one to skip over it,
     otherwise we enter an infinite loop.  3 is the size of the movaps
     instruction.  */
  ucp->uc_mcontext.gregs[EIP] += 3;
  setcontext (ucp);
}
#endif

/* Constructor: turn on flush-to-zero (and, where available,
   denormals-are-zero) in the MXCSR register.

   On x86_64 both bits are set unconditionally.  On 32-bit targets the
   function does nothing unless CPUID reports SSE; DAZ is only set when
   FXSAVE is available and the saved MXCSR mask reports the bit as
   supported.  */
static void __attribute__((constructor))
#ifndef __x86_64__
/* The i386 ABI only requires 4-byte stack alignment, so this is necessary
   to make sure the fxsave struct gets correct alignment.
   See PR27537 and PR28621.  */
__attribute__ ((force_align_arg_pointer))
#endif
set_fast_math (void)
{
#ifndef __x86_64__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return;

  if (edx & bit_SSE)
    {
      unsigned int mxcsr;

#if defined __sun__ && defined __svr4__
      /* Solaris 2 before Solaris 9 4/04 cannot execute SSE instructions even
         if the CPU supports them.  Programs receive SIGILL instead, so check
         for that at runtime.  */
      struct sigaction act, oact;

      /* With SA_SIGINFO the three-argument handler must be installed
         through sa_sigaction, not sa_handler.  */
      act.sa_sigaction = sigill_hdlr;
      sigemptyset (&act.sa_mask);
      /* Need to set SA_SIGINFO so a ucontext_t * is passed to the handler.  */
      act.sa_flags = SA_SIGINFO;
      sigaction (SIGILL, &act, &oact);

      /* We need a single SSE instruction here so the handler can safely skip
         over it.  */
      __asm__ volatile ("movaps %xmm0,%xmm0");

      /* Restore whatever SIGILL disposition was in place before the probe.  */
      sigaction (SIGILL, &oact, NULL);

      if (sigill_caught)
        return;
#endif /* __sun__ && __svr4__ */

      if (edx & bit_FXSAVE)
        {
          /* Check if DAZ is available.  */
          struct
            {
              unsigned short cwd;
              unsigned short swd;
              unsigned short twd;
              unsigned short fop;
              unsigned int fip;
              unsigned int fcs;
              unsigned int foo;
              unsigned int fos;
              unsigned int mxcsr;
              unsigned int mxcsr_mask;
              unsigned int st_space[32];
              unsigned int xmm_space[32];
              unsigned int padding[56];
            } __attribute__ ((aligned (16))) fxsave;

          /* This is necessary since some implementations of FXSAVE
             do not modify reserved areas within the image.  */
          fxsave.mxcsr_mask = 0;

          __builtin_ia32_fxsave (&fxsave);

          mxcsr = fxsave.mxcsr;

          /* Only request DAZ when the saved mask reports it as settable.  */
          if (fxsave.mxcsr_mask & MXCSR_DAZ)
            mxcsr |= MXCSR_DAZ;
        }
      else
        mxcsr = __builtin_ia32_stmxcsr ();

      mxcsr |= MXCSR_FTZ;
      __builtin_ia32_ldmxcsr (mxcsr);
    }
#else
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
  __builtin_ia32_ldmxcsr (mxcsr);
#endif
}