xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/math/hardware.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1*b1e83836Smrg // Written in the D programming language.
2*b1e83836Smrg 
3*b1e83836Smrg /**
4*b1e83836Smrg This is a submodule of $(MREF std, math).
5*b1e83836Smrg 
6*b1e83836Smrg It contains hardware support for floating point numbers.
7*b1e83836Smrg 
8*b1e83836Smrg Copyright: Copyright The D Language Foundation 2000 - 2011.
9*b1e83836Smrg License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
10*b1e83836Smrg Authors:   $(HTTP digitalmars.com, Walter Bright), Don Clugston,
11*b1e83836Smrg            Conversion of CEPHES math library to D by Iain Buclaw and David Nadlinger
12*b1e83836Smrg Source: $(PHOBOSSRC std/math/hardware.d)
13*b1e83836Smrg  */
14*b1e83836Smrg 
15*b1e83836Smrg /* NOTE: This file has been patched from the original DMD distribution to
16*b1e83836Smrg  * work with the GDC compiler.
17*b1e83836Smrg  */
18*b1e83836Smrg module std.math.hardware;
19*b1e83836Smrg 
20*b1e83836Smrg static import core.stdc.fenv;
21*b1e83836Smrg 
// Fold the individual architecture identifiers into one `*_Any` identifier
// per CPU family, so the rest of the module can test a single name.
22*b1e83836Smrg version (X86)       version = X86_Any;
23*b1e83836Smrg version (X86_64)    version = X86_Any;
24*b1e83836Smrg version (PPC)       version = PPC_Any;
25*b1e83836Smrg version (PPC64)     version = PPC_Any;
26*b1e83836Smrg version (MIPS32)    version = MIPS_Any;
27*b1e83836Smrg version (MIPS64)    version = MIPS_Any;
28*b1e83836Smrg version (AArch64)   version = ARM_Any;
29*b1e83836Smrg version (ARM)       version = ARM_Any;
30*b1e83836Smrg version (S390)      version = IBMZ_Any;
31*b1e83836Smrg version (SPARC)     version = SPARC_Any;
32*b1e83836Smrg version (SPARC64)   version = SPARC_Any;
33*b1e83836Smrg version (SystemZ)   version = IBMZ_Any;
34*b1e83836Smrg version (RISCV32)   version = RISCV_Any;
35*b1e83836Smrg version (RISCV64)   version = RISCV_Any;
36*b1e83836Smrg 
// InlineAsm_X86_Any: set when either of the two D_InlineAsm_X86* identifiers
// is defined.
37*b1e83836Smrg version (D_InlineAsm_X86)    version = InlineAsm_X86_Any;
38*b1e83836Smrg version (D_InlineAsm_X86_64) version = InlineAsm_X86_Any;
39*b1e83836Smrg 
// InlineAsm_X87 follows InlineAsm_X86_Any. When it is set, `real` must have a
// 64-bit mantissa (enforced at compile time), and the Microsoft C runtime
// additionally selects InlineAsm_X87_MSVC.
40*b1e83836Smrg version (InlineAsm_X86_Any) version = InlineAsm_X87;
version(InlineAsm_X87)41*b1e83836Smrg version (InlineAsm_X87)
42*b1e83836Smrg {
43*b1e83836Smrg     static assert(real.mant_dig == 64);
44*b1e83836Smrg     version (CRuntime_Microsoft) version = InlineAsm_X87_MSVC;
45*b1e83836Smrg }
46*b1e83836Smrg 
// haveSSE: a compile-time `true` on X86_64 and on X86 under OSX; on any other
// X86 target it is an alias for core.cpuid.sse. It is not declared at all on
// non-x86 targets.
47*b1e83836Smrg version (X86_64) version = StaticallyHaveSSE;
48*b1e83836Smrg version (X86) version (OSX) version = StaticallyHaveSSE;
49*b1e83836Smrg 
version(StaticallyHaveSSE)50*b1e83836Smrg version (StaticallyHaveSSE)
51*b1e83836Smrg {
52*b1e83836Smrg     private enum bool haveSSE = true;
53*b1e83836Smrg }
version(X86)54*b1e83836Smrg else version (X86)
55*b1e83836Smrg {
56*b1e83836Smrg     static import core.cpuid;
57*b1e83836Smrg     private alias haveSSE = core.cpuid.sse;
58*b1e83836Smrg }
59*b1e83836Smrg 
// IeeeFlagsSupport: set for the x86, PowerPC, RISC-V, MIPS and ARM families,
// but only when D_SoftFloat is NOT defined (the first branch of the chain
// below is deliberately empty).
version(D_SoftFloat)60*b1e83836Smrg version (D_SoftFloat)
61*b1e83836Smrg {
62*b1e83836Smrg     // Some soft float implementations may support IEEE floating flags.
63*b1e83836Smrg     // The implementation here supports hardware flags only and is so currently
64*b1e83836Smrg     // only available for supported targets.
65*b1e83836Smrg }
66*b1e83836Smrg else version (X86_Any)   version = IeeeFlagsSupport;
67*b1e83836Smrg else version (PPC_Any)   version = IeeeFlagsSupport;
68*b1e83836Smrg else version (RISCV_Any) version = IeeeFlagsSupport;
69*b1e83836Smrg else version (MIPS_Any)  version = IeeeFlagsSupport;
70*b1e83836Smrg else version (ARM_Any)   version = IeeeFlagsSupport;
71*b1e83836Smrg 
72*b1e83836Smrg // Struct FloatingPointControl is only available if hardware FP units are available.
// FloatingPointControlSupport therefore requires both D_HardFloat and
// IeeeFlagsSupport.
version(D_HardFloat)73*b1e83836Smrg version (D_HardFloat)
74*b1e83836Smrg {
75*b1e83836Smrg     // FloatingPointControl.clearExceptions() depends on version IeeeFlagsSupport
76*b1e83836Smrg     version (IeeeFlagsSupport) version = FloatingPointControlSupport;
77*b1e83836Smrg }
78*b1e83836Smrg 
// Unittest gating: under version GNU neither IeeeFlagsUnittest nor
// FloatingPointControlUnittest is set, so unittests guarded by those
// identifiers are compiled out; every other compiler enables both.
version(GNU)79*b1e83836Smrg version (GNU)
80*b1e83836Smrg {
81*b1e83836Smrg     // The compiler can unexpectedly rearrange floating point operations and
82*b1e83836Smrg     // access to the floating point status flags when optimizing. This means
83*b1e83836Smrg     // ieeeFlags tests cannot be reliably checked in optimized code.
84*b1e83836Smrg     // See https://github.com/ldc-developers/ldc/issues/888
85*b1e83836Smrg }
86*b1e83836Smrg else
87*b1e83836Smrg {
88*b1e83836Smrg     version = IeeeFlagsUnittest;
89*b1e83836Smrg     version = FloatingPointControlUnittest;
90*b1e83836Smrg }
91*b1e83836Smrg 
version(IeeeFlagsSupport)92*b1e83836Smrg version (IeeeFlagsSupport)
93*b1e83836Smrg {
94*b1e83836Smrg 
95*b1e83836Smrg /** IEEE exception status flags ('sticky bits')
96*b1e83836Smrg 
97*b1e83836Smrg  These flags indicate that an exceptional floating-point condition has occurred.
98*b1e83836Smrg  They indicate that a NaN or an infinity has been generated, that a result
99*b1e83836Smrg  is inexact, or that a signalling NaN has been encountered. If floating-point
100*b1e83836Smrg  exceptions are enabled (unmasked), a hardware exception will be generated
101*b1e83836Smrg  instead of setting these flags.
102*b1e83836Smrg  */
103*b1e83836Smrg struct IeeeFlags
104*b1e83836Smrg {
105*b1e83836Smrg nothrow @nogc:
106*b1e83836Smrg 
107*b1e83836Smrg private:
108*b1e83836Smrg     // The x87 FPU status register is 16 bits.
109*b1e83836Smrg     // The Pentium SSE2 status register is 32 bits.
110*b1e83836Smrg     // The ARM and PowerPC FPSCR is a 32-bit register.
111*b1e83836Smrg     // The SPARC FSR is a 32bit register (64 bits for SPARC 7 & 8, but high bits are uninteresting).
112*b1e83836Smrg     // The RISC-V (32 & 64 bit) fcsr is 32-bit register.
    // Stored status bits; the public accessors below test them against the
    // *_MASK constants.
113*b1e83836Smrg     uint flags;
114*b1e83836Smrg 
    // Bit masks for the individual exception flags. With the Microsoft C
    // runtime the values are spelled out literally; everywhere else they are
    // taken from core.stdc.fenv.
115*b1e83836Smrg     version (CRuntime_Microsoft)
116*b1e83836Smrg     {
117*b1e83836Smrg         // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
118*b1e83836Smrg         // Applies to both x87 status word (16 bits) and SSE2 status word(32 bits).
119*b1e83836Smrg         enum : int
120*b1e83836Smrg         {
121*b1e83836Smrg             INEXACT_MASK   = 0x20,
122*b1e83836Smrg             UNDERFLOW_MASK = 0x10,
123*b1e83836Smrg             OVERFLOW_MASK  = 0x08,
124*b1e83836Smrg             DIVBYZERO_MASK = 0x04,
125*b1e83836Smrg             INVALID_MASK   = 0x01,
126*b1e83836Smrg 
127*b1e83836Smrg             EXCEPTIONS_MASK = 0b11_1111
128*b1e83836Smrg         }
129*b1e83836Smrg         // Don't bother about subnormals, they are not supported on most CPUs.
130*b1e83836Smrg         //  SUBNORMAL_MASK = 0x02;
131*b1e83836Smrg     }
132*b1e83836Smrg     else
133*b1e83836Smrg     {
134*b1e83836Smrg         enum : int
135*b1e83836Smrg         {
136*b1e83836Smrg             INEXACT_MASK    = core.stdc.fenv.FE_INEXACT,
137*b1e83836Smrg             UNDERFLOW_MASK  = core.stdc.fenv.FE_UNDERFLOW,
138*b1e83836Smrg             OVERFLOW_MASK   = core.stdc.fenv.FE_OVERFLOW,
139*b1e83836Smrg             DIVBYZERO_MASK  = core.stdc.fenv.FE_DIVBYZERO,
140*b1e83836Smrg             INVALID_MASK    = core.stdc.fenv.FE_INVALID,
141*b1e83836Smrg             EXCEPTIONS_MASK = core.stdc.fenv.FE_ALL_EXCEPT,
142*b1e83836Smrg         }
143*b1e83836Smrg     }
144*b1e83836Smrg 
    // Reads the current hardware exception status bits.
    //
    //  - x86 (both the GNU and the InlineAsm_X86_Any paths): stores the x87
    //    status word with `fstsw`; when haveSSE is true the MXCSR value
    //    (`stmxcsr`) is OR-ed in. The result is masked with EXCEPTIONS_MASK.
    //  - ARM under GNU: reads FPSCR and keeps the low five bits (0x1F);
    //    returns 0 when ARM_SoftFloat is defined.
    //  - RISC-V: reads the flags with `frflags`; the GNU path returns 0 when
    //    D_SoftFloat is defined.
    //  - Every other configuration ends in a failing assert.
    //
    // NOTE(review): declared `pure` although the result depends on hardware
    // state -- presumably so that `ieeeFlags` can be called from pure code;
    // confirm before relying on it.
145*b1e83836Smrg     static uint getIeeeFlags() @trusted pure
146*b1e83836Smrg     {
147*b1e83836Smrg         version (GNU)
148*b1e83836Smrg         {
149*b1e83836Smrg             version (X86_Any)
150*b1e83836Smrg             {
151*b1e83836Smrg                 ushort sw;
152*b1e83836Smrg                 asm pure nothrow @nogc
153*b1e83836Smrg                 {
154*b1e83836Smrg                     "fstsw %0" : "=a" (sw);
155*b1e83836Smrg                 }
156*b1e83836Smrg                 // OR the result with the SSE2 status register (MXCSR).
157*b1e83836Smrg                 if (haveSSE)
158*b1e83836Smrg                 {
159*b1e83836Smrg                     uint mxcsr;
160*b1e83836Smrg                     asm pure nothrow @nogc
161*b1e83836Smrg                     {
162*b1e83836Smrg                         "stmxcsr %0" : "=m" (mxcsr);
163*b1e83836Smrg                     }
164*b1e83836Smrg                     return (sw | mxcsr) & EXCEPTIONS_MASK;
165*b1e83836Smrg                 }
166*b1e83836Smrg                 else
167*b1e83836Smrg                     return sw & EXCEPTIONS_MASK;
168*b1e83836Smrg             }
169*b1e83836Smrg             else version (ARM)
170*b1e83836Smrg             {
171*b1e83836Smrg                 version (ARM_SoftFloat)
172*b1e83836Smrg                     return 0;
173*b1e83836Smrg                 else
174*b1e83836Smrg                 {
175*b1e83836Smrg                     uint result = void;
176*b1e83836Smrg                     asm pure nothrow @nogc
177*b1e83836Smrg                     {
178*b1e83836Smrg                         "vmrs %0, FPSCR; and %0, %0, #0x1F;" : "=r" (result);
179*b1e83836Smrg                     }
180*b1e83836Smrg                     return result;
181*b1e83836Smrg                 }
182*b1e83836Smrg             }
183*b1e83836Smrg             else version (RISCV_Any)
184*b1e83836Smrg             {
185*b1e83836Smrg                 version (D_SoftFloat)
186*b1e83836Smrg                     return 0;
187*b1e83836Smrg                 else
188*b1e83836Smrg                 {
189*b1e83836Smrg                     uint result = void;
190*b1e83836Smrg                     asm pure nothrow @nogc
191*b1e83836Smrg                     {
192*b1e83836Smrg                         "frflags %0" : "=r" (result);
193*b1e83836Smrg                     }
194*b1e83836Smrg                     return result;
195*b1e83836Smrg                 }
196*b1e83836Smrg             }
197*b1e83836Smrg             else
198*b1e83836Smrg                 assert(0, "Not yet supported");
199*b1e83836Smrg         }
200*b1e83836Smrg         else
201*b1e83836Smrg         version (InlineAsm_X86_Any)
202*b1e83836Smrg         {
203*b1e83836Smrg             ushort sw;
204*b1e83836Smrg             asm pure nothrow @nogc { fstsw sw; }
205*b1e83836Smrg 
206*b1e83836Smrg             // OR the result with the SSE2 status register (MXCSR).
207*b1e83836Smrg             if (haveSSE)
208*b1e83836Smrg             {
209*b1e83836Smrg                 uint mxcsr;
210*b1e83836Smrg                 asm pure nothrow @nogc { stmxcsr mxcsr; }
211*b1e83836Smrg                 return (sw | mxcsr) & EXCEPTIONS_MASK;
212*b1e83836Smrg             }
213*b1e83836Smrg             else return sw & EXCEPTIONS_MASK;
214*b1e83836Smrg         }
215*b1e83836Smrg         else version (SPARC)
216*b1e83836Smrg         {
217*b1e83836Smrg            /*
218*b1e83836Smrg                int retval;
219*b1e83836Smrg                asm pure nothrow @nogc { st %fsr, retval; }
220*b1e83836Smrg                return retval;
221*b1e83836Smrg             */
222*b1e83836Smrg            assert(0, "Not yet supported");
223*b1e83836Smrg         }
224*b1e83836Smrg         else version (ARM)
225*b1e83836Smrg         {
226*b1e83836Smrg             assert(false, "Not yet supported.");
227*b1e83836Smrg         }
228*b1e83836Smrg         else version (RISCV_Any)
229*b1e83836Smrg         {
230*b1e83836Smrg             mixin(`
231*b1e83836Smrg             uint result = void;
232*b1e83836Smrg             asm pure nothrow @nogc
233*b1e83836Smrg             {
234*b1e83836Smrg                 "frflags %0" : "=r" (result);
235*b1e83836Smrg             }
236*b1e83836Smrg             return result;
237*b1e83836Smrg             `);
238*b1e83836Smrg         }
239*b1e83836Smrg         else
240*b1e83836Smrg             assert(0, "Not yet supported");
241*b1e83836Smrg     }
242*b1e83836Smrg 
    // Clears the hardware exception status bits.
    //
    //  - x86 (both the GNU and the InlineAsm_X86_Any paths): `fnclex` for the
    //    x87 unit; when haveSSE is true, MXCSR is read, its EXCEPTIONS_MASK
    //    bits are cleared, and the value is written back.
    //  - ARM under GNU: takes FloatingPointControl.getControlState(), clears
    //    the low five bits and writes the result to FPSCR; does nothing when
    //    ARM_SoftFloat is defined.
    //  - RISC-V: writes 0 with `fsflags`; the GNU path does nothing when
    //    D_SoftFloat is defined.
    //  - Every other configuration ends in a failing assert.
243*b1e83836Smrg     static void resetIeeeFlags() @trusted
244*b1e83836Smrg     {
245*b1e83836Smrg         version (GNU)
246*b1e83836Smrg         {
247*b1e83836Smrg             version (X86_Any)
248*b1e83836Smrg             {
249*b1e83836Smrg                 asm nothrow @nogc
250*b1e83836Smrg                 {
251*b1e83836Smrg                     "fnclex";
252*b1e83836Smrg                 }
253*b1e83836Smrg 
254*b1e83836Smrg                 // Also clear exception flags in MXCSR, SSE's control register.
255*b1e83836Smrg                 if (haveSSE)
256*b1e83836Smrg                 {
257*b1e83836Smrg                     uint mxcsr;
258*b1e83836Smrg                     asm nothrow @nogc
259*b1e83836Smrg                     {
260*b1e83836Smrg                         "stmxcsr %0" : "=m" (mxcsr);
261*b1e83836Smrg                     }
262*b1e83836Smrg                     mxcsr &= ~EXCEPTIONS_MASK;
263*b1e83836Smrg                     asm nothrow @nogc
264*b1e83836Smrg                     {
265*b1e83836Smrg                         "ldmxcsr %0" : : "m" (mxcsr);
266*b1e83836Smrg                     }
267*b1e83836Smrg                 }
268*b1e83836Smrg             }
269*b1e83836Smrg             else version (ARM)
270*b1e83836Smrg             {
271*b1e83836Smrg                 version (ARM_SoftFloat)
272*b1e83836Smrg                     return;
273*b1e83836Smrg                 else
274*b1e83836Smrg                 {
275*b1e83836Smrg                     uint old = FloatingPointControl.getControlState();
276*b1e83836Smrg                     old &= ~0b11111; // http://infocenter.arm.com/help/topic/com.arm.doc.ddi0408i/Chdfifdc.html
277*b1e83836Smrg                     asm nothrow @nogc
278*b1e83836Smrg                     {
279*b1e83836Smrg                         "vmsr FPSCR, %0" : : "r" (old);
280*b1e83836Smrg                     }
281*b1e83836Smrg                 }
282*b1e83836Smrg             }
283*b1e83836Smrg             else version (RISCV_Any)
284*b1e83836Smrg             {
285*b1e83836Smrg                 version (D_SoftFloat)
286*b1e83836Smrg                     return;
287*b1e83836Smrg                 else
288*b1e83836Smrg                 {
289*b1e83836Smrg                     uint newValues = 0x0;
290*b1e83836Smrg                     asm nothrow @nogc
291*b1e83836Smrg                     {
292*b1e83836Smrg                         "fsflags %0" : : "r" (newValues);
293*b1e83836Smrg                     }
294*b1e83836Smrg                 }
295*b1e83836Smrg             }
296*b1e83836Smrg             else
297*b1e83836Smrg                 assert(0, "Not yet supported");
298*b1e83836Smrg         }
299*b1e83836Smrg         else
300*b1e83836Smrg         version (InlineAsm_X86_Any)
301*b1e83836Smrg         {
302*b1e83836Smrg             asm nothrow @nogc
303*b1e83836Smrg             {
304*b1e83836Smrg                 fnclex;
305*b1e83836Smrg             }
306*b1e83836Smrg 
307*b1e83836Smrg             // Also clear exception flags in MXCSR, SSE's control register.
308*b1e83836Smrg             if (haveSSE)
309*b1e83836Smrg             {
310*b1e83836Smrg                 uint mxcsr;
311*b1e83836Smrg                 asm nothrow @nogc { stmxcsr mxcsr; }
312*b1e83836Smrg                 mxcsr &= ~EXCEPTIONS_MASK;
313*b1e83836Smrg                 asm nothrow @nogc { ldmxcsr mxcsr; }
314*b1e83836Smrg             }
315*b1e83836Smrg         }
316*b1e83836Smrg         else version (RISCV_Any)
317*b1e83836Smrg         {
318*b1e83836Smrg             mixin(`
319*b1e83836Smrg             uint newValues = 0x0;
320*b1e83836Smrg             asm pure nothrow @nogc
321*b1e83836Smrg             {
322*b1e83836Smrg                 "fsflags %0" : : "r" (newValues);
323*b1e83836Smrg             }
324*b1e83836Smrg             `);
325*b1e83836Smrg         }
326*b1e83836Smrg         else
327*b1e83836Smrg         {
328*b1e83836Smrg             /* SPARC:
329*b1e83836Smrg               int tmpval;
330*b1e83836Smrg               asm pure nothrow @nogc { st %fsr, tmpval; }
331*b1e83836Smrg               tmpval &=0xFFFF_FC00;
332*b1e83836Smrg               asm pure nothrow @nogc { ld tmpval, %fsr; }
333*b1e83836Smrg             */
334*b1e83836Smrg            assert(0, "Not yet supported");
335*b1e83836Smrg         }
336*b1e83836Smrg     }
337*b1e83836Smrg 
338*b1e83836Smrg public:
339*b1e83836Smrg     /**
340*b1e83836Smrg      * The result cannot be represented exactly, so rounding occurred.
341*b1e83836Smrg      * Example: `x = sin(0.1);`
342*b1e83836Smrg      */
343*b1e83836Smrg     @property bool inexact() @safe const { return (flags & INEXACT_MASK) != 0; }
344*b1e83836Smrg 
345*b1e83836Smrg     /**
346*b1e83836Smrg      * A zero was generated by underflow
347*b1e83836Smrg      * Example: `x = real.min_normal*real.epsilon/2;`
348*b1e83836Smrg      */
349*b1e83836Smrg     @property bool underflow() @safe const { return (flags & UNDERFLOW_MASK) != 0; }
350*b1e83836Smrg 
351*b1e83836Smrg     /**
352*b1e83836Smrg      * An infinity was generated by overflow
353*b1e83836Smrg      * Example: `x = real.max*2;`
354*b1e83836Smrg      */
355*b1e83836Smrg     @property bool overflow() @safe const { return (flags & OVERFLOW_MASK) != 0; }
356*b1e83836Smrg 
357*b1e83836Smrg     /**
358*b1e83836Smrg      * An infinity was generated by division by zero
359*b1e83836Smrg      * Example: `x = 3/0.0;`
360*b1e83836Smrg      */
361*b1e83836Smrg     @property bool divByZero() @safe const { return (flags & DIVBYZERO_MASK) != 0; }
362*b1e83836Smrg 
363*b1e83836Smrg     /**
364*b1e83836Smrg      * A machine NaN was generated.
365*b1e83836Smrg      * Example: `x = real.infinity * 0.0;`
366*b1e83836Smrg      */
367*b1e83836Smrg     @property bool invalid() @safe const { return (flags & INVALID_MASK) != 0; }
368*b1e83836Smrg }
369*b1e83836Smrg 
370*b1e83836Smrg ///
371*b1e83836Smrg version (IeeeFlagsUnittest)
372*b1e83836Smrg @safe unittest
373*b1e83836Smrg {
374*b1e83836Smrg     import std.math.traits : isNaN;
375*b1e83836Smrg 
    // Helper that performs integer arithmetic only; used at the end to show
    // that calling it leaves the status flags unchanged.
376*b1e83836Smrg     static void func() {
377*b1e83836Smrg         int a = 10 * 10;
378*b1e83836Smrg     }
    // Empty, non-inlined function taking `x` by reference (see the
    // "avoid constant propagation" notes at its call sites).
379*b1e83836Smrg     pragma(inline, false) static void blockopt(ref real x) {}
380*b1e83836Smrg     real a = 3.5;
381*b1e83836Smrg     // Set all the flags to zero
382*b1e83836Smrg     resetIeeeFlags();
383*b1e83836Smrg     assert(!ieeeFlags.divByZero);
384*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
385*b1e83836Smrg     // Perform a division by zero.
386*b1e83836Smrg     a /= 0.0L;
387*b1e83836Smrg     assert(a == real.infinity);
388*b1e83836Smrg     assert(ieeeFlags.divByZero);
389*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
390*b1e83836Smrg     // Create a NaN
    // (`a` is infinity at this point, so this computes infinity * 0.)
391*b1e83836Smrg     a *= 0.0L;
392*b1e83836Smrg     assert(ieeeFlags.invalid);
393*b1e83836Smrg     assert(isNaN(a));
394*b1e83836Smrg 
395*b1e83836Smrg     // Check that calling func() has no effect on the
396*b1e83836Smrg     // status flags.
397*b1e83836Smrg     IeeeFlags f = ieeeFlags;
398*b1e83836Smrg     func();
399*b1e83836Smrg     assert(ieeeFlags == f);
400*b1e83836Smrg }
401*b1e83836Smrg 
// Table-driven check, repeated for float, double and real: each entry pairs an
// operation with the one status flag that operation is expected to raise.
402*b1e83836Smrg version (IeeeFlagsUnittest)
403*b1e83836Smrg @safe unittest
404*b1e83836Smrg {
405*b1e83836Smrg     import std.meta : AliasSeq;
406*b1e83836Smrg 
    // `action` performs the floating-point operation; `ieeeCheck` reports
    // whether the corresponding flag is currently set.
407*b1e83836Smrg     static struct Test
408*b1e83836Smrg     {
409*b1e83836Smrg         void delegate() @trusted action;
410*b1e83836Smrg         bool function() @trusted ieeeCheck;
411*b1e83836Smrg     }
412*b1e83836Smrg 
413*b1e83836Smrg     static foreach (T; AliasSeq!(float, double, real))
414*b1e83836Smrg     {{
415*b1e83836Smrg         T x; /* Needs to be here to trick -O. It would optimize away the
416*b1e83836Smrg             calculations if x were local to the function literals. */
417*b1e83836Smrg         auto tests = [
            // inexact: 1 + 0.1
418*b1e83836Smrg             Test(
419*b1e83836Smrg                 () { x = 1; x += 0.1L; },
420*b1e83836Smrg                 () => ieeeFlags.inexact
421*b1e83836Smrg             ),
            // underflow: T.min_normal / T.max
422*b1e83836Smrg             Test(
423*b1e83836Smrg                 () { x = T.min_normal; x /= T.max; },
424*b1e83836Smrg                 () => ieeeFlags.underflow
425*b1e83836Smrg             ),
            // overflow: T.max + T.max
426*b1e83836Smrg             Test(
427*b1e83836Smrg                 () { x = T.max; x += T.max; },
428*b1e83836Smrg                 () => ieeeFlags.overflow
429*b1e83836Smrg             ),
            // division by zero: 1 / 0
430*b1e83836Smrg             Test(
431*b1e83836Smrg                 () { x = 1; x /= 0; },
432*b1e83836Smrg                 () => ieeeFlags.divByZero
433*b1e83836Smrg             ),
            // invalid operation: 0 / 0
434*b1e83836Smrg             Test(
435*b1e83836Smrg                 () { x = 0; x /= 0; },
436*b1e83836Smrg                 () => ieeeFlags.invalid
437*b1e83836Smrg             )
438*b1e83836Smrg         ];
        // For every entry: clear the flags, confirm the flag starts out
        // unset, run the operation, then confirm the flag is now set.
439*b1e83836Smrg         foreach (test; tests)
440*b1e83836Smrg         {
441*b1e83836Smrg             resetIeeeFlags();
442*b1e83836Smrg             assert(!test.ieeeCheck());
443*b1e83836Smrg             test.action();
444*b1e83836Smrg             assert(test.ieeeCheck());
445*b1e83836Smrg         }
446*b1e83836Smrg     }}
447*b1e83836Smrg }
448*b1e83836Smrg 
/**
 * Clears every floating-point status flag (sets them all to false).
 *
 * Forwards to the private, platform-specific `IeeeFlags.resetIeeeFlags`.
 */
void resetIeeeFlags() @trusted nothrow @nogc
{
    alias clearStickyFlags = IeeeFlags.resetIeeeFlags;
    clearStickyFlags();
}
454*b1e83836Smrg 
455*b1e83836Smrg ///
456*b1e83836Smrg @safe unittest
457*b1e83836Smrg {
    // Empty, non-inlined function taking `x` by reference (see the
    // "avoid constant propagation" notes at its call sites).
458*b1e83836Smrg     pragma(inline, false) static void blockopt(ref real x) {}
459*b1e83836Smrg     resetIeeeFlags();
460*b1e83836Smrg     real a = 3.5;
461*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
    // Dividing by zero yields infinity and sets the divByZero flag ...
462*b1e83836Smrg     a /= 0.0L;
463*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
464*b1e83836Smrg     assert(a == real.infinity);
465*b1e83836Smrg     assert(ieeeFlags.divByZero);
466*b1e83836Smrg 
    // ... and resetting the flags clears it again.
467*b1e83836Smrg     resetIeeeFlags();
468*b1e83836Smrg     assert(!ieeeFlags.divByZero);
469*b1e83836Smrg }
470*b1e83836Smrg 
/**
 * Captures the floating-point status flags as they are at the time of the call.
 *
 * Returns: an `IeeeFlags` snapshot wrapping the bits reported by
 *          `IeeeFlags.getIeeeFlags`
 */
@property IeeeFlags ieeeFlags() @trusted pure nothrow @nogc
{
    immutable uint statusBits = IeeeFlags.getIeeeFlags();
    return IeeeFlags(statusBits);
}
476*b1e83836Smrg 
477*b1e83836Smrg ///
478*b1e83836Smrg @safe nothrow unittest
479*b1e83836Smrg {
480*b1e83836Smrg     import std.math.traits : isNaN;
481*b1e83836Smrg 
    // Empty, non-inlined function taking `x` by reference (see the
    // "avoid constant propagation" notes at its call sites).
482*b1e83836Smrg     pragma(inline, false) static void blockopt(ref real x) {}
483*b1e83836Smrg     resetIeeeFlags();
484*b1e83836Smrg     real a = 3.5;
485*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
486*b1e83836Smrg 
    // 3.5 / 0 gives infinity and sets divByZero.
487*b1e83836Smrg     a /= 0.0L;
488*b1e83836Smrg     assert(a == real.infinity);
489*b1e83836Smrg     assert(ieeeFlags.divByZero);
490*b1e83836Smrg     blockopt(a); // avoid constant propagation by the optimizer
491*b1e83836Smrg 
    // infinity * 0 gives NaN and sets invalid.
492*b1e83836Smrg     a *= 0.0L;
493*b1e83836Smrg     assert(isNaN(a));
494*b1e83836Smrg     assert(ieeeFlags.invalid);
495*b1e83836Smrg }
496*b1e83836Smrg 
497*b1e83836Smrg } // IeeeFlagsSupport
498*b1e83836Smrg 
499*b1e83836Smrg 
500*b1e83836Smrg version (FloatingPointControlSupport)
501*b1e83836Smrg {
502*b1e83836Smrg 
503*b1e83836Smrg /** Control the Floating point hardware
504*b1e83836Smrg 
505*b1e83836Smrg   Change the IEEE754 floating-point rounding mode and the floating-point
506*b1e83836Smrg   hardware exceptions.
507*b1e83836Smrg 
508*b1e83836Smrg   By default, the rounding mode is roundToNearest and all hardware exceptions
509*b1e83836Smrg   are disabled. For most applications, debugging is easier if the $(I division
510*b1e83836Smrg   by zero), $(I overflow), and $(I invalid operation) exceptions are enabled.
511*b1e83836Smrg   These three are combined into a $(I severeExceptions) value for convenience.
512*b1e83836Smrg   Note in particular that if $(I invalidException) is enabled, a hardware trap
513*b1e83836Smrg   will be generated whenever an uninitialized floating-point variable is used.
514*b1e83836Smrg 
515*b1e83836Smrg   All changes are temporary. The previous state is restored at the
516*b1e83836Smrg   end of the scope.
517*b1e83836Smrg 
518*b1e83836Smrg 
519*b1e83836Smrg Example:
520*b1e83836Smrg ----
521*b1e83836Smrg {
522*b1e83836Smrg     FloatingPointControl fpctrl;
523*b1e83836Smrg 
524*b1e83836Smrg     // Enable hardware exceptions for division by zero, overflow to infinity,
525*b1e83836Smrg     // invalid operations, and uninitialized floating-point variables.
526*b1e83836Smrg     fpctrl.enableExceptions(FloatingPointControl.severeExceptions);
527*b1e83836Smrg 
528*b1e83836Smrg     // This will generate a hardware exception, if x is a
529*b1e83836Smrg     // default-initialized floating point variable:
530*b1e83836Smrg     real x; // Add `= 0` or even `= real.nan` to not throw the exception.
531*b1e83836Smrg     real y = x * 3.0;
532*b1e83836Smrg 
533*b1e83836Smrg     // The exception is only raised for the NaN-s of default-initialized variables.
534*b1e83836Smrg     // NaN-s with any other payload are valid:
535*b1e83836Smrg     real z = y * real.nan; // ok
536*b1e83836Smrg 
537*b1e83836Smrg     // The set hardware exceptions and rounding modes will be disabled when
538*b1e83836Smrg     // leaving this scope.
539*b1e83836Smrg }
540*b1e83836Smrg ----
541*b1e83836Smrg 
542*b1e83836Smrg  */
543*b1e83836Smrg struct FloatingPointControl
544*b1e83836Smrg {
545*b1e83836Smrg nothrow @nogc:
546*b1e83836Smrg 
547*b1e83836Smrg     alias RoundingMode = uint; ///
548*b1e83836Smrg 
549*b1e83836Smrg     version (StdDdoc)
550*b1e83836Smrg     {
551*b1e83836Smrg         enum : RoundingMode
552*b1e83836Smrg         {
553*b1e83836Smrg             /** IEEE rounding modes.
554*b1e83836Smrg              * The default mode is roundToNearest.
555*b1e83836Smrg              *
556*b1e83836Smrg              *  roundingMask = A mask of all rounding modes.
557*b1e83836Smrg              */
558*b1e83836Smrg             roundToNearest,
559*b1e83836Smrg             roundDown, /// ditto
560*b1e83836Smrg             roundUp, /// ditto
561*b1e83836Smrg             roundToZero, /// ditto
562*b1e83836Smrg             roundingMask, /// ditto
563*b1e83836Smrg         }
564*b1e83836Smrg     }
565*b1e83836Smrg     else version (CRuntime_Microsoft)
566*b1e83836Smrg     {
567*b1e83836Smrg         // Microsoft uses hardware-incompatible custom constants in fenv.h (core.stdc.fenv).
568*b1e83836Smrg         enum : RoundingMode
569*b1e83836Smrg         {
570*b1e83836Smrg             roundToNearest = 0x0000,
571*b1e83836Smrg             roundDown      = 0x0400,
572*b1e83836Smrg             roundUp        = 0x0800,
573*b1e83836Smrg             roundToZero    = 0x0C00,
574*b1e83836Smrg             roundingMask   = roundToNearest | roundDown
575*b1e83836Smrg                              | roundUp | roundToZero,
576*b1e83836Smrg         }
577*b1e83836Smrg     }
578*b1e83836Smrg     else
579*b1e83836Smrg     {
580*b1e83836Smrg         enum : RoundingMode
581*b1e83836Smrg         {
582*b1e83836Smrg             roundToNearest = core.stdc.fenv.FE_TONEAREST,
583*b1e83836Smrg             roundDown      = core.stdc.fenv.FE_DOWNWARD,
584*b1e83836Smrg             roundUp        = core.stdc.fenv.FE_UPWARD,
585*b1e83836Smrg             roundToZero    = core.stdc.fenv.FE_TOWARDZERO,
586*b1e83836Smrg             roundingMask   = roundToNearest | roundDown
587*b1e83836Smrg                              | roundUp | roundToZero,
588*b1e83836Smrg         }
589*b1e83836Smrg     }
590*b1e83836Smrg 
591*b1e83836Smrg     /***
592*b1e83836Smrg      * Change the floating-point hardware rounding mode
593*b1e83836Smrg      *
594*b1e83836Smrg      * Changing the rounding mode in the middle of a function can interfere
595*b1e83836Smrg      * with optimizations of floating point expressions, as the optimizer assumes
596*b1e83836Smrg      * that the rounding mode does not change.
597*b1e83836Smrg      * It is best to change the rounding mode only at the
598*b1e83836Smrg      * beginning of the function, and keep it until the function returns.
599*b1e83836Smrg      * It is also best to add the line:
600*b1e83836Smrg      * ---
601*b1e83836Smrg      * pragma(inline, false);
602*b1e83836Smrg      * ---
603*b1e83836Smrg      * as the first line of the function so it will not get inlined.
604*b1e83836Smrg      * Params:
605*b1e83836Smrg      *    newMode = the new rounding mode
606*b1e83836Smrg      */
607*b1e83836Smrg     @property void rounding(RoundingMode newMode) @trusted
608*b1e83836Smrg     {
609*b1e83836Smrg         initialize();
610*b1e83836Smrg         setControlState((getControlState() & (-1 - roundingMask)) | (newMode & roundingMask));
611*b1e83836Smrg     }
612*b1e83836Smrg 
613*b1e83836Smrg     /// Returns: the currently active rounding mode
614*b1e83836Smrg     @property static RoundingMode rounding() @trusted pure
615*b1e83836Smrg     {
616*b1e83836Smrg         return cast(RoundingMode)(getControlState() & roundingMask);
617*b1e83836Smrg     }
618*b1e83836Smrg 
619*b1e83836Smrg     alias ExceptionMask = uint; ///
620*b1e83836Smrg 
621*b1e83836Smrg     version (StdDdoc)
622*b1e83836Smrg     {
623*b1e83836Smrg         enum : ExceptionMask
624*b1e83836Smrg         {
625*b1e83836Smrg             /** IEEE hardware exceptions.
626*b1e83836Smrg              *  By default, all exceptions are masked (disabled).
627*b1e83836Smrg              *
628*b1e83836Smrg              *  severeExceptions = The overflow, division by zero, and invalid
629*b1e83836Smrg              *  exceptions.
630*b1e83836Smrg              */
631*b1e83836Smrg             subnormalException,
632*b1e83836Smrg             inexactException, /// ditto
633*b1e83836Smrg             underflowException, /// ditto
634*b1e83836Smrg             overflowException, /// ditto
635*b1e83836Smrg             divByZeroException, /// ditto
636*b1e83836Smrg             invalidException, /// ditto
637*b1e83836Smrg             severeExceptions, /// ditto
638*b1e83836Smrg             allExceptions, /// ditto
639*b1e83836Smrg         }
640*b1e83836Smrg     }
641*b1e83836Smrg     else version (ARM_Any)
642*b1e83836Smrg     {
643*b1e83836Smrg         enum : ExceptionMask
644*b1e83836Smrg         {
645*b1e83836Smrg             subnormalException    = 0x8000,
646*b1e83836Smrg             inexactException      = 0x1000,
647*b1e83836Smrg             underflowException    = 0x0800,
648*b1e83836Smrg             overflowException     = 0x0400,
649*b1e83836Smrg             divByZeroException    = 0x0200,
650*b1e83836Smrg             invalidException      = 0x0100,
651*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
652*b1e83836Smrg                                  | invalidException,
653*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
654*b1e83836Smrg                                  | inexactException | subnormalException,
655*b1e83836Smrg         }
656*b1e83836Smrg     }
657*b1e83836Smrg     else version (PPC_Any)
658*b1e83836Smrg     {
659*b1e83836Smrg         enum : ExceptionMask
660*b1e83836Smrg         {
661*b1e83836Smrg             inexactException      = 0x0008,
662*b1e83836Smrg             divByZeroException    = 0x0010,
663*b1e83836Smrg             underflowException    = 0x0020,
664*b1e83836Smrg             overflowException     = 0x0040,
665*b1e83836Smrg             invalidException      = 0x0080,
666*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
667*b1e83836Smrg                                  | invalidException,
668*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
669*b1e83836Smrg                                  | inexactException,
670*b1e83836Smrg         }
671*b1e83836Smrg     }
672*b1e83836Smrg     else version (RISCV_Any)
673*b1e83836Smrg     {
674*b1e83836Smrg         enum : ExceptionMask
675*b1e83836Smrg         {
676*b1e83836Smrg             inexactException      = 0x01,
677*b1e83836Smrg             divByZeroException    = 0x02,
678*b1e83836Smrg             underflowException    = 0x04,
679*b1e83836Smrg             overflowException     = 0x08,
680*b1e83836Smrg             invalidException      = 0x10,
681*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
682*b1e83836Smrg                                  | invalidException,
683*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
684*b1e83836Smrg                                  | inexactException,
685*b1e83836Smrg         }
686*b1e83836Smrg     }
687*b1e83836Smrg     else version (HPPA)
688*b1e83836Smrg     {
689*b1e83836Smrg         enum : ExceptionMask
690*b1e83836Smrg         {
691*b1e83836Smrg             inexactException      = 0x01,
692*b1e83836Smrg             underflowException    = 0x02,
693*b1e83836Smrg             overflowException     = 0x04,
694*b1e83836Smrg             divByZeroException    = 0x08,
695*b1e83836Smrg             invalidException      = 0x10,
696*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
697*b1e83836Smrg                                  | invalidException,
698*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
699*b1e83836Smrg                                  | inexactException,
700*b1e83836Smrg         }
701*b1e83836Smrg     }
702*b1e83836Smrg     else version (MIPS_Any)
703*b1e83836Smrg     {
704*b1e83836Smrg         enum : ExceptionMask
705*b1e83836Smrg         {
706*b1e83836Smrg             inexactException      = 0x0080,
707*b1e83836Smrg             divByZeroException    = 0x0400,
708*b1e83836Smrg             overflowException     = 0x0200,
709*b1e83836Smrg             underflowException    = 0x0100,
710*b1e83836Smrg             invalidException      = 0x0800,
711*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
712*b1e83836Smrg                                  | invalidException,
713*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
714*b1e83836Smrg                                  | inexactException,
715*b1e83836Smrg         }
716*b1e83836Smrg     }
717*b1e83836Smrg     else version (SPARC_Any)
718*b1e83836Smrg     {
719*b1e83836Smrg         enum : ExceptionMask
720*b1e83836Smrg         {
721*b1e83836Smrg             inexactException      = 0x0800000,
722*b1e83836Smrg             divByZeroException    = 0x1000000,
723*b1e83836Smrg             overflowException     = 0x4000000,
724*b1e83836Smrg             underflowException    = 0x2000000,
725*b1e83836Smrg             invalidException      = 0x8000000,
726*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
727*b1e83836Smrg                                  | invalidException,
728*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
729*b1e83836Smrg                                  | inexactException,
730*b1e83836Smrg         }
731*b1e83836Smrg     }
732*b1e83836Smrg     else version (IBMZ_Any)
733*b1e83836Smrg     {
734*b1e83836Smrg         enum : ExceptionMask
735*b1e83836Smrg         {
736*b1e83836Smrg             inexactException      = 0x08000000,
737*b1e83836Smrg             divByZeroException    = 0x40000000,
738*b1e83836Smrg             overflowException     = 0x20000000,
739*b1e83836Smrg             underflowException    = 0x10000000,
740*b1e83836Smrg             invalidException      = 0x80000000,
741*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
742*b1e83836Smrg                                  | invalidException,
743*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
744*b1e83836Smrg                                  | inexactException,
745*b1e83836Smrg         }
746*b1e83836Smrg     }
747*b1e83836Smrg     else version (X86_Any)
748*b1e83836Smrg     {
749*b1e83836Smrg         enum : ExceptionMask
750*b1e83836Smrg         {
751*b1e83836Smrg             inexactException      = 0x20,
752*b1e83836Smrg             underflowException    = 0x10,
753*b1e83836Smrg             overflowException     = 0x08,
754*b1e83836Smrg             divByZeroException    = 0x04,
755*b1e83836Smrg             subnormalException    = 0x02,
756*b1e83836Smrg             invalidException      = 0x01,
757*b1e83836Smrg             severeExceptions   = overflowException | divByZeroException
758*b1e83836Smrg                                  | invalidException,
759*b1e83836Smrg             allExceptions      = severeExceptions | underflowException
760*b1e83836Smrg                                  | inexactException | subnormalException,
761*b1e83836Smrg         }
762*b1e83836Smrg     }
763*b1e83836Smrg     else
764*b1e83836Smrg         static assert(false, "Not implemented for this architecture");
765*b1e83836Smrg 
766*b1e83836Smrg     version (ARM_Any)
767*b1e83836Smrg     {
768*b1e83836Smrg         static bool hasExceptionTraps_impl() @safe
769*b1e83836Smrg         {
770*b1e83836Smrg             auto oldState = getControlState();
771*b1e83836Smrg             // If exceptions are not supported, we set the bit but read it back as zero
772*b1e83836Smrg             // https://sourceware.org/ml/libc-ports/2012-06/msg00091.html
773*b1e83836Smrg             setControlState(oldState | divByZeroException);
774*b1e83836Smrg             immutable result = (getControlState() & allExceptions) != 0;
775*b1e83836Smrg             setControlState(oldState);
776*b1e83836Smrg             return result;
777*b1e83836Smrg         }
778*b1e83836Smrg     }
779*b1e83836Smrg 
780*b1e83836Smrg     /// Returns: true if the current FPU supports exception trapping
781*b1e83836Smrg     @property static bool hasExceptionTraps() @safe pure
782*b1e83836Smrg     {
783*b1e83836Smrg         version (X86_Any)
784*b1e83836Smrg             return true;
785*b1e83836Smrg         else version (PPC_Any)
786*b1e83836Smrg             return true;
787*b1e83836Smrg         else version (MIPS_Any)
788*b1e83836Smrg             return true;
789*b1e83836Smrg         else version (ARM_Any)
790*b1e83836Smrg         {
791*b1e83836Smrg             // The hasExceptionTraps_impl function is basically pure,
792*b1e83836Smrg             // as it restores all global state
793*b1e83836Smrg             auto fptr = ( () @trusted => cast(bool function() @safe
794*b1e83836Smrg                 pure nothrow @nogc)&hasExceptionTraps_impl)();
795*b1e83836Smrg             return fptr();
796*b1e83836Smrg         }
797*b1e83836Smrg         else
798*b1e83836Smrg             assert(0, "Not yet supported");
799*b1e83836Smrg     }
800*b1e83836Smrg 
801*b1e83836Smrg     /// Enable (unmask) specific hardware exceptions. Multiple exceptions may be ORed together.
802*b1e83836Smrg     void enableExceptions(ExceptionMask exceptions) @trusted
803*b1e83836Smrg     {
804*b1e83836Smrg         assert(hasExceptionTraps);
805*b1e83836Smrg         initialize();
806*b1e83836Smrg         version (X86_Any)
807*b1e83836Smrg             setControlState(getControlState() & ~(exceptions & allExceptions));
808*b1e83836Smrg         else
809*b1e83836Smrg             setControlState(getControlState() | (exceptions & allExceptions));
810*b1e83836Smrg     }
811*b1e83836Smrg 
812*b1e83836Smrg     /// Disable (mask) specific hardware exceptions. Multiple exceptions may be ORed together.
813*b1e83836Smrg     void disableExceptions(ExceptionMask exceptions) @trusted
814*b1e83836Smrg     {
815*b1e83836Smrg         assert(hasExceptionTraps);
816*b1e83836Smrg         initialize();
817*b1e83836Smrg         version (X86_Any)
818*b1e83836Smrg             setControlState(getControlState() | (exceptions & allExceptions));
819*b1e83836Smrg         else
820*b1e83836Smrg             setControlState(getControlState() & ~(exceptions & allExceptions));
821*b1e83836Smrg     }
822*b1e83836Smrg 
823*b1e83836Smrg     /// Returns: the exceptions which are currently enabled (unmasked)
824*b1e83836Smrg     @property static ExceptionMask enabledExceptions() @trusted pure
825*b1e83836Smrg     {
826*b1e83836Smrg         assert(hasExceptionTraps);
827*b1e83836Smrg         version (X86_Any)
828*b1e83836Smrg             return (getControlState() & allExceptions) ^ allExceptions;
829*b1e83836Smrg         else
830*b1e83836Smrg             return (getControlState() & allExceptions);
831*b1e83836Smrg     }
832*b1e83836Smrg 
833*b1e83836Smrg     ///  Clear all pending exceptions, then restore the original exception state and rounding mode.
834*b1e83836Smrg     ~this() @trusted
835*b1e83836Smrg     {
836*b1e83836Smrg         clearExceptions();
837*b1e83836Smrg         if (initialized)
838*b1e83836Smrg             setControlState(savedState);
839*b1e83836Smrg     }
840*b1e83836Smrg 
841*b1e83836Smrg private:
842*b1e83836Smrg     ControlState savedState;
843*b1e83836Smrg 
844*b1e83836Smrg     bool initialized = false;
845*b1e83836Smrg 
846*b1e83836Smrg     version (ARM_Any)
847*b1e83836Smrg     {
848*b1e83836Smrg         alias ControlState = uint;
849*b1e83836Smrg     }
850*b1e83836Smrg     else version (HPPA)
851*b1e83836Smrg     {
852*b1e83836Smrg         alias ControlState = uint;
853*b1e83836Smrg     }
854*b1e83836Smrg     else version (PPC_Any)
855*b1e83836Smrg     {
856*b1e83836Smrg         alias ControlState = uint;
857*b1e83836Smrg     }
858*b1e83836Smrg     else version (RISCV_Any)
859*b1e83836Smrg     {
860*b1e83836Smrg         alias ControlState = uint;
861*b1e83836Smrg     }
862*b1e83836Smrg     else version (MIPS_Any)
863*b1e83836Smrg     {
864*b1e83836Smrg         alias ControlState = uint;
865*b1e83836Smrg     }
866*b1e83836Smrg     else version (SPARC_Any)
867*b1e83836Smrg     {
868*b1e83836Smrg         alias ControlState = ulong;
869*b1e83836Smrg     }
870*b1e83836Smrg     else version (IBMZ_Any)
871*b1e83836Smrg     {
872*b1e83836Smrg         alias ControlState = uint;
873*b1e83836Smrg     }
874*b1e83836Smrg     else version (X86_Any)
875*b1e83836Smrg     {
876*b1e83836Smrg         alias ControlState = ushort;
877*b1e83836Smrg     }
878*b1e83836Smrg     else
879*b1e83836Smrg         static assert(false, "Not implemented for this architecture");
880*b1e83836Smrg 
881*b1e83836Smrg     void initialize() @safe
882*b1e83836Smrg     {
883*b1e83836Smrg         // BUG: This works around the absence of this() constructors.
884*b1e83836Smrg         if (initialized) return;
885*b1e83836Smrg         clearExceptions();
886*b1e83836Smrg         savedState = getControlState();
887*b1e83836Smrg         initialized = true;
888*b1e83836Smrg     }
889*b1e83836Smrg 
890*b1e83836Smrg     // Clear all pending exceptions
891*b1e83836Smrg     static void clearExceptions() @safe
892*b1e83836Smrg     {
893*b1e83836Smrg         version (IeeeFlagsSupport)
894*b1e83836Smrg             resetIeeeFlags();
895*b1e83836Smrg         else
896*b1e83836Smrg             static assert(false, "Not implemented for this architecture");
897*b1e83836Smrg     }
898*b1e83836Smrg 
899*b1e83836Smrg     // Read from the control register
    //
    // Returns the raw control state for the current architecture.
    // GNU builds use GCC-style extended asm with one branch per architecture;
    // other compilers use DMD-style inline asm on X86/X86_64 and a string
    // mixin on RISC-V. Architectures without a branch hit assert(0).
900*b1e83836Smrg     package(std.math) static ControlState getControlState() @trusted pure
901*b1e83836Smrg     {
902*b1e83836Smrg         version (GNU)
903*b1e83836Smrg         {
904*b1e83836Smrg             version (X86_Any)
905*b1e83836Smrg             {
                // Store the x87 control word into `cont`.
906*b1e83836Smrg                 ControlState cont;
907*b1e83836Smrg                 asm pure nothrow @nogc
908*b1e83836Smrg                 {
909*b1e83836Smrg                     "fstcw %0" : "=m" (cont);
910*b1e83836Smrg                 }
911*b1e83836Smrg                 return cont;
912*b1e83836Smrg             }
913*b1e83836Smrg             else version (AArch64)
914*b1e83836Smrg             {
                // Read the FPCR system register.
915*b1e83836Smrg                 ControlState cont;
916*b1e83836Smrg                 asm pure nothrow @nogc
917*b1e83836Smrg                 {
918*b1e83836Smrg                     "mrs %0, FPCR;" : "=r" (cont);
919*b1e83836Smrg                 }
920*b1e83836Smrg                 return cont;
921*b1e83836Smrg             }
922*b1e83836Smrg             else version (ARM)
923*b1e83836Smrg             {
                // Soft-float builds report 0 and execute no instruction;
                // otherwise FPSCR is read.
924*b1e83836Smrg                 ControlState cont;
925*b1e83836Smrg                 version (ARM_SoftFloat)
926*b1e83836Smrg                    cont = 0;
927*b1e83836Smrg                 else
928*b1e83836Smrg                 {
929*b1e83836Smrg                     asm pure nothrow @nogc
930*b1e83836Smrg                     {
931*b1e83836Smrg                         "vmrs %0, FPSCR" : "=r" (cont);
932*b1e83836Smrg                     }
933*b1e83836Smrg                 }
934*b1e83836Smrg                 return cont;
935*b1e83836Smrg             }
936*b1e83836Smrg             else version (RISCV_Any)
937*b1e83836Smrg             {
                // Soft-float builds report 0; otherwise fcsr is read with frcsr.
938*b1e83836Smrg                 version (D_SoftFloat)
939*b1e83836Smrg                     return 0;
940*b1e83836Smrg                 else
941*b1e83836Smrg                 {
942*b1e83836Smrg                     ControlState cont;
943*b1e83836Smrg                     asm pure nothrow @nogc
944*b1e83836Smrg                     {
945*b1e83836Smrg                         "frcsr %0" : "=r" (cont);
946*b1e83836Smrg                     }
947*b1e83836Smrg                     return cont;
948*b1e83836Smrg                 }
949*b1e83836Smrg             }
950*b1e83836Smrg             else
951*b1e83836Smrg                 assert(0, "Not yet supported");
952*b1e83836Smrg         }
953*b1e83836Smrg         else
954*b1e83836Smrg         version (D_InlineAsm_X86)
955*b1e83836Smrg         {
            // NOTE(review): EAX is zeroed but nothing in this block reads it
            // afterwards -- confirm whether the xor is still needed.
956*b1e83836Smrg             short cont;
957*b1e83836Smrg             asm pure nothrow @nogc
958*b1e83836Smrg             {
959*b1e83836Smrg                 xor EAX, EAX;
960*b1e83836Smrg                 fstcw cont;
961*b1e83836Smrg             }
962*b1e83836Smrg             return cont;
963*b1e83836Smrg         }
964*b1e83836Smrg         else version (D_InlineAsm_X86_64)
965*b1e83836Smrg         {
            // Same sequence as the 32-bit branch, using RAX.
966*b1e83836Smrg             short cont;
967*b1e83836Smrg             asm pure nothrow @nogc
968*b1e83836Smrg             {
969*b1e83836Smrg                 xor RAX, RAX;
970*b1e83836Smrg                 fstcw cont;
971*b1e83836Smrg             }
972*b1e83836Smrg             return cont;
973*b1e83836Smrg         }
974*b1e83836Smrg         else version (RISCV_Any)
975*b1e83836Smrg         {
            // The asm statement is supplied as a string mixin; presumably so
            // compilers that cannot parse this asm syntax never see it on
            // other targets -- TODO confirm.
976*b1e83836Smrg             mixin(`
977*b1e83836Smrg             ControlState cont;
978*b1e83836Smrg             asm pure nothrow @nogc
979*b1e83836Smrg             {
980*b1e83836Smrg                 "frcsr %0" : "=r" (cont);
981*b1e83836Smrg             }
982*b1e83836Smrg             return cont;
983*b1e83836Smrg             `);
984*b1e83836Smrg         }
985*b1e83836Smrg         else
986*b1e83836Smrg             assert(0, "Not yet supported");
987*b1e83836Smrg     }
988*b1e83836Smrg 
989*b1e83836Smrg     // Set the control register
    //
    // Writes `newState` as the raw control state for the current
    // architecture. On X86 the x87 control word is loaded and, when haveSSE
    // is true, the rounding mode and exception mask bits are copied into
    // MXCSR as well. Architectures without a branch hit assert(0).
    //
    // Params:
    //     newState = raw control state to install
990*b1e83836Smrg     package(std.math) static void setControlState(ControlState newState) @trusted
991*b1e83836Smrg     {
992*b1e83836Smrg         version (GNU)
993*b1e83836Smrg         {
994*b1e83836Smrg             version (X86_Any)
995*b1e83836Smrg             {
                // fclex runs before fldcw so the x87 exception flags are
                // cleared before the new control word takes effect.
996*b1e83836Smrg                 asm nothrow @nogc
997*b1e83836Smrg                 {
998*b1e83836Smrg                     "fclex; fldcw %0" : : "m" (newState);
999*b1e83836Smrg                 }
1000*b1e83836Smrg 
1001*b1e83836Smrg                 // Also update MXCSR, SSE's control register.
1002*b1e83836Smrg                 if (haveSSE)
1003*b1e83836Smrg                 {
1004*b1e83836Smrg                     uint mxcsr;
1005*b1e83836Smrg                     asm nothrow @nogc
1006*b1e83836Smrg                     {
1007*b1e83836Smrg                         "stmxcsr %0" : "=m" (mxcsr);
1008*b1e83836Smrg                     }
1009*b1e83836Smrg 
1010*b1e83836Smrg                     /* In the FPU control register, rounding mode is in bits 10 and
1011*b1e83836Smrg                        11. In MXCSR it's in bits 13 and 14. */
1012*b1e83836Smrg                     mxcsr &= ~(roundingMask << 3);             // delete old rounding mode
1013*b1e83836Smrg                     mxcsr |= (newState & roundingMask) << 3;   // write new rounding mode
1014*b1e83836Smrg 
1015*b1e83836Smrg                     /* In the FPU control register, masks are bits 0 through 5.
1016*b1e83836Smrg                        In MXCSR they're 7 through 12. */
1017*b1e83836Smrg                     mxcsr &= ~(allExceptions << 7);            // delete old masks
1018*b1e83836Smrg                     mxcsr |= (newState & allExceptions) << 7;  // write new exception masks
1019*b1e83836Smrg 
1020*b1e83836Smrg                     asm nothrow @nogc
1021*b1e83836Smrg                     {
1022*b1e83836Smrg                         "ldmxcsr %0" : : "m" (mxcsr);
1023*b1e83836Smrg                     }
1024*b1e83836Smrg                 }
1025*b1e83836Smrg             }
1026*b1e83836Smrg             else version (AArch64)
1027*b1e83836Smrg             {
                // Write the FPCR system register.
1028*b1e83836Smrg                 asm nothrow @nogc
1029*b1e83836Smrg                 {
1030*b1e83836Smrg                     "msr FPCR, %0;" : : "r" (newState);
1031*b1e83836Smrg                 }
1032*b1e83836Smrg             }
1033*b1e83836Smrg             else version (ARM)
1034*b1e83836Smrg             {
                // Soft-float builds return without executing any instruction;
                // otherwise FPSCR is written.
1035*b1e83836Smrg                 version (ARM_SoftFloat)
1036*b1e83836Smrg                    return;
1037*b1e83836Smrg                 else
1038*b1e83836Smrg                 {
1039*b1e83836Smrg                     asm nothrow @nogc
1040*b1e83836Smrg                     {
1041*b1e83836Smrg                         "vmsr FPSCR, %0" : : "r" (newState);
1042*b1e83836Smrg                     }
1043*b1e83836Smrg                 }
1044*b1e83836Smrg             }
1045*b1e83836Smrg             else version (RISCV_Any)
1046*b1e83836Smrg             {
                // Soft-float builds return without executing any instruction;
                // otherwise fcsr is written with fscsr.
1047*b1e83836Smrg                 version (D_SoftFloat)
1048*b1e83836Smrg                     return;
1049*b1e83836Smrg                 else
1050*b1e83836Smrg                 {
1051*b1e83836Smrg                     asm nothrow @nogc
1052*b1e83836Smrg                     {
1053*b1e83836Smrg                         "fscsr %0" : : "r" (newState);
1054*b1e83836Smrg                     }
1055*b1e83836Smrg                 }
1056*b1e83836Smrg             }
1057*b1e83836Smrg             else
1058*b1e83836Smrg                 assert(0, "Not yet supported");
1059*b1e83836Smrg         }
1060*b1e83836Smrg         else
1061*b1e83836Smrg         version (InlineAsm_X86_Any)
1062*b1e83836Smrg         {
            // DMD-style inline asm: same fclex/fldcw sequence and MXCSR
            // mirroring as the GNU X86 branch above.
1063*b1e83836Smrg             asm nothrow @nogc
1064*b1e83836Smrg             {
1065*b1e83836Smrg                 fclex;
1066*b1e83836Smrg                 fldcw newState;
1067*b1e83836Smrg             }
1068*b1e83836Smrg 
1069*b1e83836Smrg             // Also update MXCSR, SSE's control register.
1070*b1e83836Smrg             if (haveSSE)
1071*b1e83836Smrg             {
1072*b1e83836Smrg                 uint mxcsr;
1073*b1e83836Smrg                 asm nothrow @nogc { stmxcsr mxcsr; }
1074*b1e83836Smrg 
1075*b1e83836Smrg                 /* In the FPU control register, rounding mode is in bits 10 and
1076*b1e83836Smrg                 11. In MXCSR it's in bits 13 and 14. */
1077*b1e83836Smrg                 mxcsr &= ~(roundingMask << 3);             // delete old rounding mode
1078*b1e83836Smrg                 mxcsr |= (newState & roundingMask) << 3;   // write new rounding mode
1079*b1e83836Smrg 
1080*b1e83836Smrg                 /* In the FPU control register, masks are bits 0 through 5.
1081*b1e83836Smrg                 In MXCSR they're 7 through 12. */
1082*b1e83836Smrg                 mxcsr &= ~(allExceptions << 7);            // delete old masks
1083*b1e83836Smrg                 mxcsr |= (newState & allExceptions) << 7;  // write new exception masks
1084*b1e83836Smrg 
1085*b1e83836Smrg                 asm nothrow @nogc { ldmxcsr mxcsr; }
1086*b1e83836Smrg             }
1087*b1e83836Smrg         }
1088*b1e83836Smrg         else version (RISCV_Any)
1089*b1e83836Smrg         {
            // The asm statement is supplied as a string mixin; presumably so
            // compilers that cannot parse this asm syntax never see it on
            // other targets -- TODO confirm.
1090*b1e83836Smrg             mixin(`
1091*b1e83836Smrg             asm pure nothrow @nogc
1092*b1e83836Smrg             {
1093*b1e83836Smrg                 "fscsr %0" : : "r" (newState);
1094*b1e83836Smrg             }
1095*b1e83836Smrg             `);
1096*b1e83836Smrg         }
1097*b1e83836Smrg         else
1098*b1e83836Smrg             assert(0, "Not yet supported");
1099*b1e83836Smrg     }
1100*b1e83836Smrg }
1101*b1e83836Smrg 
///
version (FloatingPointControlUnittest)
@safe unittest
{
    import std.math.rounding : lrint;

    // Each assignment changes the rounding mode that lrint then observes.
    FloatingPointControl control;

    // Round toward negative infinity.
    control.rounding = FloatingPointControl.roundDown;
    assert(lrint(1.5) == 1.0);

    // Round toward positive infinity.
    control.rounding = FloatingPointControl.roundUp;
    assert(lrint(1.4) == 2.0);

    // Round to nearest.
    control.rounding = FloatingPointControl.roundToNearest;
    assert(lrint(1.5) == 2.0);
}
1119*b1e83836Smrg 
// Checks that a FloatingPointControl going out of scope puts the rounding
// mode and enabled exceptions back to their defaults.
@safe unittest
{
    // Asserts round-to-nearest and, where traps exist, no enabled exceptions.
    void assertDefaultState()
    {
        assert(FloatingPointControl.rounding
               == FloatingPointControl.roundToNearest);
        if (FloatingPointControl.hasExceptionTraps)
            assert(FloatingPointControl.enabledExceptions == 0);
    }

    // An untouched instance must leave the state alone.
    {
        FloatingPointControl untouched;
    }
    assertDefaultState();

    // A changed rounding mode is undone at scope exit.
    {
        FloatingPointControl scoped;
        scoped.rounding = FloatingPointControl.roundDown;
        assert(FloatingPointControl.rounding == FloatingPointControl.roundDown);
    }
    assertDefaultState();

    // Enabled exceptions and rounding mode are both undone at scope exit.
    if (FloatingPointControl.hasExceptionTraps)
    {
        enum wanted = FloatingPointControl.divByZeroException
                    | FloatingPointControl.overflowException;

        FloatingPointControl scoped;
        scoped.enableExceptions(wanted);
        assert(scoped.enabledExceptions == wanted);

        scoped.rounding = FloatingPointControl.roundUp;
        assert(FloatingPointControl.rounding == FloatingPointControl.roundUp);
    }
    assertDefaultState();
}
1156*b1e83836Smrg 
1157*b1e83836Smrg version (FloatingPointControlUnittest)
1158*b1e83836Smrg @safe unittest // rounding
1159*b1e83836Smrg {
    // Checks that roundUp, roundDown and roundToZero give the expected
    // ordering for 1 + 0.1 and -1 - 0.1 in float, double and real.
1160*b1e83836Smrg     import std.meta : AliasSeq;
1161*b1e83836Smrg 
    // Computes 1 + 0.1 in type T with rounding mode `rm` active.
    // The FloatingPointControl local restores the previous mode on return.
1162*b1e83836Smrg     static T addRound(T)(uint rm)
1163*b1e83836Smrg     {
1164*b1e83836Smrg         pragma(inline, false) static void blockopt(ref T x) {}
1165*b1e83836Smrg         pragma(inline, false);
1166*b1e83836Smrg         FloatingPointControl fpctrl;
1167*b1e83836Smrg         fpctrl.rounding = rm;
1168*b1e83836Smrg         T x = 1;
1169*b1e83836Smrg         blockopt(x); // avoid constant propagation by the optimizer
1170*b1e83836Smrg         x += 0.1L;
1171*b1e83836Smrg         return x;
1172*b1e83836Smrg     }
1173*b1e83836Smrg 
    // Computes -1 - 0.1 in type T with rounding mode `rm` active.
1174*b1e83836Smrg     static T subRound(T)(uint rm)
1175*b1e83836Smrg     {
1176*b1e83836Smrg         pragma(inline, false) static void blockopt(ref T x) {}
1177*b1e83836Smrg         pragma(inline, false);
1178*b1e83836Smrg         FloatingPointControl fpctrl;
1179*b1e83836Smrg         fpctrl.rounding = rm;
1180*b1e83836Smrg         T x = -1;
1181*b1e83836Smrg         blockopt(x); // avoid constant propagation by the optimizer
1182*b1e83836Smrg         x -= 0.1L;
1183*b1e83836Smrg         return x;
1184*b1e83836Smrg     }
1185*b1e83836Smrg 
1186*b1e83836Smrg     static foreach (T; AliasSeq!(float, double, real))
1187*b1e83836Smrg     {{
1188*b1e83836Smrg         /* Be careful with changing the rounding mode, it interferes
1189*b1e83836Smrg          * with common subexpressions. Changing rounding modes should
1190*b1e83836Smrg          * be done with separate functions that are not inlined.
1191*b1e83836Smrg          */
1192*b1e83836Smrg 
        // Positive result: rounding toward zero must match rounding down.
1193*b1e83836Smrg         {
1194*b1e83836Smrg             T u = addRound!(T)(FloatingPointControl.roundUp);
1195*b1e83836Smrg             T d = addRound!(T)(FloatingPointControl.roundDown);
1196*b1e83836Smrg             T z = addRound!(T)(FloatingPointControl.roundToZero);
1197*b1e83836Smrg 
1198*b1e83836Smrg             assert(u > d);
1199*b1e83836Smrg             assert(z == d);
1200*b1e83836Smrg         }
1201*b1e83836Smrg 
        // Negative result: rounding toward zero must match rounding up.
1202*b1e83836Smrg         {
1203*b1e83836Smrg             T u = subRound!(T)(FloatingPointControl.roundUp);
1204*b1e83836Smrg             T d = subRound!(T)(FloatingPointControl.roundDown);
1205*b1e83836Smrg             T z = subRound!(T)(FloatingPointControl.roundToZero);
1206*b1e83836Smrg 
1207*b1e83836Smrg             assert(u > d);
1208*b1e83836Smrg             assert(z == u);
1209*b1e83836Smrg         }
1210*b1e83836Smrg     }}
1211*b1e83836Smrg }
1212*b1e83836Smrg 
1213*b1e83836Smrg }
1214