xref: /openbsd-src/lib/libm/arch/i387/fenv.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /*	$OpenBSD: fenv.c,v 1.5 2016/09/12 19:47:02 guenther Exp $	*/
2 /*	$NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $	*/
3 
4 /*-
5  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/sysctl.h>
32 #include <machine/cpu.h>
33 #include <machine/npx.h>
34 
35 #include <fenv.h>
36 
37 /*
38  * The following constant represents the default floating-point environment
39  * (that is, the one installed at program startup) and has type pointer to
40  * const-qualified fenv_t.
41  *
42  * It can be used as an argument to the functions within the <fenv.h> header
43  * that manage the floating-point environment, namely fesetenv() and
44  * feupdateenv().
45  *
 * The x87 control, status and tag words are 16 bits wide. The upper bits,
 * 31-16, of the 32-bit fields that hold them are marked as RESERVED.
48  */
/*
 * Image of the default floating-point environment: the x87 environment
 * words first, followed by the SSE control/status register value.
 */
fenv_t __fe_dfl_env = {
	{
		/* Initial control word; upper 16 (reserved) bits all set */
		0xffff0000 | __INITIAL_NPXCW__,	/* Control word register */
		/* No status flags raised; upper 16 (reserved) bits all set */
		0xffff0000,			/* Status word register */
		0xffffffff,			/* Tag word register */
		/*
		 * Remaining four words of the x87 environment image.
		 * NOTE(review): presumably the instruction/operand pointer
		 * and selector words -- confirm against the fenv_t
		 * declaration, which is not visible here.
		 */
		{
			0x00000000,
			0x00000000,
			0x00000000,
			0xffff0000
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
63 
64 /*
65  * Test for SSE support on this processor.
66  *
67  * We need to use ldmxcsr/stmxcsr to get correct results if any part
68  * of the program was compiled to use SSE floating-point, but we can't
69  * use SSE on older processors.
70  *
71  * In order to do so, we need to query the processor capabilities via the CPUID
72  * instruction. We can make it even simpler though, by querying the machdep.sse
73  * sysctl.
74  */
75 static int __HAS_SSE = 0;
76 
77 static void __test_sse(void) __attribute__ ((constructor));
78 
79 static void __test_sse(void)
80 {
81 	size_t oldlen = sizeof(__HAS_SSE);
82 	int mib[2] = { CTL_MACHDEP, CPU_SSE };
83 	int rv;
84 
85 	rv = sysctl(mib, 2, &__HAS_SSE, &oldlen, NULL, 0);
86 	if (rv == -1)
87 		__HAS_SSE = 0;
88 }
89 
90 /*
91  * The feclearexcept() function clears the supported floating-point exceptions
92  * represented by `excepts'.
93  */
94 int
95 feclearexcept(int excepts)
96 {
97 	fenv_t fenv;
98 	unsigned int mxcsr;
99 
100 	excepts &= FE_ALL_EXCEPT;
101 
102 	/* Store the current x87 floating-point environment */
103 	__asm__ volatile ("fnstenv %0" : "=m" (fenv));
104 
105 	/* Clear the requested floating-point exceptions */
106 	fenv.__x87.__status &= ~excepts;
107 
108 	/* Load the x87 floating-point environent */
109 	__asm__ volatile ("fldenv %0" : : "m" (fenv));
110 
111 	/* Same for SSE environment */
112 	if (__HAS_SSE) {
113 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
114 		mxcsr &= ~excepts;
115 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
116 	}
117 
118 	return (0);
119 }
120 DEF_STD(feclearexcept);
121 
122 /*
123  * The fegetexceptflag() function stores an implementation-defined
124  * representation of the states of the floating-point status flags indicated by
125  * the argument excepts in the object pointed to by the argument flagp.
126  */
127 int
128 fegetexceptflag(fexcept_t *flagp, int excepts)
129 {
130 	unsigned short status;
131 	unsigned int mxcsr = 0;
132 
133 	excepts &= FE_ALL_EXCEPT;
134 
135 	/* Store the current x87 status register */
136 	__asm__ volatile ("fnstsw %0" : "=am" (status));
137 
138 	/* Store the MXCSR register */
139 	if (__HAS_SSE)
140 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
141 
142 	/* Store the results in flagp */
143 	*flagp = (status | mxcsr) & excepts;
144 
145 	return (0);
146 }
147 
148 /*
149  * The feraiseexcept() function raises the supported floating-point exceptions
150  * represented by the argument `excepts'.
151  *
152  * The standard explicitly allows us to execute an instruction that has the
153  * exception as a side effect, but we choose to manipulate the status register
154  * directly.
155  *
156  * The validation of input is being deferred to fesetexceptflag().
157  */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	/* Ignore any bits that do not name a supported exception */
	excepts &= FE_ALL_EXCEPT;

	/*
	 * Hand fesetexceptflag() a genuine fexcept_t object instead of a cast
	 * pointer to the int argument, so the call does not depend on
	 * fexcept_t and int sharing size and representation.
	 */
	flags = (fexcept_t)excepts;
	fesetexceptflag(&flags, excepts);

	/* Let the x87 act on any exception that is now pending and unmasked */
	__asm__ volatile ("fwait");

	return (0);
}
DEF_STD(feraiseexcept);
169 
170 /*
171  * This function sets the floating-point status flags indicated by the argument
172  * `excepts' to the states stored in the object pointed to by `flagp'. It does
173  * NOT raise any floating-point exceptions, but only sets the state of the flags.
174  */
175 int
176 fesetexceptflag(const fexcept_t *flagp, int excepts)
177 {
178 	fenv_t fenv;
179 	unsigned int mxcsr;
180 
181 	excepts &= FE_ALL_EXCEPT;
182 
183 	/* Store the current x87 floating-point environment */
184 	__asm__ volatile ("fnstenv %0" : "=m" (fenv));
185 
186 	/* Set the requested status flags */
187 	fenv.__x87.__status &= ~excepts;
188 	fenv.__x87.__status |= *flagp & excepts;
189 
190 	/* Load the x87 floating-point environent */
191 	__asm__ volatile ("fldenv %0" : : "m" (fenv));
192 
193 	/* Same for SSE environment */
194 	if (__HAS_SSE) {
195 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
196 		mxcsr &= ~excepts;
197 		mxcsr |= *flagp & excepts;
198 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
199 	}
200 
201 	return (0);
202 }
203 DEF_STD(fesetexceptflag);
204 
205 /*
206  * The fetestexcept() function determines which of a specified subset of the
207  * floating-point exception flags are currently set. The `excepts' argument
208  * specifies the floating-point status flags to be queried.
209  */
210 int
211 fetestexcept(int excepts)
212 {
213 	unsigned short status;
214 	unsigned int mxcsr = 0;
215 
216 	excepts &= FE_ALL_EXCEPT;
217 
218 	/* Store the current x87 status register */
219 	__asm__ volatile ("fnstsw %0" : "=am" (status));
220 
221 	/* Store the MXCSR register state */
222 	if (__HAS_SSE)
223 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
224 
225 	return ((status | mxcsr) & excepts);
226 }
227 DEF_STD(fetestexcept);
228 
229 /*
230  * The fegetround() function gets the current rounding direction.
231  */
232 int
233 fegetround(void)
234 {
235 	unsigned short control;
236 
237 	/*
238 	 * We assume that the x87 and the SSE unit agree on the
239 	 * rounding mode.  Reading the control word on the x87 turns
240 	 * out to be about 5 times faster than reading it on the SSE
241 	 * unit on an Opteron 244.
242 	 */
243 	__asm__ volatile ("fnstcw %0" : "=m" (control));
244 
245 	return (control & _X87_ROUND_MASK);
246 }
247 DEF_STD(fegetround);
248 
249 /*
250  * The fesetround() function establishes the rounding direction represented by
251  * its argument `round'. If the argument is not equal to the value of a rounding
252  * direction macro, the rounding direction is not changed.
253  */
254 int
255 fesetround(int round)
256 {
257 	unsigned short control;
258 	unsigned int mxcsr;
259 
260 	/* Check whether requested rounding direction is supported */
261 	if (round & ~_X87_ROUND_MASK)
262 		return (-1);
263 
264 	/* Store the current x87 control word register */
265 	__asm__ volatile ("fnstcw %0" : "=m" (control));
266 
267 	/* Set the rounding direction */
268 	control &= ~_X87_ROUND_MASK;
269 	control |= round;
270 
271 	/* Load the x87 control word register */
272 	__asm__ volatile ("fldcw %0" : : "m" (control));
273 
274 	/* Same for the SSE environment */
275 	if (__HAS_SSE) {
276 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
277 		mxcsr &= ~(_X87_ROUND_MASK << _SSE_ROUND_SHIFT);
278 		mxcsr |= round << _SSE_ROUND_SHIFT;
279 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
280 	}
281 
282 	return (0);
283 }
284 DEF_STD(fesetround);
285 
286 /*
287  * The fegetenv() function attempts to store the current floating-point
288  * environment in the object pointed to by envp.
289  */
int
fegetenv(fenv_t *envp)
{
	/* Store the current x87 floating-point environment */
	__asm__ volatile ("fnstenv %0" : "=m" (*envp));

	/* Store the MXCSR register state (only when SSE is available) */
	if (__HAS_SSE)
		__asm__ volatile ("stmxcsr %0" : "=m" (envp->__mxcsr));

	/*
	 * When an FNSTENV instruction is executed, all pending exceptions are
	 * essentially lost (either the x87 FPU status register is cleared or
	 * all exceptions are masked).
	 *
	 * Reload the control word that was just saved so that the caller's
	 * exception masks are back in effect after the call.
	 *
	 * 8.6 X87 FPU EXCEPTION SYNCHRONIZATION -
	 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - Vol1
	 */
	__asm__ volatile ("fldcw %0" : : "m" (envp->__x87.__control));

	return (0);
}
DEF_STD(fegetenv);
313 
314 /*
315  * The feholdexcept() function saves the current floating-point environment
316  * in the object pointed to by envp, clears the floating-point status flags, and
317  * then installs a non-stop (continue on floating-point exceptions) mode, if
318  * available, for all floating-point exceptions.
319  */
320 int
321 feholdexcept(fenv_t *envp)
322 {
323 	unsigned int mxcsr;
324 
325 	/* Store the current x87 floating-point environment */
326 	__asm__ volatile ("fnstenv %0" : "=m" (*envp));
327 
328 	/* Clear all exception flags in FPU */
329 	__asm__ volatile ("fnclex");
330 
331 	if (__HAS_SSE) {
332 		/* Store the MXCSR register state */
333 		__asm__ volatile ("stmxcsr %0" : "=m" (envp->__mxcsr));
334 
335 		/* Clear exception flags in MXCSR */
336 		mxcsr = envp->__mxcsr;
337 		mxcsr &= ~FE_ALL_EXCEPT;
338 
339 		/* Mask all exceptions */
340 		mxcsr |= FE_ALL_EXCEPT << _SSE_MASK_SHIFT;
341 
342 		/* Store the MXCSR register */
343 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
344 	}
345 
346 	return (0);
347 }
348 DEF_STD(feholdexcept);
349 
350 /*
351  * The fesetenv() function attempts to establish the floating-point environment
352  * represented by the object pointed to by envp. The argument `envp' points
353  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
354  * floating-point environment macro. The fesetenv() function does not raise
355  * floating-point exceptions, but only installs the state of the floating-point
356  * status flags represented through its argument.
357  */
358 int
359 fesetenv(const fenv_t *envp)
360 {
361 	/* Load the x87 floating-point environent */
362 	__asm__ volatile ("fldenv %0" : : "m" (*envp));
363 
364 	/* Store the MXCSR register */
365 	if (__HAS_SSE)
366 		__asm__ volatile ("ldmxcsr %0" : : "m" (envp->__mxcsr));
367 
368 	return (0);
369 }
370 DEF_STD(fesetenv);
371 
372 /*
373  * The feupdateenv() function saves the currently raised floating-point
374  * exceptions in its automatic storage, installs the floating-point environment
375  * represented by the object pointed to by `envp', and then raises the saved
376  * floating-point exceptions. The argument `envp' shall point to an object set
377  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
378  * environment macro.
379  */
380 int
381 feupdateenv(const fenv_t *envp)
382 {
383 	unsigned short status;
384 	unsigned int mxcsr = 0;
385 
386 	/* Store the x87 status register */
387 	__asm__ volatile ("fnstsw %0" : "=am" (status));
388 
389 	/* Store the MXCSR register */
390 	if (__HAS_SSE)
391 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
392 
393 	/* Install new floating-point environment */
394 	fesetenv(envp);
395 
396 	/* Raise any previously accumulated exceptions */
397 	feraiseexcept(status | mxcsr);
398 
399 	return (0);
400 }
401 DEF_STD(feupdateenv);
402 
403 /*
 * The following functions are extensions to the standard
405  */
406 int
407 feenableexcept(int mask)
408 {
409 	unsigned int mxcsr = 0, omask;
410 	unsigned short control;
411 
412 	mask &= FE_ALL_EXCEPT;
413 
414 	__asm__ volatile ("fnstcw %0" : "=m" (control));
415 	if (__HAS_SSE)
416 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
417 
418 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
419 	control &= ~mask;
420 	__asm__ volatile ("fldcw %0" : : "m" (control));
421 
422 	if (__HAS_SSE) {
423 		mxcsr &= ~(mask << _SSE_MASK_SHIFT);
424 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
425 	}
426 
427 	return (omask);
428 }
429 
430 int
431 fedisableexcept(int mask)
432 {
433 	unsigned int mxcsr = 0, omask;
434 	unsigned short control;
435 
436 	mask &= FE_ALL_EXCEPT;
437 
438 	__asm__ volatile ("fnstcw %0" : "=m" (control));
439 	if (__HAS_SSE)
440 		__asm__ volatile ("stmxcsr %0" : "=m" (mxcsr));
441 
442 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
443 	control |= mask;
444 	__asm__ volatile ("fldcw %0" : : "m" (control));
445 
446 	if (__HAS_SSE) {
447 		mxcsr |= mask << _SSE_MASK_SHIFT;
448 		__asm__ volatile ("ldmxcsr %0" : : "m" (mxcsr));
449 	}
450 
451 	return (omask);
452 }
453 
/*
 * Return the set of exceptions that are currently enabled (unmasked),
 * as seen by the x87 control word.
 */
int
fegetexcept(void)
{
	unsigned short cw;
	int enabled;

	/*
	 * Only the x87 control word is read; the SSE unit is assumed to
	 * carry the same masks.
	 */
	__asm__ volatile ("fnstcw %0" : "=m" (cw));

	/* A clear mask bit means the exception is enabled */
	enabled = ~cw & FE_ALL_EXCEPT;

	return (enabled);
}
467