xref: /openbsd-src/lib/libm/arch/i387/fenv.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: fenv.c,v 1.2 2011/04/28 17:34:23 martynas Exp $	*/
2 /*	$NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $	*/
3 
4 /*-
5  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <machine/cpu.h>
32 #include <machine/npx.h>
33 #include <sys/param.h>
34 #include <sys/sysctl.h>
35 
36 #include <fenv.h>
37 
38 /*
39  * The following constant represents the default floating-point environment
40  * (that is, the one installed at program startup) and has type pointer to
41  * const-qualified fenv_t.
42  *
43  * It can be used as an argument to the functions within the <fenv.h> header
44  * that manage the floating-point environment, namely fesetenv() and
45  * feupdateenv().
46  *
47  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
48  * RESERVED.
49  */
50 fenv_t __fe_dfl_env = {
51 	{
52 		0xffff0000 | __INITIAL_NPXCW__,	/* Control word register */
53 		0xffff0000,			/* Status word register */
54 		0xffffffff,			/* Tag word register */
55 		{
56 			0x00000000,
57 			0x00000000,
58 			0x00000000,
59 			0xffff0000
60 		}
61 	},
62 	__INITIAL_MXCSR__		/* MXCSR register */
63 };
64 
65 /*
66  * Test for SSE support on this processor.
67  *
68  * We need to use ldmxcsr/stmxcsr to get correct results if any part
69  * of the program was compiled to use SSE floating-point, but we can't
70  * use SSE on older processors.
71  *
72  * In order to do so, we need to query the processor capabilities via the CPUID
73  * instruction. We can make it even simpler though, by querying the machdep.sse
74  * sysctl.
75  */
76 static int __HAS_SSE = 0;
77 
78 static void __test_sse(void) __attribute__ ((constructor));
79 
80 static void __test_sse(void)
81 {
82 	size_t oldlen = sizeof(__HAS_SSE);
83 	int mib[2] = { CTL_MACHDEP, CPU_SSE };
84 	int rv;
85 
86 	rv = sysctl(mib, 2, &__HAS_SSE, &oldlen, NULL, 0);
87 	if (rv == -1)
88 		__HAS_SSE = 0;
89 }
90 
91 /*
92  * The feclearexcept() function clears the supported floating-point exceptions
93  * represented by `excepts'.
94  */
95 int
96 feclearexcept(int excepts)
97 {
98 	fenv_t fenv;
99 	unsigned int mxcsr;
100 
101 	excepts &= FE_ALL_EXCEPT;
102 
103 	/* Store the current x87 floating-point environment */
104 	__asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
105 
106 	/* Clear the requested floating-point exceptions */
107 	fenv.__x87.__status &= ~excepts;
108 
109 	/* Load the x87 floating-point environent */
110 	__asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
111 
112 	/* Same for SSE environment */
113 	if (__HAS_SSE) {
114 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
115 		mxcsr &= ~excepts;
116 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
117 	}
118 
119 	return (0);
120 }
121 
122 /*
123  * The fegetexceptflag() function stores an implementation-defined
124  * representation of the states of the floating-point status flags indicated by
125  * the argument excepts in the object pointed to by the argument flagp.
126  */
127 int
128 fegetexceptflag(fexcept_t *flagp, int excepts)
129 {
130 	unsigned short status;
131 	unsigned int mxcsr = 0;
132 
133 	excepts &= FE_ALL_EXCEPT;
134 
135 	/* Store the current x87 status register */
136 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
137 
138 	/* Store the MXCSR register */
139 	if (__HAS_SSE)
140 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
141 
142 	/* Store the results in flagp */
143 	*flagp = (status | mxcsr) & excepts;
144 
145 	return (0);
146 }
147 
/*
 * The feraiseexcept() function raises the supported floating-point exceptions
 * represented by the argument `excepts'.
 *
 * The standard explicitly allows us to execute an instruction that has the
 * exception as a side effect, but we choose to manipulate the status register
 * directly: the flags are installed through fesetexceptflag().
 *
 * Bits outside FE_ALL_EXCEPT are ignored.  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	excepts &= FE_ALL_EXCEPT;

	/*
	 * Hand fesetexceptflag() a real fexcept_t object instead of a cast
	 * pointer to the int argument, so this does not depend on fexcept_t
	 * having the same size and representation as int.
	 */
	flags = (fexcept_t)excepts;
	fesetexceptflag(&flags, excepts);

	/*
	 * NOTE(review): fwait presumably makes the x87 act on any exception
	 * that has just become pending and is unmasked -- confirm against the
	 * FWAIT description in the Intel SDM.
	 */
	__asm__ __volatile__ ("fwait");

	return (0);
}
168 
169 /*
170  * This function sets the floating-point status flags indicated by the argument
171  * `excepts' to the states stored in the object pointed to by `flagp'. It does
172  * NOT raise any floating-point exceptions, but only sets the state of the flags.
173  */
174 int
175 fesetexceptflag(const fexcept_t *flagp, int excepts)
176 {
177 	fenv_t fenv;
178 	unsigned int mxcsr;
179 
180 	excepts &= FE_ALL_EXCEPT;
181 
182 	/* Store the current x87 floating-point environment */
183 	__asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
184 
185 	/* Set the requested status flags */
186 	fenv.__x87.__status &= ~excepts;
187 	fenv.__x87.__status |= *flagp & excepts;
188 
189 	/* Load the x87 floating-point environent */
190 	__asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
191 
192 	/* Same for SSE environment */
193 	if (__HAS_SSE) {
194 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
195 		mxcsr &= ~excepts;
196 		mxcsr |= *flagp & excepts;
197 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
198 	}
199 
200 	return (0);
201 }
202 
203 /*
204  * The fetestexcept() function determines which of a specified subset of the
205  * floating-point exception flags are currently set. The `excepts' argument
206  * specifies the floating-point status flags to be queried.
207  */
208 int
209 fetestexcept(int excepts)
210 {
211 	unsigned short status;
212 	unsigned int mxcsr = 0;
213 
214 	excepts &= FE_ALL_EXCEPT;
215 
216 	/* Store the current x87 status register */
217 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
218 
219 	/* Store the MXCSR register state */
220 	if (__HAS_SSE)
221 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
222 
223 	return ((status | mxcsr) & excepts);
224 }
225 
226 /*
227  * The fegetround() function gets the current rounding direction.
228  */
229 int
230 fegetround(void)
231 {
232 	unsigned short control;
233 
234 	/*
235 	 * We assume that the x87 and the SSE unit agree on the
236 	 * rounding mode.  Reading the control word on the x87 turns
237 	 * out to be about 5 times faster than reading it on the SSE
238 	 * unit on an Opteron 244.
239 	 */
240 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
241 
242 	return (control & _X87_ROUND_MASK);
243 }
244 
245 /*
246  * The fesetround() function establishes the rounding direction represented by
247  * its argument `round'. If the argument is not equal to the value of a rounding
248  * direction macro, the rounding direction is not changed.
249  */
250 int
251 fesetround(int round)
252 {
253 	unsigned short control;
254 	unsigned int mxcsr;
255 
256 	/* Check whether requested rounding direction is supported */
257 	if (round & ~_X87_ROUND_MASK)
258 		return (-1);
259 
260 	/* Store the current x87 control word register */
261 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
262 
263 	/* Set the rounding direction */
264 	control &= ~_X87_ROUND_MASK;
265 	control |= round;
266 
267 	/* Load the x87 control word register */
268 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
269 
270 	/* Same for the SSE environment */
271 	if (__HAS_SSE) {
272 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
273 		mxcsr &= ~(_X87_ROUND_MASK << _SSE_ROUND_SHIFT);
274 		mxcsr |= round << _SSE_ROUND_SHIFT;
275 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
276 	}
277 
278 	return (0);
279 }
280 
281 /*
282  * The fegetenv() function attempts to store the current floating-point
283  * environment in the object pointed to by envp.
284  */
285 int
286 fegetenv(fenv_t *envp)
287 {
288 	/* Store the current x87 floating-point environment */
289 	__asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
290 
291 	/* Store the MXCSR register state */
292 	if (__HAS_SSE)
293 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
294 
295 	/*
296 	 * When an FNSTENV instruction is executed, all pending exceptions are
297 	 * essentially lost (either the x87 FPU status register is cleared or
298 	 * all exceptions are masked).
299 	 *
300 	 * 8.6 X87 FPU EXCEPTION SYNCHRONIZATION -
301 	 * Intel(R) 64 and IA-32 Architectures Softare Developer's Manual - Vol1
302 	 */
303 	__asm__ __volatile__ ("fldcw %0" : : "m" (envp->__x87.__control));
304 
305 	return (0);
306 }
307 
308 /*
309  * The feholdexcept() function saves the current floating-point environment
310  * in the object pointed to by envp, clears the floating-point status flags, and
311  * then installs a non-stop (continue on floating-point exceptions) mode, if
312  * available, for all floating-point exceptions.
313  */
314 int
315 feholdexcept(fenv_t *envp)
316 {
317 	unsigned int mxcsr;
318 
319 	/* Store the current x87 floating-point environment */
320 	__asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
321 
322 	/* Clear all exception flags in FPU */
323 	__asm__ __volatile__ ("fnclex");
324 
325 	if (__HAS_SSE) {
326 		/* Store the MXCSR register state */
327 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
328 
329 		/* Clear exception flags in MXCSR */
330 		mxcsr = envp->__mxcsr;
331 		mxcsr &= ~FE_ALL_EXCEPT;
332 
333 		/* Mask all exceptions */
334 		mxcsr |= FE_ALL_EXCEPT << _SSE_MASK_SHIFT;
335 
336 		/* Store the MXCSR register */
337 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
338 	}
339 
340 	return (0);
341 }
342 
343 /*
344  * The fesetenv() function attempts to establish the floating-point environment
345  * represented by the object pointed to by envp. The argument `envp' points
346  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
347  * floating-point environment macro. The fesetenv() function does not raise
348  * floating-point exceptions, but only installs the state of the floating-point
349  * status flags represented through its argument.
350  */
351 int
352 fesetenv(const fenv_t *envp)
353 {
354 	/* Load the x87 floating-point environent */
355 	__asm__ __volatile__ ("fldenv %0" : : "m" (*envp));
356 
357 	/* Store the MXCSR register */
358 	if (__HAS_SSE)
359 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (envp->__mxcsr));
360 
361 	return (0);
362 }
363 
364 /*
365  * The feupdateenv() function saves the currently raised floating-point
366  * exceptions in its automatic storage, installs the floating-point environment
367  * represented by the object pointed to by `envp', and then raises the saved
368  * floating-point exceptions. The argument `envp' shall point to an object set
369  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
370  * environment macro.
371  */
372 int
373 feupdateenv(const fenv_t *envp)
374 {
375 	unsigned short status;
376 	unsigned int mxcsr = 0;
377 
378 	/* Store the x87 status register */
379 	__asm__ __volatile__ ("fnstsw %0" : "=am" (status));
380 
381 	/* Store the MXCSR register */
382 	if (__HAS_SSE)
383 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
384 
385 	/* Install new floating-point environment */
386 	fesetenv(envp);
387 
388 	/* Raise any previously accumulated exceptions */
389 	feraiseexcept(status | mxcsr);
390 
391 	return (0);
392 }
393 
/*
 * The following functions are extensions to the standard.
 */
397 int
398 feenableexcept(int mask)
399 {
400 	unsigned int mxcsr = 0, omask;
401 	unsigned short control;
402 
403 	mask &= FE_ALL_EXCEPT;
404 
405 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
406 	if (__HAS_SSE)
407 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
408 
409 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
410 	control &= ~mask;
411 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
412 
413 	if (__HAS_SSE) {
414 		mxcsr &= ~(mask << _SSE_MASK_SHIFT);
415 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
416 	}
417 
418 	return (omask);
419 }
420 
421 int
422 fedisableexcept(int mask)
423 {
424 	unsigned int mxcsr = 0, omask;
425 	unsigned short control;
426 
427 	mask &= FE_ALL_EXCEPT;
428 
429 	__asm__ __volatile__ ("fnstcw %0" : "=m" (control));
430 	if (__HAS_SSE)
431 		__asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
432 
433 	omask = ~(control | (mxcsr >> _SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
434 	control |= mask;
435 	__asm__ __volatile__ ("fldcw %0" : : "m" (control));
436 
437 	if (__HAS_SSE) {
438 		mxcsr |= mask << _SSE_MASK_SHIFT;
439 		__asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
440 	}
441 
442 	return (omask);
443 }
444 
/*
 * fegetexcept() returns the set of floating-point exceptions that are
 * currently unmasked, as a combination of the FE_ALL_EXCEPT bits.
 */
int
fegetexcept(void)
{
	unsigned short cw;

	/*
	 * Only the x87 control word is read: the masks of the x87 and of the
	 * SSE unit are assumed to be the same.
	 */
	__asm__ __volatile__ ("fnstcw %0" : "=m" (cw));

	/* A set mask bit means "disabled", hence the complement */
	return (FE_ALL_EXCEPT & ~cw);
}
458