xref: /netbsd-src/lib/libm/arch/i387/fenv.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /* $NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $ */
2 
3 /*-
4  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __RCSID("$NetBSD: fenv.c,v 1.3 2010/08/01 06:34:38 taca Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/sysctl.h>
34 #include <assert.h>
35 #include <fenv.h>
36 #include <stddef.h>
37 #include <string.h>
38 
/*
 * Thin wrappers around the x87/SSE state instructions.
 *
 * Calling convention used throughout this file: the "load" wrappers
 * (__fldcw, __fldenv, __ldmxcsr) take the object itself (an lvalue used as
 * a memory input operand), while the "store" wrappers (__fnstcw, __fnstsw,
 * __fnstenv, __stmxcsr) take a pointer to the object that receives the
 * value.
 */

/* Load x87 Control Word from the object `__cw' */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word into `*__cw' */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word into `*__sw' (memory or the a register) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment from the object `__env' */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment into `*__env' */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * The `__env' parameter is not used by the expansion.
 */
#define	__fwait(__env)		__asm__	__volatile__	\
	("fwait")

/* Load the MXCSR register from the object `__mxcsr' */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state into `*__mxcsr' */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
74 
/*
 * The following constant represents the default floating-point environment
 * (that is, the one installed at program startup) and has type pointer to
 * const-qualified fenv_t.
 *
 * It can be used as an argument to the functions within the <fenv.h> header
 * that manage the floating-point environment, namely fesetenv() and
 * feupdateenv().
 *
 * x87 FPU registers are 16 bits wide. The upper bits, 31-16, are marked as
 * RESERVED. We provide a partial floating-point environment, where we
 * define only the lower bits. The reserved bits are extracted and set by the
 * consumers of FE_DFL_ENV, during runtime: fesetenv() and feupdateenv()
 * store the current x87 environment and copy its unused/others fields into
 * this object, which is why it is not declared const.
 */
fenv_t __fe_dfl_env = {
	{
		__NetBSD_NPXCW__,       /* Control word register */
		0x0,			/* Unused (filled in at runtime) */
		0x0000,                 /* Status word register */
		0x0,			/* Unused (filled in at runtime) */
		0x0000ffff,             /* Tag word register */
		0x0,			/* Unused (filled in at runtime) */
		{
			/* Remaining environment words (filled in at runtime) */
			0x0000, 0x0000,
			0x0000, 0xffff
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
104 
105 /*
106  * Test for SSE support on this processor.
107  *
108  * We need to use ldmxcsr/stmxcsr to get correct results if any part
109  * of the program was compiled to use SSE floating-point, but we can't
110  * use SSE on older processors.
111  *
112  * In order to do so, we need to query the processor capabilities via the CPUID
113  * instruction. We can make it even simpler though, by querying the machdep.sse
114  * sysctl.
115  */
/* Non-zero when SSE may be used; set once by __test_sse() below. */
static int __HAS_SSE = 0;

static void __test_sse(void) __attribute__ ((constructor));

/*
 * Constructor: read the machdep.sse sysctl straight into __HAS_SSE.
 * If the query fails, fall back to "no SSE".
 */
static void __test_sse(void)
{
	size_t len = sizeof(__HAS_SSE);

	if (sysctlbyname("machdep.sse", &__HAS_SSE, &len, NULL, 0) == -1)
		__HAS_SSE = 0;
}
129 
130 /*
131  * The feclearexcept() function clears the supported floating-point exceptions
132  * represented by `excepts'.
133  */
134 int
135 feclearexcept(int excepts)
136 {
137 	fenv_t env;
138 	uint32_t mxcsr;
139 	int ex;
140 
141 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
142 
143 	ex = excepts & FE_ALL_EXCEPT;
144 
145 	/* It's ~3x faster to call fnclex, than store/load fp env */
146 	if (ex == FE_ALL_EXCEPT) {
147 		__fnclex();
148 	} else {
149 		__fnstenv(&env);
150 		env.x87.status &= ~ex;
151 		__fldenv(env);
152 	}
153 
154 	if (__HAS_SSE) {
155 		__stmxcsr(&mxcsr);
156 		mxcsr &= ~ex;
157 		__ldmxcsr(mxcsr);
158 	}
159 
160 	/* Success */
161 	return (0);
162 }
163 
164 /*
165  * The fegetexceptflag() function stores an implementation-defined
166  * representation of the states of the floating-point status flags indicated by
167  * the argument excepts in the object pointed to by the argument flagp.
168  */
169 int
170 fegetexceptflag(fexcept_t *flagp, int excepts)
171 {
172 	uint32_t mxcsr;
173 	uint16_t status;
174 	int ex;
175 
176 	_DIAGASSERT(flagp != NULL);
177 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
178 
179 	ex = excepts & FE_ALL_EXCEPT;
180 
181 	__fnstsw(&status);
182 	if (__HAS_SSE)
183 		__stmxcsr(&mxcsr);
184 	else
185 		mxcsr = 0;
186 
187 	*flagp = (mxcsr | status) & ex;
188 
189 	/* Success */
190 	return (0);
191 }
192 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * Rather than executing an instruction that produces the exception as a
 * side effect (which the standard permits), the flags are written into the
 * status registers via fesetexceptflag(), which also validates the input.
 * The trailing fwait lets any now-pending unmasked x87 exception be
 * handled. Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags = excepts & FE_ALL_EXCEPT;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	fesetexceptflag(&flags, excepts);
	__fwait();

	/* Success */
	return (0);
}
217 
218 /*
219  * This function sets the floating-point status flags indicated by the argument
220  * `excepts' to the states stored in the object pointed to by `flagp'. It does
221  * NOT raise any floating-point exceptions, but only sets the state of the flags.
222  */
223 int
224 fesetexceptflag(const fexcept_t *flagp, int excepts)
225 {
226 	fenv_t env;
227 	uint32_t mxcsr;
228 	int ex;
229 
230 	_DIAGASSERT(flagp != NULL);
231 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
232 
233 	ex = excepts & FE_ALL_EXCEPT;
234 
235 	__fnstenv(&env);
236 	env.x87.status &= ~ex;
237 	env.x87.status |= *flagp & ex;
238 	__fldenv(env);
239 
240 	if (__HAS_SSE) {
241 		__stmxcsr(&mxcsr);
242 		mxcsr &= ~ex;
243 		mxcsr |= *flagp & ex;
244 		__ldmxcsr(mxcsr);
245 	}
246 
247 	/* Success */
248 	return (0);
249 }
250 
251 /*
252  * The fetestexcept() function determines which of a specified subset of the
253  * floating-point exception flags are currently set. The `excepts' argument
254  * specifies the floating-point status flags to be queried.
255  */
256 int
257 fetestexcept(int excepts)
258 {
259 	uint32_t mxcsr;
260 	uint16_t status;
261 	int ex;
262 
263 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
264 
265 	ex = excepts & FE_ALL_EXCEPT;
266 
267 	__fnstsw(&status);
268 	if (__HAS_SSE)
269 		__stmxcsr(&mxcsr);
270 	else
271 		mxcsr = 0;
272 
273 	return ((status | mxcsr) & ex);
274 }
275 
276 int
277 fegetround(void)
278 {
279 	uint16_t control;
280 
281 	/*
282 	 * We assume that the x87 and the SSE unit agree on the
283 	 * rounding mode.  Reading the control word on the x87 turns
284 	 * out to be about 5 times faster than reading it on the SSE
285 	 * unit on an Opteron 244.
286 	 */
287 	__fnstcw(&control);
288 
289 	return (control & __X87_ROUND_MASK);
290 }
291 
292 /*
293  * The fesetround() function shall establish the rounding direction represented
294  * by its argument round. If the argument is not equal to the value of a
295  * rounding direction macro, the rounding direction is not changed.
296  */
297 int
298 fesetround(int round)
299 {
300 	uint32_t mxcsr;
301 	uint16_t control;
302 
303 	if (round & ~__X87_ROUND_MASK) {
304 		/* Failure */
305 		return (-1);
306 	}
307 
308 	__fnstcw(&control);
309 	control &= ~__X87_ROUND_MASK;
310 	control |= round;
311 	__fldcw(control);
312 
313 	if (__HAS_SSE) {
314 		__stmxcsr(&mxcsr);
315 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
316 		mxcsr |= round << __SSE_ROUND_SHIFT;
317 		__ldmxcsr(mxcsr);
318 	}
319 
320 	/* Success */
321 	return (0);
322 }
323 
324 /*
325  * The fegetenv() function attempts to store the current floating-point
326  * environment in the object pointed to by envp.
327  */
328 int
329 fegetenv(fenv_t *envp)
330 {
331 	uint32_t mxcsr;
332 
333 	_DIAGASSERT(flagp != NULL);
334 
335 	/*
336 	 * fnstenv masks all exceptions, so we need to restore the old control
337 	 * word to avoid this side effect.
338 	 */
339 	__fnstenv(envp);
340 	__fldcw(envp->x87.control);
341 	if (__HAS_SSE) {
342 		__stmxcsr(&mxcsr);
343 		envp->mxcsr = mxcsr;
344 	}
345 
346 	/* Success */
347 	return (0);
348 }
349 
350 /*
351  * The feholdexcept() function saves the current floating-point environment in
352  * the object pointed to by envp, clears the floating-point status flags, and
353  * then installs a non-stop (continue on floating-point exceptions) mode, if
354  * available, for all floating-point exceptions.
355  */
356 int
357 feholdexcept(fenv_t *envp)
358 {
359 	uint32_t mxcsr;
360 
361 	_DIAGASSERT(envp != NULL);
362 
363 	__fnstenv(envp);
364 	__fnclex();
365 	if (__HAS_SSE) {
366 		__stmxcsr(&mxcsr);
367 		envp->mxcsr = mxcsr;
368 		mxcsr &= ~FE_ALL_EXCEPT;
369 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
370 		__ldmxcsr(mxcsr);
371 	}
372 
373 	/* Success */
374 	return (0);
375 }
376 
377 /*
378  * The fesetenv() function attempts to establish the floating-point environment
379  * represented by the object pointed to by envp. The argument `envp' points
380  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
381  * floating-point environment macro. The fesetenv() function does not raise
382  * floating-point exceptions, but only installs the state of the floating-point
383  * status flags represented through its argument.
384  */
385 int
386 fesetenv(const fenv_t *envp)
387 {
388 	fenv_t env;
389 
390 	_DIAGASSERT(envp != NULL);
391 
392 	/* Store the x87 floating-point environment */
393 	memset(&env, 0, sizeof(env));
394 	__fnstenv(&env);
395 
396 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
397 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
398 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
399 	memcpy(__fe_dfl_env.x87.others,
400 	       env.x87.others,
401 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
402 
403 	__fldenv(envp->x87);
404 	if (__HAS_SSE)
405 		__ldmxcsr(envp->mxcsr);
406 
407 	/* Success */
408 	return (0);
409 }
410 
411 /*
412  * The feupdateenv() function saves the currently raised floating-point
413  * exceptions in its automatic storage, installs the floating-point environment
414  * represented by the object pointed to by `envp', and then raises the saved
415  * floating-point exceptions. The argument `envp' shall point to an object set
416  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
417  * environment macro.
418  */
419 int
420 feupdateenv(const fenv_t *envp)
421 {
422 	fenv_t env;
423 	uint32_t mxcsr;
424 	uint16_t status;
425 
426 	_DIAGASSERT(envp != NULL);
427 
428 	/* Store the x87 floating-point environment */
429 	memset(&env, 0, sizeof(env));
430 	__fnstenv(&env);
431 
432 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
433 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
434 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
435 	memcpy(__fe_dfl_env.x87.others,
436 	       env.x87.others,
437 	       sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
438 
439 	__fnstsw(&status);
440 	if (__HAS_SSE)
441 		__stmxcsr(&mxcsr);
442 	else
443 		mxcsr = 0;
444 	fesetenv(envp);
445 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
446 
447 	/* Success */
448 	return (0);
449 }
450 
451 /*
452  * The following functions are extentions to the standard
453  */
454 int
455 feenableexcept(int mask)
456 {
457 	uint32_t mxcsr, omask;
458 	uint16_t control;
459 
460 	mask &= FE_ALL_EXCEPT;
461 	__fnstcw(&control);
462 	if (__HAS_SSE)
463 		__stmxcsr(&mxcsr);
464 	else
465 		mxcsr = 0;
466 
467 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
468 	control &= ~mask;
469 	__fldcw(control);
470 	if (__HAS_SSE) {
471 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
472 		__ldmxcsr(mxcsr);
473 	}
474 
475 	return (~omask);
476 }
477 
478 int
479 fedisableexcept(int mask)
480 {
481 	uint32_t mxcsr, omask;
482 	uint16_t control;
483 
484 	mask &= FE_ALL_EXCEPT;
485 	__fnstcw(&control);
486 	if (__HAS_SSE)
487 		__stmxcsr(&mxcsr);
488 	else
489 		mxcsr = 0;
490 
491 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
492 	control |= mask;
493 	__fldcw(control);
494 	if (__HAS_SSE) {
495 		mxcsr |= mask << __SSE_EMASK_SHIFT;
496 		__ldmxcsr(mxcsr);
497 	}
498 
499 	return (~omask);
500 }
501 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, as a subset of FE_ALL_EXCEPT.
 */
int
fegetexcept(void)
{
	uint16_t control;

	/*
	 * We assume that the masks for the x87 and the SSE unit are
	 * the same.
	 */
	__fnstcw(&control);

	/*
	 * The control word holds mask bits: feenableexcept() clears them
	 * and fedisableexcept() sets them. The enabled set is therefore the
	 * complement of the stored bits.
	 */
	return (~control & FE_ALL_EXCEPT);
}
515