xref: /netbsd-src/lib/libm/arch/i387/fenv.c (revision 87d689fb734c654d2486f87f7be32f1b53ecdbec)
1 /* $NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $ */
2 
3 /*-
4  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __RCSID("$NetBSD: fenv.c,v 1.8 2017/03/22 23:11:08 chs Exp $");
31 
32 #include "namespace.h"
33 
34 #include <sys/param.h>
35 #include <sys/sysctl.h>
36 #include <assert.h>
37 #include <fenv.h>
38 #include <stddef.h>
39 #include <string.h>
40 
/*
 * When the toolchain provides __weak_alias, publish every public <fenv.h>
 * entry point implemented in this file as a weak alias of the symbol with
 * the same name prefixed by an underscore.
 *
 * NOTE(review): presumably "namespace.h" renames the definitions below to
 * the underscore-prefixed symbols -- confirm against that header.
 */
#ifdef __weak_alias
__weak_alias(feclearexcept,_feclearexcept)
__weak_alias(fedisableexcept,_fedisableexcept)
__weak_alias(feenableexcept,_feenableexcept)
__weak_alias(fegetenv,_fegetenv)
__weak_alias(fegetexcept,_fegetexcept)
__weak_alias(fegetexceptflag,_fegetexceptflag)
__weak_alias(fegetround,_fegetround)
__weak_alias(feholdexcept,_feholdexcept)
__weak_alias(feraiseexcept,_feraiseexcept)
__weak_alias(fesetenv,_fesetenv)
__weak_alias(fesetexceptflag,_fesetexceptflag)
__weak_alias(fesetround,_fesetround)
__weak_alias(fetestexcept,_fetestexcept)
__weak_alias(feupdateenv,_feupdateenv)
#endif
57 
/*
 * Thin inline-assembly wrappers around the x87 and SSE control/status
 * instructions used by this file.
 *
 * Convention: macros that LOAD state into the FPU take the source object
 * itself (an lvalue), while macros that STORE state take a pointer to the
 * destination object.
 */

/* Load x87 Control Word from the lvalue __cw */
#define	__fldcw(__cw)		__asm__ __volatile__	\
	("fldcw %0" : : "m" (__cw))

/* No-Wait Store Control Word into *__cw */
#define	__fnstcw(__cw)		__asm__ __volatile__	\
	("fnstcw %0" : "=m" (*(__cw)))

/* No-Wait Store Status Word into *__sw (via %ax or directly to memory) */
#define	__fnstsw(__sw)		__asm__ __volatile__	\
	("fnstsw %0" : "=am" (*(__sw)))

/* No-Wait Clear Exception Flags */
#define	__fnclex()		__asm__ __volatile__	\
	("fnclex")

/* Load x87 Environment from the lvalue __env */
#define	__fldenv(__env)		__asm__ __volatile__	\
	("fldenv %0" : : "m" (__env))

/* No-Wait Store x87 environment into *__env */
#define	__fnstenv(__env)	__asm__ __volatile__	\
	("fnstenv %0" : "=m" (*(__env)))

/*
 * Check for and handle pending unmasked x87 FPU exceptions.
 * Takes no operand, so the macro takes no argument either.
 */
#define	__fwait()		__asm__	__volatile__	\
	("fwait")

/* Load the MXCSR register from the lvalue __mxcsr */
#define	__ldmxcsr(__mxcsr)	__asm__ __volatile__	\
	("ldmxcsr %0" : : "m" (__mxcsr))

/* Store the MXCSR register state into *__mxcsr */
#define	__stmxcsr(__mxcsr)	__asm__ __volatile__	\
	("stmxcsr %0" : "=m" (*(__mxcsr)))
93 
/*
 * The following object represents the default floating-point environment
 * (that is, the one installed at program startup).
 *
 * It can be used as an argument to the functions within the <fenv.h> header
 * that manage the floating-point environment, namely fesetenv() and
 * feupdateenv().
 *
 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
 * RESERVED. We provide a partial floating-point environment, where we
 * define only the lower bits. The reserved bits are extracted and set by the
 * consumers of FE_DFL_ENV, during runtime.
 *
 * The object is deliberately not const: in this file __init_libm()
 * overwrites x87.control with the control word found at start-up, and
 * fesetenv()/feupdateenv() overwrite the x87.unused1..3 and x87.others
 * fields from the live environment before loading.
 *
 * The initializer is positional, so it must follow the member order of
 * fenv_t exactly.
 */
fenv_t __fe_dfl_env = {
	{
		__NetBSD_NPXCW__,       /* Control word register */
		0x0,			/* Unused */
		0x0000,                 /* Status word register */
		0x0,			/* Unused */
		0x0000ffff,             /* Tag word register */
		0x0,			/* Unused */
		{
			/*
			 * Remaining environment words; presumably the
			 * `others' array -- verify against fenv_t.
			 */
			0x0000, 0x0000,
			0x0000, 0xffff
		}
	},
	__INITIAL_MXCSR__		/* MXCSR register */
};
123 
124 /*
125  * Test for SSE support on this processor.
126  *
127  * We need to use ldmxcsr/stmxcsr to get correct results if any part
128  * of the program was compiled to use SSE floating-point, but we can't
129  * use SSE on older processors.
130  *
131  * In order to do so, we need to query the processor capabilities via the CPUID
132  * instruction. We can make it even simpler though, by querying the machdep.sse
133  * sysctl.
134  */
135 static int __HAS_SSE = 0;
136 
137 static void __init_libm(void) __attribute__ ((constructor, used));
138 
139 static void __init_libm(void)
140 {
141 	size_t oldlen = sizeof(__HAS_SSE);
142 	int rv;
143 	uint16_t control;
144 
145 	rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
146 	if (rv == -1)
147 		__HAS_SSE = 0;
148 
149 	__fnstcw(&control);
150 	__fe_dfl_env.x87.control = control;
151 }
152 
153 /*
154  * The feclearexcept() function clears the supported floating-point exceptions
155  * represented by `excepts'.
156  */
157 int
158 feclearexcept(int excepts)
159 {
160 	fenv_t env;
161 	uint32_t mxcsr;
162 	int ex;
163 
164 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
165 
166 	ex = excepts & FE_ALL_EXCEPT;
167 
168 	/* It's ~3x faster to call fnclex, than store/load fp env */
169 	if (ex == FE_ALL_EXCEPT) {
170 		__fnclex();
171 	} else {
172 		__fnstenv(&env);
173 		env.x87.status &= ~ex;
174 		__fldenv(env);
175 	}
176 
177 	if (__HAS_SSE) {
178 		__stmxcsr(&mxcsr);
179 		mxcsr &= ~ex;
180 		__ldmxcsr(mxcsr);
181 	}
182 
183 	/* Success */
184 	return (0);
185 }
186 
187 /*
188  * The fegetexceptflag() function stores an implementation-defined
189  * representation of the states of the floating-point status flags indicated by
190  * the argument excepts in the object pointed to by the argument flagp.
191  */
192 int
193 fegetexceptflag(fexcept_t *flagp, int excepts)
194 {
195 	uint32_t mxcsr;
196 	uint16_t status;
197 	int ex;
198 
199 	_DIAGASSERT(flagp != NULL);
200 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
201 
202 	ex = excepts & FE_ALL_EXCEPT;
203 
204 	__fnstsw(&status);
205 	if (__HAS_SSE)
206 		__stmxcsr(&mxcsr);
207 	else
208 		mxcsr = 0;
209 
210 	*flagp = (mxcsr | status) & ex;
211 
212 	/* Success */
213 	return (0);
214 }
215 
/*
 * feraiseexcept(): raise the supported floating-point exceptions named by
 * `excepts'.
 *
 * The standard would let us execute an instruction whose side effect is the
 * exception, but instead we write the flags straight into the status
 * registers through fesetexceptflag() and then issue an fwait.
 *
 * Input validation is left to fesetexceptflag().  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
	fexcept_t flags;

	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

	flags = excepts & FE_ALL_EXCEPT;

	(void)fesetexceptflag(&flags, excepts);
	__fwait();

	return (0);
}
240 
241 /*
242  * This function sets the floating-point status flags indicated by the argument
243  * `excepts' to the states stored in the object pointed to by `flagp'. It does
244  * NOT raise any floating-point exceptions, but only sets the state of the flags.
245  */
246 int
247 fesetexceptflag(const fexcept_t *flagp, int excepts)
248 {
249 	fenv_t env;
250 	uint32_t mxcsr;
251 	int ex;
252 
253 	_DIAGASSERT(flagp != NULL);
254 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
255 
256 	ex = excepts & FE_ALL_EXCEPT;
257 
258 	__fnstenv(&env);
259 	env.x87.status &= ~ex;
260 	env.x87.status |= *flagp & ex;
261 	__fldenv(env);
262 
263 	if (__HAS_SSE) {
264 		__stmxcsr(&mxcsr);
265 		mxcsr &= ~ex;
266 		mxcsr |= *flagp & ex;
267 		__ldmxcsr(mxcsr);
268 	}
269 
270 	/* Success */
271 	return (0);
272 }
273 
274 /*
275  * The fetestexcept() function determines which of a specified subset of the
276  * floating-point exception flags are currently set. The `excepts' argument
277  * specifies the floating-point status flags to be queried.
278  */
279 int
280 fetestexcept(int excepts)
281 {
282 	uint32_t mxcsr;
283 	uint16_t status;
284 	int ex;
285 
286 	_DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
287 
288 	ex = excepts & FE_ALL_EXCEPT;
289 
290 	__fnstsw(&status);
291 	if (__HAS_SSE)
292 		__stmxcsr(&mxcsr);
293 	else
294 		mxcsr = 0;
295 
296 	return ((status | mxcsr) & ex);
297 }
298 
299 int
300 fegetround(void)
301 {
302 	uint16_t control;
303 
304 	/*
305 	 * We assume that the x87 and the SSE unit agree on the
306 	 * rounding mode.  Reading the control word on the x87 turns
307 	 * out to be about 5 times faster than reading it on the SSE
308 	 * unit on an Opteron 244.
309 	 */
310 	__fnstcw(&control);
311 
312 	return (control & __X87_ROUND_MASK);
313 }
314 
315 /*
316  * The fesetround() function shall establish the rounding direction represented
317  * by its argument round. If the argument is not equal to the value of a
318  * rounding direction macro, the rounding direction is not changed.
319  */
320 int
321 fesetround(int round)
322 {
323 	uint32_t mxcsr;
324 	uint16_t control;
325 
326 	if (round & ~__X87_ROUND_MASK) {
327 		/* Failure */
328 		return (-1);
329 	}
330 
331 	__fnstcw(&control);
332 	control &= ~__X87_ROUND_MASK;
333 	control |= round;
334 	__fldcw(control);
335 
336 	if (__HAS_SSE) {
337 		__stmxcsr(&mxcsr);
338 		mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
339 		mxcsr |= round << __SSE_ROUND_SHIFT;
340 		__ldmxcsr(mxcsr);
341 	}
342 
343 	/* Success */
344 	return (0);
345 }
346 
347 /*
348  * The fegetenv() function attempts to store the current floating-point
349  * environment in the object pointed to by envp.
350  */
351 int
352 fegetenv(fenv_t *envp)
353 {
354 	uint32_t mxcsr;
355 
356 	_DIAGASSERT(flagp != NULL);
357 
358 	/*
359 	 * fnstenv masks all exceptions, so we need to restore the old control
360 	 * word to avoid this side effect.
361 	 */
362 	__fnstenv(envp);
363 	__fldcw(envp->x87.control);
364 	if (__HAS_SSE) {
365 		__stmxcsr(&mxcsr);
366 		envp->mxcsr = mxcsr;
367 	}
368 
369 	/* Success */
370 	return (0);
371 }
372 
373 /*
374  * The feholdexcept() function saves the current floating-point environment in
375  * the object pointed to by envp, clears the floating-point status flags, and
376  * then installs a non-stop (continue on floating-point exceptions) mode, if
377  * available, for all floating-point exceptions.
378  */
379 int
380 feholdexcept(fenv_t *envp)
381 {
382 	uint32_t mxcsr;
383 
384 	_DIAGASSERT(envp != NULL);
385 
386 	__fnstenv(envp);
387 	__fnclex();
388 	if (__HAS_SSE) {
389 		__stmxcsr(&mxcsr);
390 		envp->mxcsr = mxcsr;
391 		mxcsr &= ~FE_ALL_EXCEPT;
392 		mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
393 		__ldmxcsr(mxcsr);
394 	}
395 
396 	/* Success */
397 	return (0);
398 }
399 
400 /*
401  * The fesetenv() function attempts to establish the floating-point environment
402  * represented by the object pointed to by envp. The argument `envp' points
403  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
404  * floating-point environment macro. The fesetenv() function does not raise
405  * floating-point exceptions, but only installs the state of the floating-point
406  * status flags represented through its argument.
407  */
408 int
409 fesetenv(const fenv_t *envp)
410 {
411 	fenv_t env;
412 
413 	_DIAGASSERT(envp != NULL);
414 
415 	/* Store the x87 floating-point environment */
416 	memset(&env, 0, sizeof(env));
417 	__fnstenv(&env);
418 
419 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
420 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
421 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
422 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
423 	    sizeof(__fe_dfl_env.x87.others));
424 
425 	__fldenv(envp->x87);
426 	if (__HAS_SSE)
427 		__ldmxcsr(envp->mxcsr);
428 
429 	/* Success */
430 	return (0);
431 }
432 
433 /*
434  * The feupdateenv() function saves the currently raised floating-point
435  * exceptions in its automatic storage, installs the floating-point environment
436  * represented by the object pointed to by `envp', and then raises the saved
437  * floating-point exceptions. The argument `envp' shall point to an object set
438  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
439  * environment macro.
440  */
441 int
442 feupdateenv(const fenv_t *envp)
443 {
444 	fenv_t env;
445 	uint32_t mxcsr;
446 	uint16_t status;
447 
448 	_DIAGASSERT(envp != NULL);
449 
450 	/* Store the x87 floating-point environment */
451 	memset(&env, 0, sizeof(env));
452 	__fnstenv(&env);
453 
454 	__fe_dfl_env.x87.unused1 = env.x87.unused1;
455 	__fe_dfl_env.x87.unused2 = env.x87.unused2;
456 	__fe_dfl_env.x87.unused3 = env.x87.unused3;
457 	memcpy(__fe_dfl_env.x87.others, env.x87.others,
458 	    sizeof(__fe_dfl_env.x87.others));
459 
460 	__fnstsw(&status);
461 	if (__HAS_SSE)
462 		__stmxcsr(&mxcsr);
463 	else
464 		mxcsr = 0;
465 	fesetenv(envp);
466 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
467 
468 	/* Success */
469 	return (0);
470 }
471 
/*
 * The following functions are extensions to the standard
 */
475 int
476 feenableexcept(int mask)
477 {
478 	uint32_t mxcsr, omask;
479 	uint16_t control;
480 
481 	mask &= FE_ALL_EXCEPT;
482 	__fnstcw(&control);
483 	if (__HAS_SSE)
484 		__stmxcsr(&mxcsr);
485 	else
486 		mxcsr = 0;
487 
488 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
489 	control &= ~mask;
490 	__fldcw(control);
491 	if (__HAS_SSE) {
492 		mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
493 		__ldmxcsr(mxcsr);
494 	}
495 
496 	return (FE_ALL_EXCEPT & ~omask);
497 }
498 
499 int
500 fedisableexcept(int mask)
501 {
502 	uint32_t mxcsr, omask;
503 	uint16_t control;
504 
505 	mask &= FE_ALL_EXCEPT;
506 	__fnstcw(&control);
507 	if (__HAS_SSE)
508 		__stmxcsr(&mxcsr);
509 	else
510 		mxcsr = 0;
511 
512 	omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
513 	control |= mask;
514 	__fldcw(control);
515 	if (__HAS_SSE) {
516 		mxcsr |= mask << __SSE_EMASK_SHIFT;
517 		__ldmxcsr(mxcsr);
518 	}
519 
520 	return (FE_ALL_EXCEPT & ~omask);
521 }
522 
/*
 * fegetexcept(): return the set of currently enabled (unmasked)
 * exceptions, derived from the x87 control word.
 */
int
fegetexcept(void)
{
	uint16_t cw;
	int enabled;

	/*
	 * Only the x87 is consulted; the SSE unit is assumed to carry
	 * the same masks.
	 */
	__fnstcw(&cw);

	/* A clear mask bit means the exception is enabled */
	enabled = ~cw & FE_ALL_EXCEPT;

	return (enabled);
}
536