1 /* $NetBSD: fenv.c,v 1.9 2018/01/25 03:54:21 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __RCSID("$NetBSD: fenv.c,v 1.9 2018/01/25 03:54:21 christos Exp $"); 31 32 #include "namespace.h" 33 34 #include <sys/param.h> 35 #include <sys/sysctl.h> 36 #include <assert.h> 37 #include <fenv.h> 38 #include <stddef.h> 39 #include <string.h> 40 41 #ifdef __weak_alias 42 __weak_alias(feclearexcept,_feclearexcept) 43 __weak_alias(fedisableexcept,_fedisableexcept) 44 __weak_alias(feenableexcept,_feenableexcept) 45 __weak_alias(fegetenv,_fegetenv) 46 __weak_alias(fegetexcept,_fegetexcept) 47 __weak_alias(fegetexceptflag,_fegetexceptflag) 48 __weak_alias(fegetround,_fegetround) 49 __weak_alias(feholdexcept,_feholdexcept) 50 __weak_alias(feraiseexcept,_feraiseexcept) 51 __weak_alias(fesetenv,_fesetenv) 52 __weak_alias(fesetexceptflag,_fesetexceptflag) 53 __weak_alias(fesetround,_fesetround) 54 __weak_alias(fetestexcept,_fetestexcept) 55 __weak_alias(feupdateenv,_feupdateenv) 56 #endif 57 58 /* Load x87 Control Word */ 59 #define __fldcw(__cw) __asm__ __volatile__ \ 60 ("fldcw %0" : : "m" (__cw)) 61 62 /* No-Wait Store Control Word */ 63 #define __fnstcw(__cw) __asm__ __volatile__ \ 64 ("fnstcw %0" : "=m" (*(__cw))) 65 66 /* No-Wait Store Status Word */ 67 #define __fnstsw(__sw) __asm__ __volatile__ \ 68 ("fnstsw %0" : "=am" (*(__sw))) 69 70 /* No-Wait Clear Exception Flags */ 71 #define __fnclex() __asm__ __volatile__ \ 72 ("fnclex") 73 74 /* Load x87 Environment */ 75 #define __fldenv(__env) __asm__ __volatile__ \ 76 ("fldenv %0" : : "m" (__env)) 77 78 /* No-Wait Store x87 environment */ 79 #define __fnstenv(__env) __asm__ __volatile__ \ 80 ("fnstenv %0" : "=m" (*(__env))) 81 82 /* Check for and handle pending unmasked x87 pending FPU exceptions */ 83 #define __fwait(__env) __asm__ __volatile__ \ 84 ("fwait") 85 86 /* Load the MXCSR register */ 87 #define __ldmxcsr(__mxcsr) __asm__ __volatile__ \ 88 ("ldmxcsr %0" : : "m" (__mxcsr)) 89 90 /* Store the MXCSR register state */ 91 #define __stmxcsr(__mxcsr) __asm__ __volatile__ \ 92 ("stmxcsr %0" : "=m" (*(__mxcsr))) 93 94 /* 95 * The following constant represents the default floating-point environment 96 * (that is, the one installed at program startup) and has type pointer to 97 * const-qualified fenv_t. 98 * 99 * It can be used as an argument to the functions within the <fenv.h> header 100 * that manage the floating-point environment, namely fesetenv() and 101 * feupdateenv(). 102 * 103 * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as 104 * RESERVED. We provide a partial floating-point environment, where we 105 * define only the lower bits. The reserved bits are extracted and set by the 106 * consumers of FE_DFL_ENV, during runtime. 107 */ 108 fenv_t __fe_dfl_env = { 109 .x87 = { 110 .control = __NetBSD_NPXCW__, /* Control word register */ 111 .unused1 = 0, /* Unused */ 112 .status = 0, /* Status word register */ 113 .unused2 = 0, /* Unused */ 114 .tag = 0xffff, /* Tag word register */ 115 .unused3 = 0, /* Unused */ 116 .others = { 117 0, 0, 0, 0x0000ffff, 118 } 119 }, 120 .mxcsr = __INITIAL_MXCSR__ /* MXCSR register */ 121 }; 122 123 /* 124 * Test for SSE support on this processor. 125 * 126 * We need to use ldmxcsr/stmxcsr to get correct results if any part 127 * of the program was compiled to use SSE floating-point, but we can't 128 * use SSE on older processors. 129 * 130 * In order to do so, we need to query the processor capabilities via the CPUID 131 * instruction. We can make it even simpler though, by querying the machdep.sse 132 * sysctl. 133 */ 134 static int __HAS_SSE = 0; 135 136 static void __init_libm(void) __attribute__ ((constructor, used)); 137 138 static void __init_libm(void) 139 { 140 size_t oldlen = sizeof(__HAS_SSE); 141 int rv; 142 uint16_t control; 143 144 rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0); 145 if (rv == -1) 146 __HAS_SSE = 0; 147 148 __fnstcw(&control); 149 __fe_dfl_env.x87.control = control; 150 } 151 152 /* 153 * The feclearexcept() function clears the supported floating-point exceptions 154 * represented by `excepts'. 155 */ 156 int 157 feclearexcept(int excepts) 158 { 159 fenv_t env; 160 uint32_t mxcsr; 161 int ex; 162 163 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 164 165 ex = excepts & FE_ALL_EXCEPT; 166 167 /* It's ~3x faster to call fnclex, than store/load fp env */ 168 if (ex == FE_ALL_EXCEPT) { 169 __fnclex(); 170 } else { 171 __fnstenv(&env); 172 env.x87.status &= ~ex; 173 __fldenv(env); 174 } 175 176 if (__HAS_SSE) { 177 __stmxcsr(&mxcsr); 178 mxcsr &= ~ex; 179 __ldmxcsr(mxcsr); 180 } 181 182 /* Success */ 183 return (0); 184 } 185 186 /* 187 * The fegetexceptflag() function stores an implementation-defined 188 * representation of the states of the floating-point status flags indicated by 189 * the argument excepts in the object pointed to by the argument flagp. 190 */ 191 int 192 fegetexceptflag(fexcept_t *flagp, int excepts) 193 { 194 uint32_t mxcsr; 195 uint16_t status; 196 int ex; 197 198 _DIAGASSERT(flagp != NULL); 199 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 200 201 ex = excepts & FE_ALL_EXCEPT; 202 203 __fnstsw(&status); 204 if (__HAS_SSE) 205 __stmxcsr(&mxcsr); 206 else 207 mxcsr = 0; 208 209 *flagp = (mxcsr | status) & ex; 210 211 /* Success */ 212 return (0); 213 } 214 215 /* 216 * The feraiseexcept() function raises the supported floating-point exceptions 217 * represented by the argument `excepts'. 218 * 219 * The standard explicitly allows us to execute an instruction that has the 220 * exception as a side effect, but we choose to manipulate the status register 221 * directly. 222 * 223 * The validation of input is being deferred to fesetexceptflag(). 224 */ 225 int 226 feraiseexcept(int excepts) 227 { 228 fexcept_t ex; 229 230 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 231 232 ex = excepts & FE_ALL_EXCEPT; 233 fesetexceptflag(&ex, excepts); 234 __fwait(); 235 236 /* Success */ 237 return (0); 238 } 239 240 /* 241 * This function sets the floating-point status flags indicated by the argument 242 * `excepts' to the states stored in the object pointed to by `flagp'. It does 243 * NOT raise any floating-point exceptions, but only sets the state of the flags. 244 */ 245 int 246 fesetexceptflag(const fexcept_t *flagp, int excepts) 247 { 248 fenv_t env; 249 uint32_t mxcsr; 250 int ex; 251 252 _DIAGASSERT(flagp != NULL); 253 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 254 255 ex = excepts & FE_ALL_EXCEPT; 256 257 __fnstenv(&env); 258 env.x87.status &= ~ex; 259 env.x87.status |= *flagp & ex; 260 __fldenv(env); 261 262 if (__HAS_SSE) { 263 __stmxcsr(&mxcsr); 264 mxcsr &= ~ex; 265 mxcsr |= *flagp & ex; 266 __ldmxcsr(mxcsr); 267 } 268 269 /* Success */ 270 return (0); 271 } 272 273 /* 274 * The fetestexcept() function determines which of a specified subset of the 275 * floating-point exception flags are currently set. The `excepts' argument 276 * specifies the floating-point status flags to be queried. 277 */ 278 int 279 fetestexcept(int excepts) 280 { 281 uint32_t mxcsr; 282 uint16_t status; 283 int ex; 284 285 _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0); 286 287 ex = excepts & FE_ALL_EXCEPT; 288 289 __fnstsw(&status); 290 if (__HAS_SSE) 291 __stmxcsr(&mxcsr); 292 else 293 mxcsr = 0; 294 295 return ((status | mxcsr) & ex); 296 } 297 298 int 299 fegetround(void) 300 { 301 uint16_t control; 302 303 /* 304 * We assume that the x87 and the SSE unit agree on the 305 * rounding mode. Reading the control word on the x87 turns 306 * out to be about 5 times faster than reading it on the SSE 307 * unit on an Opteron 244. 308 */ 309 __fnstcw(&control); 310 311 return (control & __X87_ROUND_MASK); 312 } 313 314 /* 315 * The fesetround() function shall establish the rounding direction represented 316 * by its argument round. If the argument is not equal to the value of a 317 * rounding direction macro, the rounding direction is not changed. 318 */ 319 int 320 fesetround(int round) 321 { 322 uint32_t mxcsr; 323 uint16_t control; 324 325 if (round & ~__X87_ROUND_MASK) { 326 /* Failure */ 327 return (-1); 328 } 329 330 __fnstcw(&control); 331 control &= ~__X87_ROUND_MASK; 332 control |= round; 333 __fldcw(control); 334 335 if (__HAS_SSE) { 336 __stmxcsr(&mxcsr); 337 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT); 338 mxcsr |= round << __SSE_ROUND_SHIFT; 339 __ldmxcsr(mxcsr); 340 } 341 342 /* Success */ 343 return (0); 344 } 345 346 /* 347 * The fegetenv() function attempts to store the current floating-point 348 * environment in the object pointed to by envp. 349 */ 350 int 351 fegetenv(fenv_t *envp) 352 { 353 uint32_t mxcsr; 354 355 _DIAGASSERT(flagp != NULL); 356 357 /* 358 * fnstenv masks all exceptions, so we need to restore the old control 359 * word to avoid this side effect. 360 */ 361 __fnstenv(envp); 362 __fldcw(envp->x87.control); 363 if (__HAS_SSE) { 364 __stmxcsr(&mxcsr); 365 envp->mxcsr = mxcsr; 366 } 367 368 /* Success */ 369 return (0); 370 } 371 372 /* 373 * The feholdexcept() function saves the current floating-point environment in 374 * the object pointed to by envp, clears the floating-point status flags, and 375 * then installs a non-stop (continue on floating-point exceptions) mode, if 376 * available, for all floating-point exceptions. 377 */ 378 int 379 feholdexcept(fenv_t *envp) 380 { 381 uint32_t mxcsr; 382 383 _DIAGASSERT(envp != NULL); 384 385 __fnstenv(envp); 386 __fnclex(); 387 if (__HAS_SSE) { 388 __stmxcsr(&mxcsr); 389 envp->mxcsr = mxcsr; 390 mxcsr &= ~FE_ALL_EXCEPT; 391 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT; 392 __ldmxcsr(mxcsr); 393 } 394 395 /* Success */ 396 return (0); 397 } 398 399 /* 400 * The fesetenv() function attempts to establish the floating-point environment 401 * represented by the object pointed to by envp. The argument `envp' points 402 * to an object set by a call to fegetenv() or feholdexcept(), or equal a 403 * floating-point environment macro. The fesetenv() function does not raise 404 * floating-point exceptions, but only installs the state of the floating-point 405 * status flags represented through its argument. 406 */ 407 int 408 fesetenv(const fenv_t *envp) 409 { 410 fenv_t env; 411 412 _DIAGASSERT(envp != NULL); 413 414 /* Store the x87 floating-point environment */ 415 memset(&env, 0, sizeof(env)); 416 __fnstenv(&env); 417 418 __fe_dfl_env.x87.unused1 = env.x87.unused1; 419 __fe_dfl_env.x87.unused2 = env.x87.unused2; 420 __fe_dfl_env.x87.unused3 = env.x87.unused3; 421 memcpy(__fe_dfl_env.x87.others, env.x87.others, 422 sizeof(__fe_dfl_env.x87.others)); 423 424 __fldenv(envp->x87); 425 if (__HAS_SSE) 426 __ldmxcsr(envp->mxcsr); 427 428 /* Success */ 429 return (0); 430 } 431 432 /* 433 * The feupdateenv() function saves the currently raised floating-point 434 * exceptions in its automatic storage, installs the floating-point environment 435 * represented by the object pointed to by `envp', and then raises the saved 436 * floating-point exceptions. The argument `envp' shall point to an object set 437 * by a call to feholdexcept() or fegetenv(), or equal a floating-point 438 * environment macro. 439 */ 440 int 441 feupdateenv(const fenv_t *envp) 442 { 443 fenv_t env; 444 uint32_t mxcsr; 445 uint16_t status; 446 447 _DIAGASSERT(envp != NULL); 448 449 /* Store the x87 floating-point environment */ 450 memset(&env, 0, sizeof(env)); 451 __fnstenv(&env); 452 453 __fe_dfl_env.x87.unused1 = env.x87.unused1; 454 __fe_dfl_env.x87.unused2 = env.x87.unused2; 455 __fe_dfl_env.x87.unused3 = env.x87.unused3; 456 memcpy(__fe_dfl_env.x87.others, env.x87.others, 457 sizeof(__fe_dfl_env.x87.others)); 458 459 __fnstsw(&status); 460 if (__HAS_SSE) 461 __stmxcsr(&mxcsr); 462 else 463 mxcsr = 0; 464 fesetenv(envp); 465 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 466 467 /* Success */ 468 return (0); 469 } 470 471 /* 472 * The following functions are extentions to the standard 473 */ 474 int 475 feenableexcept(int mask) 476 { 477 uint32_t mxcsr, omask; 478 uint16_t control; 479 480 mask &= FE_ALL_EXCEPT; 481 __fnstcw(&control); 482 if (__HAS_SSE) 483 __stmxcsr(&mxcsr); 484 else 485 mxcsr = 0; 486 487 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 488 control &= ~mask; 489 __fldcw(control); 490 if (__HAS_SSE) { 491 mxcsr &= ~(mask << __SSE_EMASK_SHIFT); 492 __ldmxcsr(mxcsr); 493 } 494 495 return (FE_ALL_EXCEPT & ~omask); 496 } 497 498 int 499 fedisableexcept(int mask) 500 { 501 uint32_t mxcsr, omask; 502 uint16_t control; 503 504 mask &= FE_ALL_EXCEPT; 505 __fnstcw(&control); 506 if (__HAS_SSE) 507 __stmxcsr(&mxcsr); 508 else 509 mxcsr = 0; 510 511 omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 512 control |= mask; 513 __fldcw(control); 514 if (__HAS_SSE) { 515 mxcsr |= mask << __SSE_EMASK_SHIFT; 516 __ldmxcsr(mxcsr); 517 } 518 519 return (FE_ALL_EXCEPT & ~omask); 520 } 521 522 int 523 fegetexcept(void) 524 { 525 uint16_t control; 526 527 /* 528 * We assume that the masks for the x87 and the SSE unit are 529 * the same. 530 */ 531 __fnstcw(&control); 532 533 return (~control & FE_ALL_EXCEPT); 534 } 535