1 /* 2 * ==================================================== 3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 * 5 * Developed at SunPro, a Sun Microsystems, Inc. business. 6 * Permission to use, copy, modify, and distribute this 7 * software is freely granted, provided that this notice 8 * is preserved. 9 * ==================================================== 10 */ 11 12 /* 13 * from: @(#)fdlibm.h 5.1 93/09/24 14 * $NetBSD: math_private.h,v 1.26 2022/08/27 08:31:59 christos Exp $ 15 */ 16 17 #ifndef _MATH_PRIVATE_H_ 18 #define _MATH_PRIVATE_H_ 19 20 #include <sys/types.h> 21 22 /* The original fdlibm code used statements like: 23 n0 = ((*(int*)&one)>>29)^1; * index of high word * 24 ix0 = *(n0+(int*)&x); * high word of x * 25 ix1 = *((1-n0)+(int*)&x); * low word of x * 26 to dig two 32 bit words out of the 64 bit IEEE floating point 27 value. That is non-ANSI, and, moreover, the gcc instruction 28 scheduler gets it wrong. We instead use the following macros. 29 Unlike the original code, we determine the endianness at compile 30 time, not at run time; I don't see much benefit to selecting 31 endianness at run time. */ 32 33 /* A union which permits us to convert between a double and two 32 bit 34 ints. */ 35 36 /* 37 * The ARM ports are little endian except for the FPA word order which is 38 * big endian. 39 */ 40 41 #if (BYTE_ORDER == BIG_ENDIAN) || (defined(__arm__) && !defined(__VFP_FP__)) 42 43 typedef union 44 { 45 double value; 46 struct 47 { 48 u_int32_t msw; 49 u_int32_t lsw; 50 } parts; 51 struct { 52 u_int64_t w; 53 } xparts; 54 } ieee_double_shape_type; 55 56 #endif 57 58 #if (BYTE_ORDER == LITTLE_ENDIAN) && \ 59 !(defined(__arm__) && !defined(__VFP_FP__)) 60 61 typedef union 62 { 63 double value; 64 struct 65 { 66 u_int32_t lsw; 67 u_int32_t msw; 68 } parts; 69 struct { 70 u_int64_t w; 71 } xparts; 72 } ieee_double_shape_type; 73 74 #endif 75 76 /* Get two 32 bit ints from a double. */ 77 78 #define EXTRACT_WORDS(ix0,ix1,d) \ 79 do { \ 80 ieee_double_shape_type ew_u; \ 81 ew_u.value = (d); \ 82 (ix0) = ew_u.parts.msw; \ 83 (ix1) = ew_u.parts.lsw; \ 84 } while (0) 85 86 /* Get a 64-bit int from a double. */ 87 #define EXTRACT_WORD64(ix,d) \ 88 do { \ 89 ieee_double_shape_type ew_u; \ 90 ew_u.value = (d); \ 91 (ix) = ew_u.xparts.w; \ 92 } while (0) 93 94 95 /* Get the more significant 32 bit int from a double. */ 96 97 #define GET_HIGH_WORD(i,d) \ 98 do { \ 99 ieee_double_shape_type gh_u; \ 100 gh_u.value = (d); \ 101 (i) = gh_u.parts.msw; \ 102 } while (0) 103 104 /* Get the less significant 32 bit int from a double. */ 105 106 #define GET_LOW_WORD(i,d) \ 107 do { \ 108 ieee_double_shape_type gl_u; \ 109 gl_u.value = (d); \ 110 (i) = gl_u.parts.lsw; \ 111 } while (0) 112 113 /* Set a double from two 32 bit ints. */ 114 115 #define INSERT_WORDS(d,ix0,ix1) \ 116 do { \ 117 ieee_double_shape_type iw_u; \ 118 iw_u.parts.msw = (ix0); \ 119 iw_u.parts.lsw = (ix1); \ 120 (d) = iw_u.value; \ 121 } while (0) 122 123 /* Set a double from a 64-bit int. */ 124 #define INSERT_WORD64(d,ix) \ 125 do { \ 126 ieee_double_shape_type iw_u; \ 127 iw_u.xparts.w = (ix); \ 128 (d) = iw_u.value; \ 129 } while (0) 130 131 132 /* Set the more significant 32 bits of a double from an int. */ 133 134 #define SET_HIGH_WORD(d,v) \ 135 do { \ 136 ieee_double_shape_type sh_u; \ 137 sh_u.value = (d); \ 138 sh_u.parts.msw = (v); \ 139 (d) = sh_u.value; \ 140 } while (0) 141 142 /* Set the less significant 32 bits of a double from an int. */ 143 144 #define SET_LOW_WORD(d,v) \ 145 do { \ 146 ieee_double_shape_type sl_u; \ 147 sl_u.value = (d); \ 148 sl_u.parts.lsw = (v); \ 149 (d) = sl_u.value; \ 150 } while (0) 151 152 /* A union which permits us to convert between a float and a 32 bit 153 int. */ 154 155 typedef union 156 { 157 float value; 158 u_int32_t word; 159 } ieee_float_shape_type; 160 161 /* Get a 32 bit int from a float. */ 162 163 #define GET_FLOAT_WORD(i,d) \ 164 do { \ 165 ieee_float_shape_type gf_u; \ 166 gf_u.value = (d); \ 167 (i) = gf_u.word; \ 168 } while (0) 169 170 /* Set a float from a 32 bit int. */ 171 172 #define SET_FLOAT_WORD(d,i) \ 173 do { \ 174 ieee_float_shape_type sf_u; \ 175 sf_u.word = (i); \ 176 (d) = sf_u.value; \ 177 } while (0) 178 179 /* 180 * Attempt to get strict C99 semantics for assignment with non-C99 compilers. 181 */ 182 #if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 183 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 184 #else 185 #define STRICT_ASSIGN(type, lval, rval) do { \ 186 volatile type __lval; \ 187 \ 188 if (sizeof(type) >= sizeof(long double)) \ 189 (lval) = (rval); \ 190 else { \ 191 __lval = (rval); \ 192 (lval) = __lval; \ 193 } \ 194 } while (0) 195 #endif 196 197 /* Support switching the mode to FP_PE if necessary. */ 198 #if defined(__i386__) && !defined(NO_FPSETPREC) 199 #define ENTERI() ENTERIT(long double) 200 #define ENTERIT(returntype) \ 201 returntype __retval; \ 202 fp_prec_t __oprec; \ 203 \ 204 if ((__oprec = fpgetprec()) != FP_PE) \ 205 fpsetprec(FP_PE) 206 #define RETURNI(x) do { \ 207 __retval = (x); \ 208 if (__oprec != FP_PE) \ 209 fpsetprec(__oprec); \ 210 RETURNF(__retval); \ 211 } while (0) 212 #define ENTERV() \ 213 fp_prec_t __oprec; \ 214 \ 215 if ((__oprec = fpgetprec()) != FP_PE) \ 216 fpsetprec(FP_PE) 217 #define RETURNV() do { \ 218 if (__oprec != FP_PE) \ 219 fpsetprec(__oprec); \ 220 return; \ 221 } while (0) 222 #else 223 #define ENTERI() 224 #define ENTERIT(x) 225 #define RETURNI(x) RETURNF(x) 226 #define ENTERV() 227 #define RETURNV() return 228 #endif 229 230 #ifdef _COMPLEX_H 231 232 /* 233 * Quoting from ISO/IEC 9899:TC2: 234 * 235 * 6.2.5.13 Types 236 * Each complex type has the same representation and alignment requirements as 237 * an array type containing exactly two elements of the corresponding real type; 238 * the first element is equal to the real part, and the second element to the 239 * imaginary part, of the complex number. 240 */ 241 typedef union { 242 float complex z; 243 float parts[2]; 244 } float_complex; 245 246 typedef union { 247 double complex z; 248 double parts[2]; 249 } double_complex; 250 251 typedef union { 252 long double complex z; 253 long double parts[2]; 254 } long_double_complex; 255 256 #define REAL_PART(z) ((z).parts[0]) 257 #define IMAG_PART(z) ((z).parts[1]) 258 259 /* 260 * Inline functions that can be used to construct complex values. 261 * 262 * The C99 standard intends x+I*y to be used for this, but x+I*y is 263 * currently unusable in general since gcc introduces many overflow, 264 * underflow, sign and efficiency bugs by rewriting I*y as 265 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. 266 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted 267 * to -0.0+I*0.0. 268 * 269 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() 270 * to construct complex values. Compilers that conform to the C99 271 * standard require the following functions to avoid the above issues. 272 */ 273 274 #ifndef CMPLXF 275 static __inline float complex 276 CMPLXF(float x, float y) 277 { 278 float_complex z; 279 280 REAL_PART(z) = x; 281 IMAG_PART(z) = y; 282 return (z.z); 283 } 284 #endif 285 286 #ifndef CMPLX 287 static __inline double complex 288 CMPLX(double x, double y) 289 { 290 double_complex z; 291 292 REAL_PART(z) = x; 293 IMAG_PART(z) = y; 294 return (z.z); 295 } 296 #endif 297 298 #ifndef CMPLXL 299 static __inline long double complex 300 CMPLXL(long double x, long double y) 301 { 302 long_double_complex z; 303 304 REAL_PART(z) = x; 305 IMAG_PART(z) = y; 306 return (z.z); 307 } 308 #endif 309 310 #endif /* _COMPLEX_H */ 311 312 /* ieee style elementary functions */ 313 extern double __ieee754_sqrt __P((double)); 314 extern double __ieee754_acos __P((double)); 315 extern double __ieee754_acosh __P((double)); 316 extern double __ieee754_log __P((double)); 317 extern double __ieee754_atanh __P((double)); 318 extern double __ieee754_asin __P((double)); 319 extern double __ieee754_atan2 __P((double,double)); 320 extern double __ieee754_exp __P((double)); 321 extern double __ieee754_cosh __P((double)); 322 extern double __ieee754_fmod __P((double,double)); 323 extern double __ieee754_pow __P((double,double)); 324 extern double __ieee754_lgamma_r __P((double,int *)); 325 extern double __ieee754_gamma_r __P((double,int *)); 326 extern double __ieee754_lgamma __P((double)); 327 extern double __ieee754_gamma __P((double)); 328 extern double __ieee754_log10 __P((double)); 329 extern double __ieee754_log2 __P((double)); 330 extern double __ieee754_sinh __P((double)); 331 extern double __ieee754_hypot __P((double,double)); 332 extern double __ieee754_j0 __P((double)); 333 extern double __ieee754_j1 __P((double)); 334 extern double __ieee754_y0 __P((double)); 335 extern double __ieee754_y1 __P((double)); 336 extern double __ieee754_jn __P((int,double)); 337 extern double __ieee754_yn __P((int,double)); 338 extern double __ieee754_remainder __P((double,double)); 339 extern int32_t __ieee754_rem_pio2 __P((double,double*)); 340 extern double __ieee754_scalb __P((double,double)); 341 342 /* fdlibm kernel function */ 343 extern double __kernel_standard __P((double,double,int)); 344 extern double __kernel_sin __P((double,double,int)); 345 extern double __kernel_cos __P((double,double)); 346 extern double __kernel_tan __P((double,double,int)); 347 extern int __kernel_rem_pio2 __P((double*,double*,int,int,int)); 348 349 350 /* ieee style elementary float functions */ 351 extern float __ieee754_sqrtf __P((float)); 352 extern float __ieee754_acosf __P((float)); 353 extern float __ieee754_acoshf __P((float)); 354 extern float __ieee754_logf __P((float)); 355 extern float __ieee754_atanhf __P((float)); 356 extern float __ieee754_asinf __P((float)); 357 extern float __ieee754_atan2f __P((float,float)); 358 extern float __ieee754_expf __P((float)); 359 extern float __ieee754_coshf __P((float)); 360 extern float __ieee754_fmodf __P((float,float)); 361 extern float __ieee754_powf __P((float,float)); 362 extern float __ieee754_lgammaf_r __P((float,int *)); 363 extern float __ieee754_gammaf_r __P((float,int *)); 364 extern float __ieee754_lgammaf __P((float)); 365 extern float __ieee754_gammaf __P((float)); 366 extern float __ieee754_log10f __P((float)); 367 extern float __ieee754_log2f __P((float)); 368 extern float __ieee754_sinhf __P((float)); 369 extern float __ieee754_hypotf __P((float,float)); 370 extern float __ieee754_j0f __P((float)); 371 extern float __ieee754_j1f __P((float)); 372 extern float __ieee754_y0f __P((float)); 373 extern float __ieee754_y1f __P((float)); 374 extern float __ieee754_jnf __P((int,float)); 375 extern float __ieee754_ynf __P((int,float)); 376 extern float __ieee754_remainderf __P((float,float)); 377 extern int32_t __ieee754_rem_pio2f __P((float,float*)); 378 extern float __ieee754_scalbf __P((float,float)); 379 380 /* float versions of fdlibm kernel functions */ 381 extern float __kernel_sinf __P((float,float,int)); 382 extern float __kernel_cosf __P((float,float)); 383 extern float __kernel_tanf __P((float,float,int)); 384 extern int __kernel_rem_pio2f __P((float*,float*,int,int,int,const int32_t*)); 385 386 /* ieee style elementary long double functions */ 387 extern long double __ieee754_fmodl(long double, long double); 388 extern long double __ieee754_sqrtl(long double); 389 390 /* 391 * TRUNC() is a macro that sets the trailing 27 bits in the mantissa of an 392 * IEEE double variable to zero. It must be expression-like for syntactic 393 * reasons, and we implement this expression using an inline function 394 * instead of a pure macro to avoid depending on the gcc feature of 395 * statement-expressions. 396 */ 397 #define TRUNC(d) (_b_trunc(&(d))) 398 399 static __inline void 400 _b_trunc(volatile double *_dp) 401 { 402 uint32_t _lw; 403 404 GET_LOW_WORD(_lw, *_dp); 405 SET_LOW_WORD(*_dp, _lw & 0xf8000000); 406 } 407 408 struct Double { 409 double a; 410 double b; 411 }; 412 413 /* 414 * Functions internal to the math package, yet not static. 415 */ 416 double __exp__D(double, double); 417 struct Double __log__D(double); 418 419 /* 420 * The rnint() family rounds to the nearest integer for a restricted range 421 * range of args (up to about 2**MANT_DIG). We assume that the current 422 * rounding mode is FE_TONEAREST so that this can be done efficiently. 423 * Extra precision causes more problems in practice, and we only centralize 424 * this here to reduce those problems, and have not solved the efficiency 425 * problems. The exp2() family uses a more delicate version of this that 426 * requires extracting bits from the intermediate value, so it is not 427 * centralized here and should copy any solution of the efficiency problems. 428 */ 429 430 static inline double 431 rnint(double x) 432 { 433 /* 434 * This casts to double to kill any extra precision. This depends 435 * on the cast being applied to a double_t to avoid compiler bugs 436 * (this is a cleaner version of STRICT_ASSIGN()). This is 437 * inefficient if there actually is extra precision, but is hard 438 * to improve on. We use double_t in the API to minimise conversions 439 * for just calling here. Note that we cannot easily change the 440 * magic number to the one that works directly with double_t, since 441 * the rounding precision is variable at runtime on x86 so the 442 * magic number would need to be variable. Assuming that the 443 * rounding precision is always the default is too fragile. This 444 * and many other complications will move when the default is 445 * changed to FP_PE. 446 */ 447 return ((double)(x + 0x1.8p52) - 0x1.8p52); 448 } 449 450 static inline float 451 rnintf(float x) 452 { 453 /* 454 * As for rnint(), except we could just call that to handle the 455 * extra precision case, usually without losing efficiency. 456 */ 457 return ((float)(x + 0x1.8p23F) - 0x1.8p23F); 458 } 459 460 #ifdef LDBL_MANT_DIG 461 /* 462 * The complications for extra precision are smaller for rnintl() since it 463 * can safely assume that the rounding precision has been increased from 464 * its default to FP_PE on x86. We don't exploit that here to get small 465 * optimizations from limiting the rangle to double. We just need it for 466 * the magic number to work with long doubles. ld128 callers should use 467 * rnint() instead of this if possible. ld80 callers should prefer 468 * rnintl() since for amd64 this avoids swapping the register set, while 469 * for i386 it makes no difference (assuming FP_PE), and for other arches 470 * it makes little difference. 471 */ 472 static inline long double 473 rnintl(long double x) 474 { 475 return (x + ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2 - 476 ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2); 477 } 478 #endif /* LDBL_MANT_DIG */ 479 480 /* 481 * irint() and i64rint() give the same result as casting to their integer 482 * return type provided their arg is a floating point integer. They can 483 * sometimes be more efficient because no rounding is required. 484 */ 485 #if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM) 486 #define irint(x) \ 487 (sizeof(x) == sizeof(float) && \ 488 sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ 489 sizeof(x) == sizeof(double) && \ 490 sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ 491 sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) 492 #else 493 #define irint(x) ((int)(x)) 494 #endif 495 496 #define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ 497 498 #if defined(__i386__) && defined(__GNUCLIKE_ASM) 499 static __inline int 500 irintf(float x) 501 { 502 int n; 503 504 __asm("fistl %0" : "=m" (n) : "t" (x)); 505 return (n); 506 } 507 508 static __inline int 509 irintd(double x) 510 { 511 int n; 512 513 __asm("fistl %0" : "=m" (n) : "t" (x)); 514 return (n); 515 } 516 #endif 517 518 #if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM) 519 static __inline int 520 irintl(long double x) 521 { 522 int n; 523 524 __asm("fistl %0" : "=m" (n) : "t" (x)); 525 return (n); 526 } 527 #endif 528 529 #endif /* _MATH_PRIVATE_H_ */ 530