/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

        /* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
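        /* Note that after the SSR/SRC/SRL sequence below, a9 holds the
           shifted-out bits left-justified: its sign bit is the rounding
           bit just below the result's LSB, and any lower bits act as a
           "sticky" indicator.  .Ladd_round tests exactly those two
           conditions.  */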
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return


        /* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubsf3 */

#ifdef L_mulsf3

        /* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

        .literal_position
__mulsf3_aux:
        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */
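        /* For illustration only, combining the four partial products is
           roughly equivalent to the following C, if 64-bit arithmetic
           were available:

             uint64_t mul32x32 (uint32_t x, uint32_t y)
             {
               uint32_t xl = x & 0xffff, xh = x >> 16;
               uint32_t yl = y & 0xffff, yh = y >> 16;
               uint64_t pp0 = (uint64_t) xl * yl;
               uint64_t pp1 = (uint64_t) xl * yh;
               uint64_t pp2 = (uint64_t) xh * yl;
               uint64_t pp3 = (uint64_t) xh * yh;
               return pp0 + ((pp1 + pp2) << 16) + (pp3 << 32);
             }

           The code below does the same with only 32-bit registers,
           tracking the carry out of pp1 + pp2 by hand.  */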
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
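        /* At this point a8 holds the result's exponent plus twice the
           bias (0xfe), since each input exponent was biased by 0x7f.
           Subtracting 0x80 leaves it one less than the correctly biased
           result exponent; the explicit "1.0" still sitting in bit 23 of
           the mantissa supplies the missing 1 when the exponent and
           mantissa are added together at .Lmul_rounded.  */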
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

        .literal_position
        /* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero.  */
        beqz    a3, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     a2, a3
        leaf_return
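        /* For illustration only, the quotient loop in __divsf3 below is a
           restoring division that produces one mantissa bit per
           iteration, roughly equivalent to this C, where a2 and a3 hold
           the 24-bit mantissas with their explicit "1.0" bits and a2 has
           already been adjusted so that a2 > a3:

             q = 1;
             a2 = (a2 - a3) << 1;
             for (i = 0; i < 23; i++)
               {
                 q <<= 1;
                 if (a2 >= a3) { q += 1; a2 -= a3; }
                 a2 <<= 1;
               }

           On exit, q is the 24-bit quotient and a2 holds twice the final
           remainder, which is what the rounding code expects.  */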
        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift.  */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10.  */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits.  */
        movi    a9, 23
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1?  */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract.  */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1.  */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        addi    a8, a8, 0x7e

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in a2.  */
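        /* Because the remainder was left doubled by the final shift,
           comparing it directly against the divisor implements
           round-to-nearest: 2*rem < divisor rounds down, 2*rem > divisor
           rounds up, and 2*rem == divisor is the halfway case handled at
           .Ldiv_exactlyhalf.  */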
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor.  Round even.  */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right.  Any bits that are shifted out of a10 are
           saved in a6 for rounding the result.  */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6.  */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

        /* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, a2, a3
        slli    a7, a7, 1

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7f8 and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return


        /* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return
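        /* The ordered comparisons below rely on the fact that, once NaNs
           have been excluded, IEEE-754 values with the sign bit clear
           compare as unsigned integers in the same order as the numbers
           they represent, while two negative values compare in the
           reverse order; zeros of opposite sign are the only distinct bit
           patterns that must still compare equal.  */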
.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Lle_xneg

        /* Check if x <= y.  */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Llt_xneg

        /* Check if x < y.  */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return
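        /* Worked example, for illustration: 2.5f is 0x40200000, so the
           code above computes a4 = 0x80 - 0x7e = 2 and a5 = 0xa0000000
           once the explicit "1.0" is ORed in and shifted left by 8; the
           SSL/SRL pair then shifts right by 32 - 2 = 30, giving the
           truncated result 2.  */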
.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 8
        leaf_return

#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff....  */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6.  */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatsisf */

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code.  */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatdisf */