1/* Copyright (C) 1994-2020 Free Software Foundation, Inc. 2 3This file is free software; you can redistribute it and/or modify it 4under the terms of the GNU General Public License as published by the 5Free Software Foundation; either version 3, or (at your option) any 6later version. 7 8This file is distributed in the hope that it will be useful, but 9WITHOUT ANY WARRANTY; without even the implied warranty of 10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11General Public License for more details. 12 13Under Section 7 of GPL version 3, you are granted additional 14permissions described in the GCC Runtime Library Exception, version 153.1, as published by the Free Software Foundation. 16 17You should have received a copy of the GNU General Public License and 18a copy of the GCC Runtime Library Exception along with this program; 19see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 20<http://www.gnu.org/licenses/>. */ 21 22 23!! libgcc routines for the Renesas / SuperH SH CPUs. 24!! Contributed by Steve Chamberlain. 25!! sac@cygnus.com 26 27!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines 28!! recoded in assembly by Toshiyasu Morita 29!! tm@netcom.com 30 31#if defined(__ELF__) && defined(__linux__) 32.section .note.GNU-stack,"",%progbits 33.previous 34#endif 35 36/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and 37 ELF local label prefixes by J"orn Rennecke 38 amylaar@cygnus.com */ 39 40#include "lib1funcs.h" 41 42/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, 43 so it is more convenient to define NO_FPSCR_VALUES here than to 44 define it on the command line. 
*/
#if defined __vxworks && defined __PIC__
#define NO_FPSCR_VALUES
#endif

#ifdef L_ashiftrt

!
! GLOBAL(ashiftrt_r4_N), N = 0 .. 32
!
! Arithmetic (sign-propagating) right shift of r4 by the constant N.
!
! Entry:
!       r4: Value to shift
!
! Exit:
!       r4: Result
!
! Destroys:
!       T bit (written by the shar, rotcl and subc steps)
!
! The entry points form fall-through chains of one-bit shar steps.  Each
! chain ends in a shorter sequence for the next multiple of 8 or 16
! (24, 16) or in the final single shift, so entering at label N runs
! exactly the steps that are still needed for a shift by N.
!

        .global GLOBAL(ashiftrt_r4_0)
        .global GLOBAL(ashiftrt_r4_1)
        .global GLOBAL(ashiftrt_r4_2)
        .global GLOBAL(ashiftrt_r4_3)
        .global GLOBAL(ashiftrt_r4_4)
        .global GLOBAL(ashiftrt_r4_5)
        .global GLOBAL(ashiftrt_r4_6)
        .global GLOBAL(ashiftrt_r4_7)
        .global GLOBAL(ashiftrt_r4_8)
        .global GLOBAL(ashiftrt_r4_9)
        .global GLOBAL(ashiftrt_r4_10)
        .global GLOBAL(ashiftrt_r4_11)
        .global GLOBAL(ashiftrt_r4_12)
        .global GLOBAL(ashiftrt_r4_13)
        .global GLOBAL(ashiftrt_r4_14)
        .global GLOBAL(ashiftrt_r4_15)
        .global GLOBAL(ashiftrt_r4_16)
        .global GLOBAL(ashiftrt_r4_17)
        .global GLOBAL(ashiftrt_r4_18)
        .global GLOBAL(ashiftrt_r4_19)
        .global GLOBAL(ashiftrt_r4_20)
        .global GLOBAL(ashiftrt_r4_21)
        .global GLOBAL(ashiftrt_r4_22)
        .global GLOBAL(ashiftrt_r4_23)
        .global GLOBAL(ashiftrt_r4_24)
        .global GLOBAL(ashiftrt_r4_25)
        .global GLOBAL(ashiftrt_r4_26)
        .global GLOBAL(ashiftrt_r4_27)
        .global GLOBAL(ashiftrt_r4_28)
        .global GLOBAL(ashiftrt_r4_29)
        .global GLOBAL(ashiftrt_r4_30)
        .global GLOBAL(ashiftrt_r4_31)
        .global GLOBAL(ashiftrt_r4_32)

        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
        HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

        .align  1
! Shift by 31 or 32: copy the sign bit into T, then r4 = 0 - T.
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
        rotcl   r4
        rts
        subc    r4,r4

GLOBAL(ashiftrt_r4_30):
        shar    r4
GLOBAL(ashiftrt_r4_29):
        shar    r4
GLOBAL(ashiftrt_r4_28):
        shar    r4
GLOBAL(ashiftrt_r4_27):
        shar    r4
GLOBAL(ashiftrt_r4_26):
        shar    r4
GLOBAL(ashiftrt_r4_25):
        shar    r4
! Shift by 24: move the top byte down, then sign-extend it.
GLOBAL(ashiftrt_r4_24):
        shlr16  r4
        shlr8   r4
        rts
        exts.b  r4,r4

GLOBAL(ashiftrt_r4_23):
        shar    r4
GLOBAL(ashiftrt_r4_22):
        shar    r4
GLOBAL(ashiftrt_r4_21):
        shar    r4
GLOBAL(ashiftrt_r4_20):
        shar    r4
GLOBAL(ashiftrt_r4_19):
        shar    r4
GLOBAL(ashiftrt_r4_18):
        shar    r4
GLOBAL(ashiftrt_r4_17):
        shar    r4
! Shift by 16: move the top word down, then sign-extend it.
GLOBAL(ashiftrt_r4_16):
        shlr16  r4
        rts
        exts.w  r4,r4

GLOBAL(ashiftrt_r4_15):
        shar    r4
GLOBAL(ashiftrt_r4_14):
        shar    r4
GLOBAL(ashiftrt_r4_13):
        shar    r4
GLOBAL(ashiftrt_r4_12):
        shar    r4
GLOBAL(ashiftrt_r4_11):
        shar    r4
GLOBAL(ashiftrt_r4_10):
        shar    r4
GLOBAL(ashiftrt_r4_9):
        shar    r4
GLOBAL(ashiftrt_r4_8):
        shar    r4
GLOBAL(ashiftrt_r4_7):
        shar    r4
GLOBAL(ashiftrt_r4_6):
        shar    r4
GLOBAL(ashiftrt_r4_5):
        shar    r4
GLOBAL(ashiftrt_r4_4):
        shar    r4
GLOBAL(ashiftrt_r4_3):
        shar    r4
GLOBAL(ashiftrt_r4_2):
        shar    r4
! The last one-bit step runs in the delay slot of the return.
GLOBAL(ashiftrt_r4_1):
        rts
        shar    r4

GLOBAL(ashiftrt_r4_0):
        rts
        nop

        ENDFUNC(GLOBAL(ashiftrt_r4_0))
        ENDFUNC(GLOBAL(ashiftrt_r4_1))
        ENDFUNC(GLOBAL(ashiftrt_r4_2))
        ENDFUNC(GLOBAL(ashiftrt_r4_3))
        ENDFUNC(GLOBAL(ashiftrt_r4_4))
        ENDFUNC(GLOBAL(ashiftrt_r4_5))
        ENDFUNC(GLOBAL(ashiftrt_r4_6))
        ENDFUNC(GLOBAL(ashiftrt_r4_7))
        ENDFUNC(GLOBAL(ashiftrt_r4_8))
        ENDFUNC(GLOBAL(ashiftrt_r4_9))
        ENDFUNC(GLOBAL(ashiftrt_r4_10))
        ENDFUNC(GLOBAL(ashiftrt_r4_11))
        ENDFUNC(GLOBAL(ashiftrt_r4_12))
        ENDFUNC(GLOBAL(ashiftrt_r4_13))
        ENDFUNC(GLOBAL(ashiftrt_r4_14))
        ENDFUNC(GLOBAL(ashiftrt_r4_15))
        ENDFUNC(GLOBAL(ashiftrt_r4_16))
        ENDFUNC(GLOBAL(ashiftrt_r4_17))
        ENDFUNC(GLOBAL(ashiftrt_r4_18))
        ENDFUNC(GLOBAL(ashiftrt_r4_19))
        ENDFUNC(GLOBAL(ashiftrt_r4_20))
        ENDFUNC(GLOBAL(ashiftrt_r4_21))
        ENDFUNC(GLOBAL(ashiftrt_r4_22))
        ENDFUNC(GLOBAL(ashiftrt_r4_23))
        ENDFUNC(GLOBAL(ashiftrt_r4_24))
        ENDFUNC(GLOBAL(ashiftrt_r4_25))
        ENDFUNC(GLOBAL(ashiftrt_r4_26))
        ENDFUNC(GLOBAL(ashiftrt_r4_27))
        ENDFUNC(GLOBAL(ashiftrt_r4_28))
        ENDFUNC(GLOBAL(ashiftrt_r4_29))
        ENDFUNC(GLOBAL(ashiftrt_r4_30))
        ENDFUNC(GLOBAL(ashiftrt_r4_31))
        ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif

#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Entry:
!
!       r4: Value to shift
!       r5: Shift count
!
! Exit:
!
!       r0: Result
!
! Destroys:
!
!       T bit, r5
!
! Arithmetic right shift of r4 by the count in r5 (taken modulo 32),
! result in r0.  The count selects a one-byte offset from a table; the
! offset is relative to the table start and leads into a fall-through
! chain of shar steps.

        .global GLOBAL(ashrsi3)
        HIDDEN_FUNC(GLOBAL(ashrsi3))
        .align  2
GLOBAL(ashrsi3):
        mov     #31,r0
        and     r0,r5                   ! r5 = count & 31
        mova    LOCAL(ashrsi3_table),r0
        mov.b   @(r0,r5),r5             ! r5 = table offset of the entry
#ifdef __sh1__
        ! No braf here: form the absolute target address and jump.
        add     r5,r0
        jmp     @r0
#else
        ! braf is relative to the address of braf + 4.  The table follows
        ! the delay slot directly, so that address is the table start and
        ! the offsets in the table can be used unchanged.
        braf    r5
#endif
        mov     r4,r0                   ! delay slot: r0 = value to shift

        .align  2
LOCAL(ashrsi3_table):
        .byte   LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
        .byte   LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

! Shift by 31: copy the sign bit into T, then r0 = 0 - T.
LOCAL(ashrsi3_31):
        rotcl   r0
        rts
        subc    r0,r0

LOCAL(ashrsi3_30):
        shar    r0
LOCAL(ashrsi3_29):
        shar    r0
LOCAL(ashrsi3_28):
        shar    r0
LOCAL(ashrsi3_27):
        shar    r0
LOCAL(ashrsi3_26):
        shar    r0
LOCAL(ashrsi3_25):
        shar    r0
! Shift by 24: move the top byte down, then sign-extend it.
LOCAL(ashrsi3_24):
        shlr16  r0
        shlr8   r0
        rts
        exts.b  r0,r0

LOCAL(ashrsi3_23):
        shar    r0
LOCAL(ashrsi3_22):
        shar    r0
LOCAL(ashrsi3_21):
        shar    r0
LOCAL(ashrsi3_20):
        shar    r0
LOCAL(ashrsi3_19):
        shar    r0
LOCAL(ashrsi3_18):
        shar    r0
LOCAL(ashrsi3_17):
        shar    r0
! Shift by 16: move the top word down, then sign-extend it.
LOCAL(ashrsi3_16):
        shlr16  r0
        rts
        exts.w  r0,r0

LOCAL(ashrsi3_15):
        shar    r0
LOCAL(ashrsi3_14):
        shar    r0
LOCAL(ashrsi3_13):
        shar    r0
LOCAL(ashrsi3_12):
        shar    r0
LOCAL(ashrsi3_11):
        shar    r0
LOCAL(ashrsi3_10):
        shar    r0
LOCAL(ashrsi3_9):
        shar    r0
LOCAL(ashrsi3_8):
        shar    r0
LOCAL(ashrsi3_7):
        shar    r0
LOCAL(ashrsi3_6):
        shar    r0
LOCAL(ashrsi3_5):
        shar    r0
LOCAL(ashrsi3_4):
        shar    r0
LOCAL(ashrsi3_3):
        shar    r0
LOCAL(ashrsi3_2):
        shar    r0
! The last one-bit step runs in the delay slot of the return.
LOCAL(ashrsi3_1):
        rts
        shar    r0

LOCAL(ashrsi3_0):
        rts
        nop

        ENDFUNC(GLOBAL(ashrsi3))
#endif

#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
! (For compatibility with older binaries, not used by compiler)
!
! Entry:
!       r4: Value to shift
!       r5: Shift count
!
! Exit:
!       r0: Result
!
! Destroys:
!       T bit
!
!
! GLOBAL(ashlsi3_r0)
!
! Entry:
!       r4: Value to shift
!       r0: Shift count
!
! Exit:
!       r0: Result
!
! Destroys:
!
!       T bit

! Left shift of r4 by a count taken modulo 32, result in r0.
! The count arrives in r5 (ashlsi3) or r0 (ashlsi3_r0).  It is scaled
! by 4 and used to branch into a table with one 4-byte slot (two
! instructions) per count.  A slot either finishes the shift itself or
! does a first step in the delay slot of a bra to a shared tail.

        .global GLOBAL(ashlsi3)
        .global GLOBAL(ashlsi3_r0)
        HIDDEN_FUNC(GLOBAL(ashlsi3))
        HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
GLOBAL(ashlsi3):
        mov     r5,r0
        .align  2
GLOBAL(ashlsi3_r0):

#ifdef __sh1__
        ! No braf here: compute table + 4 * count through mova, using r4
        ! as a scratch register that is saved and restored on the stack.
        and     #31,r0
        shll2   r0
        mov.l   r4,@-r15
        mov     r0,r4
        mova    LOCAL(ashlsi3_table),r0
        add     r4,r0
        mov.l   @r15+,r4
        jmp     @r0
        mov     r4,r0
        .align  2
#else
        ! The table starts right after the delay slot, which is where a
        ! braf displacement of zero lands.
        and     #31,r0
        shll2   r0
        braf    r0
        mov     r4,r0
#endif

LOCAL(ashlsi3_table):
        rts                             // << 0
        nop
LOCAL(ashlsi_1):
        rts                             // << 1
        shll    r0
LOCAL(ashlsi_2):                        // << 2
        rts
        shll2   r0
        bra     LOCAL(ashlsi_1)         // << 3
        shll2   r0
        bra     LOCAL(ashlsi_2)         // << 4
        shll2   r0
        bra     LOCAL(ashlsi_5)         // << 5
        shll    r0
        bra     LOCAL(ashlsi_6)         // << 6
        shll2   r0
        bra     LOCAL(ashlsi_7)         // << 7
        shll    r0
LOCAL(ashlsi_8):                        // << 8
        rts
        shll8   r0
        bra     LOCAL(ashlsi_8)         // << 9
        shll    r0
        bra     LOCAL(ashlsi_8)         // << 10
        shll2   r0
        bra     LOCAL(ashlsi_11)        // << 11
        shll    r0
        bra     LOCAL(ashlsi_12)        // << 12
        shll2   r0
        bra     LOCAL(ashlsi_13)        // << 13
        shll    r0
        bra     LOCAL(ashlsi_14)        // << 14
        shll8   r0
        bra     LOCAL(ashlsi_15)        // << 15
        shll8   r0
LOCAL(ashlsi_16):                       // << 16
        rts
        shll16  r0
        bra     LOCAL(ashlsi_16)        // << 17
        shll    r0
        bra     LOCAL(ashlsi_16)        // << 18
        shll2   r0
        bra     LOCAL(ashlsi_19)        // << 19
        shll    r0
        bra     LOCAL(ashlsi_20)        // << 20
        shll2   r0
        bra     LOCAL(ashlsi_21)        // << 21
        shll    r0
        bra     LOCAL(ashlsi_22)        // << 22
        shll16  r0
        bra     LOCAL(ashlsi_23)        // << 23
        shll16  r0
        bra     LOCAL(ashlsi_16)        // << 24
        shll8   r0
        bra     LOCAL(ashlsi_25)        // << 25
        shll    r0
        bra     LOCAL(ashlsi_26)        // << 26
        shll2   r0
        bra     LOCAL(ashlsi_27)        // << 27
        shll    r0
        bra     LOCAL(ashlsi_28)        // << 28
        shll2   r0
        bra     LOCAL(ashlsi_29)        // << 29
        shll16  r0
        bra     LOCAL(ashlsi_30)        // << 30
        shll16  r0
        and     #1,r0                   // << 31
        rts
        rotr    r0

/* Shared tails.  Several of the rts instructions below are directly
   followed by a label: the delay slot of such an rts is the first
   instruction under that label, and it is counted as part of the shift.
   Do not reorder or separate these sequences.  */
LOCAL(ashlsi_7):
        shll2   r0
LOCAL(ashlsi_5):
LOCAL(ashlsi_6):
        shll2   r0
        rts                             // delay slot: shll2 at ashlsi_13
LOCAL(ashlsi_13):
        shll2   r0
LOCAL(ashlsi_12):
LOCAL(ashlsi_11):
        shll8   r0
        rts                             // delay slot: shll2 at ashlsi_21
LOCAL(ashlsi_21):
        shll2   r0
LOCAL(ashlsi_20):
LOCAL(ashlsi_19):
        shll16  r0
        rts                             // delay slot: shll2 at ashlsi_28/27
LOCAL(ashlsi_28):
LOCAL(ashlsi_27):
        shll2   r0
LOCAL(ashlsi_26):
LOCAL(ashlsi_25):
        shll16  r0
        rts
        shll8   r0

/* The tails below reach their count by overshooting with shll8/shll16
   and stepping back with a right shift (for example 14 = 8 - 2 + 8).  */
LOCAL(ashlsi_22):
LOCAL(ashlsi_14):
        shlr2   r0
        rts
        shll8   r0

LOCAL(ashlsi_23):
LOCAL(ashlsi_15):
        shlr    r0
        rts
        shll8   r0

LOCAL(ashlsi_29):
        shlr    r0
LOCAL(ashlsi_30):
        shlr2   r0
        rts
        shll16  r0

        ENDFUNC(GLOBAL(ashlsi3))
        ENDFUNC(GLOBAL(ashlsi3_r0))
#endif

#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
! (For compatibility with older binaries, not used by compiler)
!
! Entry:
!       r4: Value to shift
!       r5: Shift count
!
! Exit:
!       r0: Result
!
! Destroys:
!       T bit
!
!
! GLOBAL(lshrsi3_r0)
!
! Entry:
!       r4: Value to shift
!       r0: Shift count
!
! Exit:
!       r0: Result
!
! Destroys:
!
!       T bit

! Logical right shift of r4 by a count taken modulo 32, result in r0.
! The count arrives in r5 (lshrsi3) or r0 (lshrsi3_r0).  It is scaled
! by 4 and used to branch into a table with one 4-byte slot (two
! instructions) per count.  A slot either finishes the shift itself or
! does a first step in the delay slot of a bra to a shared tail.

        .global GLOBAL(lshrsi3)
        .global GLOBAL(lshrsi3_r0)
        HIDDEN_FUNC(GLOBAL(lshrsi3))
        HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
GLOBAL(lshrsi3):
        mov     r5,r0
        .align  2
GLOBAL(lshrsi3_r0):

#ifdef __sh1__
        ! No braf here: compute table + 4 * count through mova, using r4
        ! as a scratch register that is saved and restored on the stack.
        and     #31,r0
        shll2   r0
        mov.l   r4,@-r15
        mov     r0,r4
        mova    LOCAL(lshrsi3_table),r0
        add     r4,r0
        mov.l   @r15+,r4
        jmp     @r0
        mov     r4,r0
        .align  2
#else
        ! The table starts right after the delay slot, which is where a
        ! braf displacement of zero lands.
        and     #31,r0
        shll2   r0
        braf    r0
        mov     r4,r0
#endif
LOCAL(lshrsi3_table):
        rts                             // >> 0
        nop
LOCAL(lshrsi_1):                        // >> 1
        rts
        shlr    r0
LOCAL(lshrsi_2):                        // >> 2
        rts
        shlr2   r0
        bra     LOCAL(lshrsi_1)         // >> 3
        shlr2   r0
        bra     LOCAL(lshrsi_2)         // >> 4
        shlr2   r0
        bra     LOCAL(lshrsi_5)         // >> 5
        shlr    r0
        bra     LOCAL(lshrsi_6)         // >> 6
        shlr2   r0
        bra     LOCAL(lshrsi_7)         // >> 7
        shlr    r0
LOCAL(lshrsi_8):                        // >> 8
        rts
        shlr8   r0
        bra     LOCAL(lshrsi_8)         // >> 9
        shlr    r0
        bra     LOCAL(lshrsi_8)         // >> 10
        shlr2   r0
        bra     LOCAL(lshrsi_11)        // >> 11
        shlr    r0
        bra     LOCAL(lshrsi_12)        // >> 12
        shlr2   r0
        bra     LOCAL(lshrsi_13)        // >> 13
        shlr    r0
        bra     LOCAL(lshrsi_14)        // >> 14
        shlr8   r0
        bra     LOCAL(lshrsi_15)        // >> 15
        shlr8   r0
LOCAL(lshrsi_16):                       // >> 16
        rts
        shlr16  r0
        bra     LOCAL(lshrsi_16)        // >> 17
        shlr    r0
        bra     LOCAL(lshrsi_16)        // >> 18
        shlr2   r0
        bra     LOCAL(lshrsi_19)        // >> 19
        shlr    r0
        bra     LOCAL(lshrsi_20)        // >> 20
        shlr2   r0
        bra     LOCAL(lshrsi_21)        // >> 21
        shlr    r0
        bra     LOCAL(lshrsi_22)        // >> 22
        shlr16  r0
        bra     LOCAL(lshrsi_23)        // >> 23
        shlr16  r0
        bra     LOCAL(lshrsi_16)        // >> 24
        shlr8   r0
        bra     LOCAL(lshrsi_25)        // >> 25
        shlr    r0
        bra     LOCAL(lshrsi_26)        // >> 26
        shlr2   r0
        bra     LOCAL(lshrsi_27)        // >> 27
        shlr    r0
        bra     LOCAL(lshrsi_28)        // >> 28
        shlr2   r0
        bra     LOCAL(lshrsi_29)        // >> 29
        shlr16  r0
        bra     LOCAL(lshrsi_30)        // >> 30
        shlr16  r0
        shll    r0                      // >> 31
        rts
        movt    r0

/* Shared tails.  Several of the rts instructions below are directly
   followed by a label: the delay slot of such an rts is the first
   instruction under that label, and it is counted as part of the shift.
   Do not reorder or separate these sequences.  */
LOCAL(lshrsi_7):
        shlr2   r0
LOCAL(lshrsi_5):
LOCAL(lshrsi_6):
        shlr2   r0
        rts                             // delay slot: shlr2 at lshrsi_13
LOCAL(lshrsi_13):
        shlr2   r0
LOCAL(lshrsi_12):
LOCAL(lshrsi_11):
        shlr8   r0
        rts                             // delay slot: shlr2 at lshrsi_21
LOCAL(lshrsi_21):
        shlr2   r0
LOCAL(lshrsi_20):
LOCAL(lshrsi_19):
        shlr16  r0
        rts                             // delay slot: shlr2 at lshrsi_28/27
LOCAL(lshrsi_28):
LOCAL(lshrsi_27):
        shlr2   r0
LOCAL(lshrsi_26):
LOCAL(lshrsi_25):
        shlr16  r0
        rts
        shlr8   r0

/* The tails below reach their count by overshooting with shlr8/shlr16
   and stepping back with a left shift (for example 14 = 8 - 2 + 8).  */
LOCAL(lshrsi_22):
LOCAL(lshrsi_14):
        shll2   r0
        rts
        shlr8   r0

LOCAL(lshrsi_23):
LOCAL(lshrsi_15):
        shll    r0
        rts
        shlr8   r0

LOCAL(lshrsi_29):
        shll    r0
LOCAL(lshrsi_30):
        shll2   r0
        rts
        shlr16  r0

        ENDFUNC(GLOBAL(lshrsi3))
        ENDFUNC(GLOBAL(lshrsi3_r0))
#endif

#ifdef L_movmem
        .text
        .balign 4
        .global GLOBAL(movmem)
        HIDDEN_FUNC(GLOBAL(movmem))
        HIDDEN_ALIAS(movstr,movmem)
        /* Block copy loop: loads go through r5, stores through r4, and r6
           holds the remaining size.  NOTE(review): r6 is presumably passed
           as a longword count, since it is scaled by 4 on entry; confirm
           against the caller.  The loop copies 64 bytes per round by
           calling into the movmemSI* store chain and being re-entered
           through its rts.  */
        /* This would be a lot simpler if r6 contained the byte count
           minus 64, and we wouldn't be called here for a byte count of 64.  */
GLOBAL(movmem):
        sts.l   pr,@-r15
        shll2   r6
        bsr     GLOBAL(movmemSI52+2)
        mov.l   @(48,r5),r0
        .balign 4
LOCAL(movmem_loop):     /* Reached with rts */
        mov.l   @(60,r5),r0
        add     #-64,r6
        mov.l   r0,@(60,r4)
        tst     r6,r6
        mov.l   @(56,r5),r0
        bt      LOCAL(movmem_done)
        mov.l   r0,@(56,r4)
        cmp/pl  r6
        mov.l   @(52,r5),r0
        add     #64,r5
        mov.l   r0,@(52,r4)
        add     #64,r4
        bt      GLOBAL(movmemSI52)
! done all the large groups, do the remainder
! jump to movmem+
        mova    GLOBAL(movmemSI4)+4,r0
        add     r6,r0
        jmp     @r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
        lds.l   @r15+,pr
        mov.l   r0,@(56,r4)
        mov.l   @(52,r5),r0
        rts
        mov.l   r0,@(52,r4)
        .balign 4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
! GLOBAL(movmemSI<n>), n = 64, 60, ... 4: copy n bytes from @r5 to @r4
! one longword at a time through r0, highest offset first.  Each entry
! falls through into the next smaller one; movmemSI4 returns.
        .global GLOBAL(movmemSI64)
        HIDDEN_FUNC(GLOBAL(movmemSI64))
        HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
        mov.l   @(60,r5),r0
        mov.l   r0,@(60,r4)
        .global GLOBAL(movmemSI60)
        HIDDEN_FUNC(GLOBAL(movmemSI60))
        HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
        mov.l   @(56,r5),r0
        mov.l   r0,@(56,r4)
        .global GLOBAL(movmemSI56)
        HIDDEN_FUNC(GLOBAL(movmemSI56))
        HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
        mov.l   @(52,r5),r0
        mov.l   r0,@(52,r4)
        .global GLOBAL(movmemSI52)
        HIDDEN_FUNC(GLOBAL(movmemSI52))
        HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
        mov.l   @(48,r5),r0
        mov.l   r0,@(48,r4)
        .global GLOBAL(movmemSI48)
        HIDDEN_FUNC(GLOBAL(movmemSI48))
        HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
        mov.l   @(44,r5),r0
        mov.l   r0,@(44,r4)
        .global GLOBAL(movmemSI44)
        HIDDEN_FUNC(GLOBAL(movmemSI44))
        HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
        mov.l   @(40,r5),r0
        mov.l   r0,@(40,r4)
        .global GLOBAL(movmemSI40)
        HIDDEN_FUNC(GLOBAL(movmemSI40))
        HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
        mov.l   @(36,r5),r0
        mov.l   r0,@(36,r4)
        .global GLOBAL(movmemSI36)
        HIDDEN_FUNC(GLOBAL(movmemSI36))
        HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
        mov.l   @(32,r5),r0
        mov.l   r0,@(32,r4)
        .global GLOBAL(movmemSI32)
        HIDDEN_FUNC(GLOBAL(movmemSI32))
        HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
        mov.l   @(28,r5),r0
        mov.l   r0,@(28,r4)
        .global GLOBAL(movmemSI28)
        HIDDEN_FUNC(GLOBAL(movmemSI28))
        HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
        mov.l   @(24,r5),r0
        mov.l   r0,@(24,r4)
        .global GLOBAL(movmemSI24)
        HIDDEN_FUNC(GLOBAL(movmemSI24))
        HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
        mov.l   @(20,r5),r0
        mov.l   r0,@(20,r4)
        .global GLOBAL(movmemSI20)
        HIDDEN_FUNC(GLOBAL(movmemSI20))
        HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
        mov.l   @(16,r5),r0
        mov.l   r0,@(16,r4)
        .global GLOBAL(movmemSI16)
        HIDDEN_FUNC(GLOBAL(movmemSI16))
        HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
        mov.l   @(12,r5),r0
        mov.l   r0,@(12,r4)
        .global GLOBAL(movmemSI12)
        HIDDEN_FUNC(GLOBAL(movmemSI12))
        HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
        mov.l   @(8,r5),r0
        mov.l   r0,@(8,r4)
        .global GLOBAL(movmemSI8)
        HIDDEN_FUNC(GLOBAL(movmemSI8))
        HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
        mov.l   @(4,r5),r0
        mov.l   r0,@(4,r4)
        .global GLOBAL(movmemSI4)
        HIDDEN_FUNC(GLOBAL(movmemSI4))
        HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
        mov.l   @(0,r5),r0
        rts
        mov.l   r0,@(0,r4)

        ENDFUNC(GLOBAL(movmemSI64))
        ENDFUNC(GLOBAL(movmemSI60))
        ENDFUNC(GLOBAL(movmemSI56))
        ENDFUNC(GLOBAL(movmemSI52))
        ENDFUNC(GLOBAL(movmemSI48))
        ENDFUNC(GLOBAL(movmemSI44))
        ENDFUNC(GLOBAL(movmemSI40))
        ENDFUNC(GLOBAL(movmemSI36))
        ENDFUNC(GLOBAL(movmemSI32))
        ENDFUNC(GLOBAL(movmemSI28))
        ENDFUNC(GLOBAL(movmemSI24))
        ENDFUNC(GLOBAL(movmemSI20))
        ENDFUNC(GLOBAL(movmemSI16))
        ENDFUNC(GLOBAL(movmemSI12))
        ENDFUNC(GLOBAL(movmemSI8))
        ENDFUNC(GLOBAL(movmemSI4))
        ENDFUNC(GLOBAL(movmem))
#endif

#ifdef L_movmem_i4
        .text
        .global GLOBAL(movmem_i4_even)
        .global GLOBAL(movmem_i4_odd)
        .global GLOBAL(movmemSI12_i4)

        HIDDEN_FUNC(GLOBAL(movmem_i4_even))
        HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
        HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

        HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
        HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
        HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)

! Block copy with post-increment loads from @r5 and stores relative to r4.
! r6 is a loop counter decremented with dt; registers r0-r3 carry the data.
! NOTE(review): the unit of r6 and the meaning of even/odd are set by the
! caller and cannot be confirmed from this file.

        .p2align        5
L_movmem_2mod4_end:
        mov.l   r0,@(16,r4)
        rts
        mov.l   r1,@(20,r4)

        .p2align        2

GLOBAL(movmem_i4_even):
        mov.l   @r5+,r0
        bra     L_movmem_start_even
        mov.l   @r5+,r1

GLOBAL(movmem_i4_odd):
        mov.l   @r5+,r1
        add     #-4,r4
        mov.l   @r5+,r2
        mov.l   @r5+,r3
        mov.l   r1,@(4,r4)
        mov.l   r2,@(8,r4)

L_movmem_loop:
        mov.l   r3,@(12,r4)
        dt      r6
        mov.l   @r5+,r0
        bt/s    L_movmem_2mod4_end
        mov.l   @r5+,r1
        add     #16,r4
L_movmem_start_even:
        mov.l   @r5+,r2
        mov.l   @r5+,r3
        mov.l   r0,@r4
        dt      r6
        mov.l   r1,@(4,r4)
        bf/s    L_movmem_loop
        mov.l   r2,@(8,r4)
        rts
        mov.l   r3,@(12,r4)

        ENDFUNC(GLOBAL(movmem_i4_even))
        ENDFUNC(GLOBAL(movmem_i4_odd))

! Copy exactly three longwords from @r5 to @r4 through r0, r1 and r2.
        .p2align        4
GLOBAL(movmemSI12_i4):
        mov.l   @r5,r0
        mov.l   @(4,r5),r1
        mov.l   @(8,r5),r2
        mov.l   r0,@r4
        mov.l   r1,@(4,r4)
        rts
        mov.l   r2,@(8,r4)

        ENDFUNC(GLOBAL(movmemSI12_i4))
#endif

#ifdef L_mulsi3


        .global GLOBAL(mulsi3)
        HIDDEN_FUNC(GLOBAL(mulsi3))

! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! Uses r1, r2, r3 and macl as scratch; tst writes the T bit.
!

GLOBAL(mulsi3):
        mulu.w  r4,r5           ! multiply the lsws  macl=bb*dd
        mov     r5,r3           ! r3 = ccdd
        swap.w  r4,r2           ! r2 = bbaa
        xtrct   r2,r3           ! r3 = aacc
        tst     r3,r3           ! msws zero ?
        bf      LOCAL(hiset)
        rts                     ! yes - then we have the answer
        sts     macl,r0

LOCAL(hiset):
        sts     macl,r0         ! r0 = bb*dd
        mulu.w  r2,r5           ! brewing macl = aa*dd
        sts     macl,r1
        mulu.w  r3,r4           ! brewing macl = cc*bb
        sts     macl,r2
        add     r1,r2           ! r2 = aa*dd + cc*bb
        shll16  r2              ! scale the cross terms by 65536
        rts
        add     r2,r0

        ENDFUNC(GLOBAL(mulsi3))
#endif

/*------------------------------------------------------------------------------
  32 bit signed integer division that uses FPU double precision division.  */

#ifdef L_sdivsi3_i4
        .title "SH DIVIDE"

#if defined (__SH4__) || defined (__SH2A__)
/* This variant is used when FPSCR.PR = 1 (double precision) is the default
   setting.
   Args in r4 and r5, result in fpul, clobber dr0, dr2.
*/

        .global GLOBAL(sdivsi3_i4)
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
        lds     r4,fpul
        float   fpul,dr0        // dr0 = (double) dividend
        lds     r5,fpul
        float   fpul,dr2        // dr2 = (double) divisor
        fdiv    dr2,dr0
        rts
        ftrc    dr0,fpul        // delay slot: convert the quotient back

        ENDFUNC(GLOBAL(sdivsi3_i4))

#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* This variant is used when FPSCR.PR = 0 (single precision) is the default
   setting.
   Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
   For this to work, we must temporarily switch the FPU to double precision,
   but we better do not touch FPSCR.FR.  See PR 6526.  */

        .global GLOBAL(sdivsi3_i4)
        HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):

#ifndef __SH4A__
        mov.l   r3,@-r15        // r3 is used as scratch and restored below
        sts     fpscr,r2        // r2 = FPSCR on entry, written back at exit
        mov     #8,r3
        swap.w  r3,r3           // r3 = 1 << 19 (FPSCR.PR bit)
        or      r2,r3
        lds     r3,fpscr        // Set FPSCR.PR = 1.
        lds     r4,fpul
        float   fpul,dr0
        lds     r5,fpul
        float   fpul,dr2
        fdiv    dr2,dr0
        ftrc    dr0,fpul
        lds     r2,fpscr
        rts
        mov.l   @r15+,r3
#else
/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.  */
        fpchg
        lds     r4,fpul
        float   fpul,dr0
        lds     r5,fpul
        float   fpul,dr2
        fdiv    dr2,dr0
        ftrc    dr0,fpul
        rts
        fpchg

#endif /* __SH4A__  */

        ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH4__ || __SH2A__  */
#endif /* L_sdivsi3_i4  */

//------------------------------------------------------------------------------
#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!!
!! args in r4 and r5, result in r0; clobbers r1, r2, r3, and t bit
!! A zero divisor is not trapped: the routine returns 0 in that case.

        .global GLOBAL(sdivsi3)
        .align  2

        FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
        mov     r4,r1           ! r1 = dividend, becomes the quotient
        mov     r5,r0           ! r0 = divisor

        tst     r0,r0
        bt      LOCAL(div0)
        mov     #0,r2
        div0s   r2,r1
        subc    r3,r3
        subc    r2,r1
        div0s   r0,r3
        rotcl   r1
        ! 32 one-bit division steps, one quotient bit rotated into r1 each.
        .rept   32
        div1    r0,r3
        rotcl   r1
        .endr
        addc    r2,r1
        rts
        mov     r1,r0


LOCAL(div0):
        rts
        mov     #0,r0

        ENDFUNC(GLOBAL(sdivsi3))
#endif /* L_sdivsi3 */

/*------------------------------------------------------------------------------
  32 bit unsigned integer division that uses FPU double precision division.  */

#ifdef L_udivsi3_i4
        .title "SH DIVIDE"

#if defined (__SH4__) || defined (__SH2A__)
/* This variant is used when FPSCR.PR = 1 (double precision) is the default
   setting.
Args in r4 and r5, result in fpul,
   clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit

   The int-to-double conversion is signed, so each operand first has its
   top bit flipped (xor with 1 << 31) and the constant 2147483648.0 at L1
   is added back after conversion.  Divisors of 0 and 1 skip the FPU and
   return the dividend unchanged.  */

        .global GLOBAL(udivsi3_i4)
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
        mov     #1,r1
        cmp/hi  r1,r5
        bf/s    trivial
        rotr    r1              // delay slot: r1 = 1 << 31
        xor     r1,r4
        lds     r4,fpul
        mova    L1,r0
#ifdef FMOVD_WORKS
        fmov.d  @r0+,dr4
#else
        fmov.s  @r0+,DR40
        fmov.s  @r0,DR41
#endif
        float   fpul,dr0
        xor     r1,r5
        lds     r5,fpul
        float   fpul,dr2
        fadd    dr4,dr0
        fadd    dr4,dr2
        fdiv    dr2,dr0
        rts
        ftrc    dr0,fpul

trivial:
        rts
        lds     r4,fpul

        .align  2
#ifdef FMOVD_WORKS
        .align  3       // Make the double below 8 byte aligned.
#endif
L1:
        .double 2147483648

        ENDFUNC(GLOBAL(udivsi3_i4))

#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* This variant is used when FPSCR.PR = 0 (single precision) is the default
   setting.
   Args in r4 and r5, result in fpul,
   clobber r0, r1, r4, r5, dr0, dr2, dr4.
   For this to work, we must temporarily switch the FPU to double precision,
   but we better do not touch FPSCR.FR.  See PR 6526.  */

        .global GLOBAL(udivsi3_i4)
        HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):

#ifndef __SH4A__
        mov     #1,r1
        cmp/hi  r1,r5
        bf/s    trivial
        rotr    r1              // r1 = 1 << 31
        sts.l   fpscr,@-r15     // save FPSCR, restored in the final delay slot
        xor     r1,r4
        mov.l   @(0,r15),r0
        xor     r1,r5
        mov.l   L2,r1
        lds     r4,fpul
        or      r0,r1
        mova    L1,r0
        lds     r1,fpscr
#ifdef FMOVD_WORKS
        fmov.d  @r0+,dr4
#else
        fmov.s  @r0+,DR40
        fmov.s  @r0,DR41
#endif
        float   fpul,dr0
        lds     r5,fpul
        float   fpul,dr2
        fadd    dr4,dr0
        fadd    dr4,dr2
        fdiv    dr2,dr0
        ftrc    dr0,fpul
        rts
        lds.l   @r15+,fpscr

#ifdef FMOVD_WORKS
        .align  3       // Make the double below 8 byte aligned.
#endif
trivial:
        rts
        lds     r4,fpul

        .align  2
L2:
#ifdef FMOVD_WORKS
        .long   0x180000        // FPSCR.PR = 1, FPSCR.SZ = 1
#else
        .long   0x80000         // FPSCR.PR = 1
#endif
L1:
        .double 2147483648

#else
/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
   Although on SH4A fmovd usually works, it would require either additional
   two fschg instructions or an FPSCR push + pop.  It's not worth the effort
   for loading only one double constant.  */
        mov     #1,r1
        cmp/hi  r1,r5
        bf/s    trivial
        rotr    r1              // r1 = 1 << 31
        fpchg
        mova    L1,r0
        xor     r1,r4
        fmov.s  @r0+,DR40
        lds     r4,fpul
        fmov.s  @r0,DR41
        xor     r1,r5
        float   fpul,dr0
        lds     r5,fpul
        float   fpul,dr2
        fadd    dr4,dr0
        fadd    dr4,dr2
        fdiv    dr2,dr0
        ftrc    dr0,fpul
        rts
        fpchg

trivial:
        rts
        lds     r4,fpul

        .align  2
L1:
        .double 2147483648

#endif /* __SH4A__  */


        ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH4__ */
#endif /* L_udivsi3_i4  */

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */

!!
!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
        .global GLOBAL(udivsi3)
        HIDDEN_FUNC(GLOBAL(udivsi3))

! Helpers reached with bsr: div8 and div7 run 8 or 7 div1 steps, divx4
! runs 4 div1 steps and rotates the quotient bits into r0.  In each of
! them the last div1 sits in the delay slot of the rts.
LOCAL(div8):
        div1 r5,r4
LOCAL(div7):
        div1 r5,r4; div1 r5,r4; div1 r5,r4
        div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

LOCAL(divx4):
        div1 r5,r4; rotcl r0
        div1 r5,r4; rotcl r0
        div1 r5,r4; rotcl r0
        rts; div1 r5,r4

GLOBAL(udivsi3):
        sts.l pr,@-r15
        extu.w r5,r0
        cmp/eq r5,r0            ! T = divisor fits in 16 bits
#ifdef __sh1__
        bf LOCAL(large_divisor)
#else
        bf/s LOCAL(large_divisor)
#endif
        div0u                   ! delay slot of bf/s when not __sh1__
        swap.w r4,r0
        shlr16 r4
        bsr LOCAL(div8)
        shll16 r5
        bsr LOCAL(div7)
        div1 r5,r4
        xtrct r4,r0
        xtrct r0,r4
        bsr LOCAL(div8)
        swap.w r4,r4
        bsr LOCAL(div7)
        div1 r5,r4
        lds.l @r15+,pr
        xtrct r4,r0
        swap.w r0,r0
        rotcl r0
        rts
        shlr16 r5               ! delay slot: undo the shll16 on r5

LOCAL(large_divisor):
#ifdef __sh1__
        div0u
#endif
        mov #0,r0
        xtrct r4,r0
        xtrct r0,r4
        bsr LOCAL(divx4)
        rotcl r0
        bsr LOCAL(divx4)
        rotcl r0
        bsr LOCAL(divx4)
        rotcl r0
        bsr LOCAL(divx4)
        rotcl r0
        lds.l @r15+,pr
        rts
        rotcl r0

        ENDFUNC(GLOBAL(udivsi3))
#endif /* L_udivsi3 */

#ifdef L_set_fpscr
#if !defined (__SH2A_NOFPU__)
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
/* set_fpscr: load FPSCR from r4, then store two variants of that value
   (they differ in bits 19 and 20, set up through the swapped low byte of
   r0) into the two words of fpscr_values.  Which word receives which
   variant depends on the default FPU mode selected by the #if blocks.
   Uses r0, r1 and r2/r3 as scratch; r12 is saved and restored in the PIC
   case.  */
        .global GLOBAL(set_fpscr)
        HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
        lds r4,fpscr
#ifdef __PIC__
        mov.l   r12,@-r15
#ifdef __vxworks
        mov.l   LOCAL(set_fpscr_L0_base),r12
        mov.l   LOCAL(set_fpscr_L0_index),r0
        mov.l   @r12,r12
        mov.l   @(r0,r12),r12
#else
        mova    LOCAL(set_fpscr_L0),r0
        mov.l   LOCAL(set_fpscr_L0),r12
        add     r0,r12
#endif
        mov.l   LOCAL(set_fpscr_L1),r0
        mov.l   @(r0,r12),r1
        mov.l   @r15+,r12
#else
        mov.l LOCAL(set_fpscr_L1),r1
#endif
        swap.w r4,r0
        or #24,r0
#ifndef FMOVD_WORKS
        xor #16,r0
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
        swap.w r0,r3
        mov.l r3,@(4,r1)
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
        swap.w r0,r2
        mov.l r2,@r1
#endif
#ifndef FMOVD_WORKS
        xor #8,r0
#else
        xor #24,r0
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
        swap.w r0,r2
        rts
        mov.l r2,@r1
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
        swap.w r0,r3
        rts
        mov.l r3,@(4,r1)
#endif
        .align 2
#ifdef __PIC__
#ifdef __vxworks
LOCAL(set_fpscr_L0_base):
        .long ___GOTT_BASE__
LOCAL(set_fpscr_L0_index):
        .long ___GOTT_INDEX__
#else
LOCAL(set_fpscr_L0):
        .long _GLOBAL_OFFSET_TABLE_
#endif
LOCAL(set_fpscr_L1):
        .long GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
        .long GLOBAL(fpscr_values)
#endif

        ENDFUNC(GLOBAL(set_fpscr))
#ifndef NO_FPSCR_VALUES
#ifdef __ELF__
        .comm   GLOBAL(fpscr_values),8,4
#else
        .comm   GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* NO_FPSCR_VALUES */
#endif /* SH2E / SH3E / SH4 */
#endif /* __SH2A_NOFPU__ */
#endif /* L_set_fpscr */
#ifdef L_ic_invalidate

#if defined(__SH4A__)
        /* SH4A: write the data cache line at @r4 back, synchronize, then
           invalidate the instruction cache block at the same address.  */
        .global GLOBAL(ic_invalidate)
        HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
        ocbwb   @r4
        synco
        icbi    @r4
        rts
          nop
        ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
        /* For system code, we use ic_invalidate_line_i, but user code
           needs a different mechanism.  A kernel call is generally not
           available, and it would also be slow.  Different SH4 variants use
           different sizes and associativities of the Icache.
We use a small 1496 bit of dispatch code that can be put hidden in every shared object, 1497 which calls the actual processor-specific invalidation code in a 1498 separate module. 1499 Or if you have operating system support, the OS could mmap the 1500 procesor-specific code from a single page, since it is highly 1501 repetitive. */ 1502 .global GLOBAL(ic_invalidate) 1503 HIDDEN_FUNC(GLOBAL(ic_invalidate)) 1504GLOBAL(ic_invalidate): 1505#ifdef __pic__ 1506#ifdef __vxworks 1507 mov.l 1f,r1 1508 mov.l 2f,r0 1509 mov.l @r1,r1 1510 mov.l 0f,r2 1511 mov.l @(r0,r1),r0 1512#else 1513 mov.l 1f,r1 1514 mova 1f,r0 1515 mov.l 0f,r2 1516 add r1,r0 1517#endif 1518 mov.l @(r0,r2),r1 1519#else 1520 mov.l 0f,r1 1521#endif 1522 ocbwb @r4 1523 mov.l @(8,r1),r0 1524 sub r1,r4 1525 and r4,r0 1526 add r1,r0 1527 jmp @r0 1528 mov.l @(4,r1),r0 1529 .align 2 1530#ifndef __pic__ 15310: .long GLOBAL(ic_invalidate_array) 1532#else /* __pic__ */ 1533 .global GLOBAL(ic_invalidate_array) 15340: .long GLOBAL(ic_invalidate_array)@GOT 1535#ifdef __vxworks 15361: .long ___GOTT_BASE__ 15372: .long ___GOTT_INDEX__ 1538#else 15391: .long _GLOBAL_OFFSET_TABLE_ 1540#endif 1541 ENDFUNC(GLOBAL(ic_invalidate)) 1542#endif /* __pic__ */ 1543#endif /* SH4 */ 1544#endif /* L_ic_invalidate */ 1545 1546#ifdef L_ic_invalidate_array 1547#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__))) 1548 .global GLOBAL(ic_invalidate_array) 1549 /* This is needed when an SH4 dso with trampolines is used on SH4A. 
*/ 1550 .global GLOBAL(ic_invalidate_array) 1551 FUNC(GLOBAL(ic_invalidate_array)) 1552GLOBAL(ic_invalidate_array): 1553 add r1,r4 1554 synco 1555 icbi @r4 1556 rts 1557 nop 1558 .align 2 1559 .long 0 1560 ENDFUNC(GLOBAL(ic_invalidate_array)) 1561#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__) 1562 .global GLOBAL(ic_invalidate_array) 1563 .p2align 5 1564 FUNC(GLOBAL(ic_invalidate_array)) 1565/* This must be aligned to the beginning of a cache line. */ 1566GLOBAL(ic_invalidate_array): 1567#ifndef WAYS 1568#define WAYS 4 1569#define WAY_SIZE 0x4000 1570#endif 1571#if WAYS == 1 1572 .rept WAY_SIZE * WAYS / 32 1573 rts 1574 nop 1575 .rept 7 1576 .long WAY_SIZE - 32 1577 .endr 1578 .endr 1579#elif WAYS <= 6 1580 .rept WAY_SIZE * WAYS / 32 1581 braf r0 1582 add #-8,r0 1583 .long WAY_SIZE + 8 1584 .long WAY_SIZE - 32 1585 .rept WAYS-2 1586 braf r0 1587 nop 1588 .endr 1589 .rept 7 - WAYS 1590 rts 1591 nop 1592 .endr 1593 .endr 1594#else /* WAYS > 6 */ 1595 /* This variant needs two different pages for mmap-ing. */ 1596 .rept WAYS-1 1597 .rept WAY_SIZE / 32 1598 braf r0 1599 nop 1600 .long WAY_SIZE 1601 .rept 6 1602 .long WAY_SIZE - 32 1603 .endr 1604 .endr 1605 .endr 1606 .rept WAY_SIZE / 32 1607 rts 1608 .rept 15 1609 nop 1610 .endr 1611 .endr 1612#endif /* WAYS */ 1613 ENDFUNC(GLOBAL(ic_invalidate_array)) 1614#endif /* SH4 */ 1615#endif /* L_ic_invalidate_array */ 1616 1617 1618#ifdef L_div_table 1619 1620#if defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) 1621/* This code uses shld, thus is not suitable for SH1 / SH2. */ 1622 1623/* Signed / unsigned division without use of FPU, optimized for SH4. 1624 Uses a lookup table for divisors in the range -128 .. +128, and 1625 div1 with case distinction for larger divisors in three more ranges. 
1626 The code is lumped together with the table to allow the use of mova. */ 1627#ifdef __LITTLE_ENDIAN__ 1628#define L_LSB 0 1629#define L_LSWMSB 1 1630#define L_MSWLSB 2 1631#else 1632#define L_LSB 3 1633#define L_LSWMSB 2 1634#define L_MSWLSB 1 1635#endif 1636 1637 .balign 4 1638 .global GLOBAL(udivsi3_i4i) 1639 FUNC(GLOBAL(udivsi3_i4i)) 1640GLOBAL(udivsi3_i4i): 1641 mov.w LOCAL(c128_w), r1 1642 div0u 1643 mov r4,r0 1644 shlr8 r0 1645 cmp/hi r1,r5 1646 extu.w r5,r1 1647 bf LOCAL(udiv_le128) 1648 cmp/eq r5,r1 1649 bf LOCAL(udiv_ge64k) 1650 shlr r0 1651 mov r5,r1 1652 shll16 r5 1653 mov.l r4,@-r15 1654 div1 r5,r0 1655 mov.l r1,@-r15 1656 div1 r5,r0 1657 div1 r5,r0 1658 bra LOCAL(udiv_25) 1659 div1 r5,r0 1660 1661LOCAL(div_le128): 1662 mova LOCAL(div_table_ix),r0 1663 bra LOCAL(div_le128_2) 1664 mov.b @(r0,r5),r1 1665LOCAL(udiv_le128): 1666 mov.l r4,@-r15 1667 mova LOCAL(div_table_ix),r0 1668 mov.b @(r0,r5),r1 1669 mov.l r5,@-r15 1670LOCAL(div_le128_2): 1671 mova LOCAL(div_table_inv),r0 1672 mov.l @(r0,r1),r1 1673 mov r5,r0 1674 tst #0xfe,r0 1675 mova LOCAL(div_table_clz),r0 1676 dmulu.l r1,r4 1677 mov.b @(r0,r5),r1 1678 bt/s LOCAL(div_by_1) 1679 mov r4,r0 1680 mov.l @r15+,r5 1681 sts mach,r0 1682 /* clrt */ 1683 addc r4,r0 1684 mov.l @r15+,r4 1685 rotcr r0 1686 rts 1687 shld r1,r0 1688 1689LOCAL(div_by_1_neg): 1690 neg r4,r0 1691LOCAL(div_by_1): 1692 mov.l @r15+,r5 1693 rts 1694 mov.l @r15+,r4 1695 1696LOCAL(div_ge64k): 1697 bt/s LOCAL(div_r8) 1698 div0u 1699 shll8 r5 1700 bra LOCAL(div_ge64k_2) 1701 div1 r5,r0 1702LOCAL(udiv_ge64k): 1703 cmp/hi r0,r5 1704 mov r5,r1 1705 bt LOCAL(udiv_r8) 1706 shll8 r5 1707 mov.l r4,@-r15 1708 div1 r5,r0 1709 mov.l r1,@-r15 1710LOCAL(div_ge64k_2): 1711 div1 r5,r0 1712 mov.l LOCAL(zero_l),r1 1713 .rept 4 1714 div1 r5,r0 1715 .endr 1716 mov.l r1,@-r15 1717 div1 r5,r0 1718 mov.w LOCAL(m256_w),r1 1719 div1 r5,r0 1720 mov.b r0,@(L_LSWMSB,r15) 1721 xor r4,r0 1722 and r1,r0 1723 bra LOCAL(div_ge64k_end) 1724 xor r4,r0 1725 
1726LOCAL(div_r8): 1727 shll16 r4 1728 bra LOCAL(div_r8_2) 1729 shll8 r4 1730LOCAL(udiv_r8): 1731 mov.l r4,@-r15 1732 shll16 r4 1733 clrt 1734 shll8 r4 1735 mov.l r5,@-r15 1736LOCAL(div_r8_2): 1737 rotcl r4 1738 mov r0,r1 1739 div1 r5,r1 1740 mov r4,r0 1741 rotcl r0 1742 mov r5,r4 1743 div1 r5,r1 1744 .rept 5 1745 rotcl r0; div1 r5,r1 1746 .endr 1747 rotcl r0 1748 mov.l @r15+,r5 1749 div1 r4,r1 1750 mov.l @r15+,r4 1751 rts 1752 rotcl r0 1753 1754 ENDFUNC(GLOBAL(udivsi3_i4i)) 1755 1756 .global GLOBAL(sdivsi3_i4i) 1757 FUNC(GLOBAL(sdivsi3_i4i)) 1758 /* This is link-compatible with a GLOBAL(sdivsi3) call, 1759 but we effectively clobber only r1. */ 1760GLOBAL(sdivsi3_i4i): 1761 mov.l r4,@-r15 1762 cmp/pz r5 1763 mov.w LOCAL(c128_w), r1 1764 bt/s LOCAL(pos_divisor) 1765 cmp/pz r4 1766 mov.l r5,@-r15 1767 neg r5,r5 1768 bt/s LOCAL(neg_result) 1769 cmp/hi r1,r5 1770 neg r4,r4 1771LOCAL(pos_result): 1772 extu.w r5,r0 1773 bf LOCAL(div_le128) 1774 cmp/eq r5,r0 1775 mov r4,r0 1776 shlr8 r0 1777 bf/s LOCAL(div_ge64k) 1778 cmp/hi r0,r5 1779 div0u 1780 shll16 r5 1781 div1 r5,r0 1782 div1 r5,r0 1783 div1 r5,r0 1784LOCAL(udiv_25): 1785 mov.l LOCAL(zero_l),r1 1786 div1 r5,r0 1787 div1 r5,r0 1788 mov.l r1,@-r15 1789 .rept 3 1790 div1 r5,r0 1791 .endr 1792 mov.b r0,@(L_MSWLSB,r15) 1793 xtrct r4,r0 1794 swap.w r0,r0 1795 .rept 8 1796 div1 r5,r0 1797 .endr 1798 mov.b r0,@(L_LSWMSB,r15) 1799LOCAL(div_ge64k_end): 1800 .rept 8 1801 div1 r5,r0 1802 .endr 1803 mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
1804 extu.b r0,r0 1805 mov.l @r15+,r5 1806 or r4,r0 1807 mov.l @r15+,r4 1808 rts 1809 rotcl r0 1810 1811LOCAL(div_le128_neg): 1812 tst #0xfe,r0 1813 mova LOCAL(div_table_ix),r0 1814 mov.b @(r0,r5),r1 1815 mova LOCAL(div_table_inv),r0 1816 bt/s LOCAL(div_by_1_neg) 1817 mov.l @(r0,r1),r1 1818 mova LOCAL(div_table_clz),r0 1819 dmulu.l r1,r4 1820 mov.b @(r0,r5),r1 1821 mov.l @r15+,r5 1822 sts mach,r0 1823 /* clrt */ 1824 addc r4,r0 1825 mov.l @r15+,r4 1826 rotcr r0 1827 shld r1,r0 1828 rts 1829 neg r0,r0 1830 1831LOCAL(pos_divisor): 1832 mov.l r5,@-r15 1833 bt/s LOCAL(pos_result) 1834 cmp/hi r1,r5 1835 neg r4,r4 1836LOCAL(neg_result): 1837 extu.w r5,r0 1838 bf LOCAL(div_le128_neg) 1839 cmp/eq r5,r0 1840 mov r4,r0 1841 shlr8 r0 1842 bf/s LOCAL(div_ge64k_neg) 1843 cmp/hi r0,r5 1844 div0u 1845 mov.l LOCAL(zero_l),r1 1846 shll16 r5 1847 div1 r5,r0 1848 mov.l r1,@-r15 1849 .rept 7 1850 div1 r5,r0 1851 .endr 1852 mov.b r0,@(L_MSWLSB,r15) 1853 xtrct r4,r0 1854 swap.w r0,r0 1855 .rept 8 1856 div1 r5,r0 1857 .endr 1858 mov.b r0,@(L_LSWMSB,r15) 1859LOCAL(div_ge64k_neg_end): 1860 .rept 8 1861 div1 r5,r0 1862 .endr 1863 mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
1864 extu.b r0,r1 1865 mov.l @r15+,r5 1866 or r4,r1 1867LOCAL(div_r8_neg_end): 1868 mov.l @r15+,r4 1869 rotcl r1 1870 rts 1871 neg r1,r0 1872 1873LOCAL(div_ge64k_neg): 1874 bt/s LOCAL(div_r8_neg) 1875 div0u 1876 shll8 r5 1877 mov.l LOCAL(zero_l),r1 1878 .rept 6 1879 div1 r5,r0 1880 .endr 1881 mov.l r1,@-r15 1882 div1 r5,r0 1883 mov.w LOCAL(m256_w),r1 1884 div1 r5,r0 1885 mov.b r0,@(L_LSWMSB,r15) 1886 xor r4,r0 1887 and r1,r0 1888 bra LOCAL(div_ge64k_neg_end) 1889 xor r4,r0 1890 1891LOCAL(c128_w): 1892 .word 128 1893 1894LOCAL(div_r8_neg): 1895 clrt 1896 shll16 r4 1897 mov r4,r1 1898 shll8 r1 1899 mov r5,r4 1900 .rept 7 1901 rotcl r1; div1 r5,r0 1902 .endr 1903 mov.l @r15+,r5 1904 rotcl r1 1905 bra LOCAL(div_r8_neg_end) 1906 div1 r4,r0 1907 1908LOCAL(m256_w): 1909 .word 0xff00 1910/* This table has been generated by divtab-sh4.c. */ 1911 .balign 4 1912LOCAL(div_table_clz): 1913 .byte 0 1914 .byte 1 1915 .byte 0 1916 .byte -1 1917 .byte -1 1918 .byte -2 1919 .byte -2 1920 .byte -2 1921 .byte -2 1922 .byte -3 1923 .byte -3 1924 .byte -3 1925 .byte -3 1926 .byte -3 1927 .byte -3 1928 .byte -3 1929 .byte -3 1930 .byte -4 1931 .byte -4 1932 .byte -4 1933 .byte -4 1934 .byte -4 1935 .byte -4 1936 .byte -4 1937 .byte -4 1938 .byte -4 1939 .byte -4 1940 .byte -4 1941 .byte -4 1942 .byte -4 1943 .byte -4 1944 .byte -4 1945 .byte -4 1946 .byte -5 1947 .byte -5 1948 .byte -5 1949 .byte -5 1950 .byte -5 1951 .byte -5 1952 .byte -5 1953 .byte -5 1954 .byte -5 1955 .byte -5 1956 .byte -5 1957 .byte -5 1958 .byte -5 1959 .byte -5 1960 .byte -5 1961 .byte -5 1962 .byte -5 1963 .byte -5 1964 .byte -5 1965 .byte -5 1966 .byte -5 1967 .byte -5 1968 .byte -5 1969 .byte -5 1970 .byte -5 1971 .byte -5 1972 .byte -5 1973 .byte -5 1974 .byte -5 1975 .byte -5 1976 .byte -5 1977 .byte -5 1978 .byte -6 1979 .byte -6 1980 .byte -6 1981 .byte -6 1982 .byte -6 1983 .byte -6 1984 .byte -6 1985 .byte -6 1986 .byte -6 1987 .byte -6 1988 .byte -6 1989 .byte -6 1990 .byte -6 1991 .byte -6 1992 .byte 
-6 1993 .byte -6 1994 .byte -6 1995 .byte -6 1996 .byte -6 1997 .byte -6 1998 .byte -6 1999 .byte -6 2000 .byte -6 2001 .byte -6 2002 .byte -6 2003 .byte -6 2004 .byte -6 2005 .byte -6 2006 .byte -6 2007 .byte -6 2008 .byte -6 2009 .byte -6 2010 .byte -6 2011 .byte -6 2012 .byte -6 2013 .byte -6 2014 .byte -6 2015 .byte -6 2016 .byte -6 2017 .byte -6 2018 .byte -6 2019 .byte -6 2020 .byte -6 2021 .byte -6 2022 .byte -6 2023 .byte -6 2024 .byte -6 2025 .byte -6 2026 .byte -6 2027 .byte -6 2028 .byte -6 2029 .byte -6 2030 .byte -6 2031 .byte -6 2032 .byte -6 2033 .byte -6 2034 .byte -6 2035 .byte -6 2036 .byte -6 2037 .byte -6 2038 .byte -6 2039 .byte -6 2040 .byte -6 2041/* Lookup table translating positive divisor to index into table of 2042 normalized inverse. N.B. the '0' entry is also the last entry of the 2043 previous table, and causes an unaligned access for division by zero. */ 2044LOCAL(div_table_ix): 2045 .byte -6 2046 .byte -128 2047 .byte -128 2048 .byte 0 2049 .byte -128 2050 .byte -64 2051 .byte 0 2052 .byte 64 2053 .byte -128 2054 .byte -96 2055 .byte -64 2056 .byte -32 2057 .byte 0 2058 .byte 32 2059 .byte 64 2060 .byte 96 2061 .byte -128 2062 .byte -112 2063 .byte -96 2064 .byte -80 2065 .byte -64 2066 .byte -48 2067 .byte -32 2068 .byte -16 2069 .byte 0 2070 .byte 16 2071 .byte 32 2072 .byte 48 2073 .byte 64 2074 .byte 80 2075 .byte 96 2076 .byte 112 2077 .byte -128 2078 .byte -120 2079 .byte -112 2080 .byte -104 2081 .byte -96 2082 .byte -88 2083 .byte -80 2084 .byte -72 2085 .byte -64 2086 .byte -56 2087 .byte -48 2088 .byte -40 2089 .byte -32 2090 .byte -24 2091 .byte -16 2092 .byte -8 2093 .byte 0 2094 .byte 8 2095 .byte 16 2096 .byte 24 2097 .byte 32 2098 .byte 40 2099 .byte 48 2100 .byte 56 2101 .byte 64 2102 .byte 72 2103 .byte 80 2104 .byte 88 2105 .byte 96 2106 .byte 104 2107 .byte 112 2108 .byte 120 2109 .byte -128 2110 .byte -124 2111 .byte -120 2112 .byte -116 2113 .byte -112 2114 .byte -108 2115 .byte -104 2116 .byte -100 2117 .byte 
-96 2118 .byte -92 2119 .byte -88 2120 .byte -84 2121 .byte -80 2122 .byte -76 2123 .byte -72 2124 .byte -68 2125 .byte -64 2126 .byte -60 2127 .byte -56 2128 .byte -52 2129 .byte -48 2130 .byte -44 2131 .byte -40 2132 .byte -36 2133 .byte -32 2134 .byte -28 2135 .byte -24 2136 .byte -20 2137 .byte -16 2138 .byte -12 2139 .byte -8 2140 .byte -4 2141 .byte 0 2142 .byte 4 2143 .byte 8 2144 .byte 12 2145 .byte 16 2146 .byte 20 2147 .byte 24 2148 .byte 28 2149 .byte 32 2150 .byte 36 2151 .byte 40 2152 .byte 44 2153 .byte 48 2154 .byte 52 2155 .byte 56 2156 .byte 60 2157 .byte 64 2158 .byte 68 2159 .byte 72 2160 .byte 76 2161 .byte 80 2162 .byte 84 2163 .byte 88 2164 .byte 92 2165 .byte 96 2166 .byte 100 2167 .byte 104 2168 .byte 108 2169 .byte 112 2170 .byte 116 2171 .byte 120 2172 .byte 124 2173 .byte -128 2174/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ 2175 .balign 4 2176LOCAL(zero_l): 2177 .long 0x0 2178 .long 0xF81F81F9 2179 .long 0xF07C1F08 2180 .long 0xE9131AC0 2181 .long 0xE1E1E1E2 2182 .long 0xDAE6076C 2183 .long 0xD41D41D5 2184 .long 0xCD856891 2185 .long 0xC71C71C8 2186 .long 0xC0E07039 2187 .long 0xBACF914D 2188 .long 0xB4E81B4F 2189 .long 0xAF286BCB 2190 .long 0xA98EF607 2191 .long 0xA41A41A5 2192 .long 0x9EC8E952 2193 .long 0x9999999A 2194 .long 0x948B0FCE 2195 .long 0x8F9C18FA 2196 .long 0x8ACB90F7 2197 .long 0x86186187 2198 .long 0x81818182 2199 .long 0x7D05F418 2200 .long 0x78A4C818 2201 .long 0x745D1746 2202 .long 0x702E05C1 2203 .long 0x6C16C16D 2204 .long 0x68168169 2205 .long 0x642C8591 2206 .long 0x60581606 2207 .long 0x5C9882BA 2208 .long 0x58ED2309 2209LOCAL(div_table_inv): 2210 .long 0x55555556 2211 .long 0x51D07EAF 2212 .long 0x4E5E0A73 2213 .long 0x4AFD6A06 2214 .long 0x47AE147B 2215 .long 0x446F8657 2216 .long 0x41414142 2217 .long 0x3E22CBCF 2218 .long 0x3B13B13C 2219 .long 0x38138139 2220 .long 0x3521CFB3 2221 .long 0x323E34A3 2222 .long 0x2F684BDB 2223 .long 0x2C9FB4D9 2224 .long 0x29E4129F 2225 .long 
0x27350B89 2226 .long 0x24924925 2227 .long 0x21FB7813 2228 .long 0x1F7047DD 2229 .long 0x1CF06ADB 2230 .long 0x1A7B9612 2231 .long 0x18118119 2232 .long 0x15B1E5F8 2233 .long 0x135C8114 2234 .long 0x11111112 2235 .long 0xECF56BF 2236 .long 0xC9714FC 2237 .long 0xA6810A7 2238 .long 0x8421085 2239 .long 0x624DD30 2240 .long 0x4104105 2241 .long 0x2040811 2242 /* maximum error: 0.987342 scaled: 0.921875*/ 2243 2244 ENDFUNC(GLOBAL(sdivsi3_i4i)) 2245#endif /* SH3 / SH4 */ 2246 2247#endif /* L_div_table */ 2248 2249#ifdef L_udiv_qrnnd_16 2250 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) 2251 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ 2252 /* n1 < d, but n1 might be larger than d1. */ 2253 .global GLOBAL(udiv_qrnnd_16) 2254 .balign 8 2255GLOBAL(udiv_qrnnd_16): 2256 div0u 2257 cmp/hi r6,r0 2258 bt .Lots 2259 .rept 16 2260 div1 r6,r0 2261 .endr 2262 extu.w r0,r1 2263 bt 0f 2264 add r6,r0 22650: rotcl r1 2266 mulu.w r1,r5 2267 xtrct r4,r0 2268 swap.w r0,r0 2269 sts macl,r2 2270 cmp/hs r2,r0 2271 sub r2,r0 2272 bt 0f 2273 addc r5,r0 2274 add #-1,r1 2275 bt 0f 22761: add #-1,r1 2277 rts 2278 add r5,r0 2279 .balign 8 2280.Lots: 2281 sub r5,r0 2282 swap.w r4,r1 2283 xtrct r0,r1 2284 clrt 2285 mov r1,r0 2286 addc r5,r0 2287 mov #-1,r1 2288 SL1(bf, 1b, 2289 shlr16 r1) 22900: rts 2291 nop 2292 ENDFUNC(GLOBAL(udiv_qrnnd_16)) 2293#endif /* L_udiv_qrnnd_16 */ 2294