/* Copyright (C) 1994-2013 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */


!! libgcc routines for the Renesas / SuperH SH CPUs.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

#include "lib1funcs.h"

/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
   so it is more convenient to define NO_FPSCR_VALUES here than to
   define it on the command line.  */
#if defined __vxworks && defined __PIC__
#define NO_FPSCR_VALUES
#endif

#if ! __SH5__
#ifdef L_ashiftrt

!
! GLOBAL(ashiftrt_r4_N), N = 0 .. 32
!
! Arithmetic right shift of r4 by the constant N, performed in place.
!
! Entry:
!	r4: Value to shift
!
! Exit:
!	r4: Result
!
! Destroys:
!	T bit (all entries except ashiftrt_r4_0)
!
! An entry point falls through a run of one-bit shar steps until it
! reaches the next fixed tail (shift by 24, by 16, or the final shar
! in the delay slot of ashiftrt_r4_1).
!

	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

	.align	1
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4		! T = sign bit of the input
	rts
	subc	r4,r4		! delay slot: r4 = -T, i.e. 0 or -1

GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
GLOBAL(ashiftrt_r4_24):
	shlr16	r4		! logical shift by 16 + 8 ...
	shlr8	r4
	rts
	exts.b	r4,r4		! delay slot: ... then sign-extend the byte

GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
GLOBAL(ashiftrt_r4_16):
	shlr16	r4		! logical shift by 16 ...
	rts
	exts.w	r4,r4		! delay slot: ... then sign-extend the word

GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4		! delay slot: the last one-bit step

GLOBAL(ashiftrt_r4_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashiftrt_r4_0))
	ENDFUNC(GLOBAL(ashiftrt_r4_1))
	ENDFUNC(GLOBAL(ashiftrt_r4_2))
	ENDFUNC(GLOBAL(ashiftrt_r4_3))
	ENDFUNC(GLOBAL(ashiftrt_r4_4))
	ENDFUNC(GLOBAL(ashiftrt_r4_5))
	ENDFUNC(GLOBAL(ashiftrt_r4_6))
	ENDFUNC(GLOBAL(ashiftrt_r4_7))
	ENDFUNC(GLOBAL(ashiftrt_r4_8))
	ENDFUNC(GLOBAL(ashiftrt_r4_9))
	ENDFUNC(GLOBAL(ashiftrt_r4_10))
	ENDFUNC(GLOBAL(ashiftrt_r4_11))
	ENDFUNC(GLOBAL(ashiftrt_r4_12))
	ENDFUNC(GLOBAL(ashiftrt_r4_13))
	ENDFUNC(GLOBAL(ashiftrt_r4_14))
	ENDFUNC(GLOBAL(ashiftrt_r4_15))
	ENDFUNC(GLOBAL(ashiftrt_r4_16))
	ENDFUNC(GLOBAL(ashiftrt_r4_17))
	ENDFUNC(GLOBAL(ashiftrt_r4_18))
	ENDFUNC(GLOBAL(ashiftrt_r4_19))
	ENDFUNC(GLOBAL(ashiftrt_r4_20))
	ENDFUNC(GLOBAL(ashiftrt_r4_21))
	ENDFUNC(GLOBAL(ashiftrt_r4_22))
	ENDFUNC(GLOBAL(ashiftrt_r4_23))
	ENDFUNC(GLOBAL(ashiftrt_r4_24))
	ENDFUNC(GLOBAL(ashiftrt_r4_25))
	ENDFUNC(GLOBAL(ashiftrt_r4_26))
	ENDFUNC(GLOBAL(ashiftrt_r4_27))
	ENDFUNC(GLOBAL(ashiftrt_r4_28))
	ENDFUNC(GLOBAL(ashiftrt_r4_29))
	ENDFUNC(GLOBAL(ashiftrt_r4_30))
	ENDFUNC(GLOBAL(ashiftrt_r4_31))
	ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif

#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Entry:
!
! r4: Value to shift
! r5: Shift count
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! T bit, r5
!
! Implementation: the shift count is masked to 0..31 and used to index
! a table of byte offsets; control then lands on a fall-through ladder
! of shar steps that works on r0.

	.global	GLOBAL(ashrsi3)
	HIDDEN_FUNC(GLOBAL(ashrsi3))
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5				! r5 = count & 31
	mova	LOCAL(ashrsi3_table),r0		! r0 = address of the offset table
	mov.b	@(r0,r5),r5			! r5 = offset of the handler from the table
#ifdef __sh1__
	add	r5,r0				! SH1 path: form the handler address
	jmp	@r0
#else
	braf	r5				! PC-relative; the table follows the delay slot
#endif
	mov	r4,r0				! delay slot: r0 = value to shift

	.align	2
LOCAL(ashrsi3_table):
	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

LOCAL(ashrsi3_31):
	rotcl	r0		! T = sign bit of the input
	rts
	subc	r0,r0		! delay slot: r0 = -T, i.e. 0 or -1

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0		! logical shift by 16 + 8 ...
	shlr8	r0
	rts
	exts.b	r0,r0		! delay slot: ... then sign-extend the byte

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0		! logical shift by 16 ...
	rts
	exts.w	r0,r0		! delay slot: ... then sign-extend the word

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0		! delay slot: the last one-bit step

LOCAL(ashrsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashrsi3))
#endif

#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
! (For compatibility with older binaries, not used by compiler)
!
! Entry:
!	r4: Value to shift
!	r5: Shift count
!
! Exit:
!	r0: Result
!
! Destroys:
!	T bit
!
!
! GLOBAL(ashlsi3_r0)
!
! Entry:
!	r4: Value to shift
!	r0: Shift count
!
! Exit:
!	r0: Result
!
! Destroys:
!	T bit

! Implementation: the count is masked to 0..31 and multiplied by four,
! because every slot of the table below is exactly two 16-bit
! instructions.  Do not insert or remove instructions inside the table.
! Several out-of-line tails end in an rts whose delay slot is the first
! instruction of the tail that follows, so their order matters too.

	.global	GLOBAL(ashlsi3)
	.global	GLOBAL(ashlsi3_r0)
	HIDDEN_FUNC(GLOBAL(ashlsi3))
	HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
GLOBAL(ashlsi3):
	mov	r5,r0			! legacy entry: move the count into r0
	.align	2
GLOBAL(ashlsi3_r0):

#ifdef __sh1__
	and	#31,r0
	shll2	r0			! r0 = (count & 31) * 4
	mov.l	r4,@-r15		! r4 is borrowed as scratch, so save it
	mov	r0,r4
	mova	LOCAL(ashlsi3_table),r0
	add	r4,r0			! r0 = address of the table slot
	mov.l	@r15+,r4
	jmp	@r0
	mov	r4,r0			! delay slot: r0 = value to shift
	.align 2
#else
	and	#31,r0
	shll2	r0			! r0 = (count & 31) * 4
	braf	r0			! PC-relative jump into the table
	mov	r4,r0			! delay slot: r0 = value to shift
#endif

LOCAL(ashlsi3_table):
	rts				// << 0
	nop
LOCAL(ashlsi_1):
	rts				// << 1
	shll	r0
LOCAL(ashlsi_2):			// << 2
	rts
	shll2	r0
	bra	LOCAL(ashlsi_1)		// << 3
	shll2	r0
	bra	LOCAL(ashlsi_2)		// << 4
	shll2	r0
	bra	LOCAL(ashlsi_5)		// << 5
	shll	r0
	bra	LOCAL(ashlsi_6)		// << 6
	shll2	r0
	bra	LOCAL(ashlsi_7)		// << 7
	shll	r0
LOCAL(ashlsi_8):			// << 8
	rts
	shll8	r0
	bra	LOCAL(ashlsi_8)		// << 9
	shll	r0
	bra	LOCAL(ashlsi_8)		// << 10
	shll2	r0
	bra	LOCAL(ashlsi_11)	// << 11
	shll	r0
	bra	LOCAL(ashlsi_12)	// << 12
	shll2	r0
	bra	LOCAL(ashlsi_13)	// << 13
	shll	r0
	bra	LOCAL(ashlsi_14)	// << 14
	shll8	r0
	bra	LOCAL(ashlsi_15)	// << 15
	shll8	r0
LOCAL(ashlsi_16):			// << 16
	rts
	shll16	r0
	bra	LOCAL(ashlsi_16)	// << 17
	shll	r0
	bra	LOCAL(ashlsi_16)	// << 18
	shll2	r0
	bra	LOCAL(ashlsi_19)	// << 19
	shll	r0
	bra	LOCAL(ashlsi_20)	// << 20
	shll2	r0
	bra	LOCAL(ashlsi_21)	// << 21
	shll	r0
	bra	LOCAL(ashlsi_22)	// << 22
	shll16	r0
	bra	LOCAL(ashlsi_23)	// << 23
	shll16	r0
	bra	LOCAL(ashlsi_16)	// << 24
	shll8	r0
	bra	LOCAL(ashlsi_25)	// << 25
	shll	r0
	bra	LOCAL(ashlsi_26)	// << 26
	shll2	r0
	bra	LOCAL(ashlsi_27)	// << 27
	shll	r0
	bra	LOCAL(ashlsi_28)	// << 28
	shll2	r0
	bra	LOCAL(ashlsi_29)	// << 29
	shll16	r0
	bra	LOCAL(ashlsi_30)	// << 30
	shll16	r0
	and	#1,r0			// << 31
	rts
	rotr	r0

LOCAL(ashlsi_7):
	shll2	r0
LOCAL(ashlsi_5):
LOCAL(ashlsi_6):
	shll2	r0
	rts				! delay slot is the shll2 just below
LOCAL(ashlsi_13):
	shll2	r0
LOCAL(ashlsi_12):
LOCAL(ashlsi_11):
	shll8	r0
	rts				! delay slot is the shll2 just below
LOCAL(ashlsi_21):
	shll2	r0
LOCAL(ashlsi_20):
LOCAL(ashlsi_19):
	shll16	r0
	rts				! delay slot is the shll2 just below
LOCAL(ashlsi_28):
LOCAL(ashlsi_27):
	shll2	r0
LOCAL(ashlsi_26):
LOCAL(ashlsi_25):
	shll16	r0
	rts
	shll8	r0

LOCAL(ashlsi_22):
LOCAL(ashlsi_14):
	shlr2	r0			! back off two bits before the final shll8
	rts
	shll8	r0

LOCAL(ashlsi_23):
LOCAL(ashlsi_15):
	shlr	r0			! back off one bit before the final shll8
	rts
	shll8	r0

LOCAL(ashlsi_29):
	shlr	r0
LOCAL(ashlsi_30):
	shlr2	r0
	rts
	shll16	r0

	ENDFUNC(GLOBAL(ashlsi3))
	ENDFUNC(GLOBAL(ashlsi3_r0))
#endif

#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
! (For compatibility with older binaries, not used by compiler)
!
! Entry:
!	r4: Value to shift
!	r5: Shift count
!
! Exit:
!	r0: Result
!
! Destroys:
!	T bit
!
!
! GLOBAL(lshrsi3_r0)
!
! Entry:
!	r4: Value to shift
!	r0: Shift count
!
! Exit:
!	r0: Result
!
! Destroys:
!	T bit

! Implementation: same dispatch scheme as the left shift.  The count is
! masked to 0..31 and multiplied by four, because every slot of the
! table below is exactly two 16-bit instructions.  Several out-of-line
! tails end in an rts whose delay slot is the first instruction of the
! tail that follows, so neither the table nor the tails may be reordered.

	.global	GLOBAL(lshrsi3)
	.global	GLOBAL(lshrsi3_r0)
	HIDDEN_FUNC(GLOBAL(lshrsi3))
	HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
GLOBAL(lshrsi3):
	mov	r5,r0			! legacy entry: move the count into r0
	.align	2
GLOBAL(lshrsi3_r0):

#ifdef __sh1__
	and	#31,r0
	shll2	r0			! r0 = (count & 31) * 4
	mov.l	r4,@-r15		! r4 is borrowed as scratch, so save it
	mov	r0,r4
	mova	LOCAL(lshrsi3_table),r0
	add	r4,r0			! r0 = address of the table slot
	mov.l	@r15+,r4
	jmp	@r0
	mov	r4,r0			! delay slot: r0 = value to shift
	.align 2
#else
	and	#31,r0
	shll2	r0			! r0 = (count & 31) * 4
	braf	r0			! PC-relative jump into the table
	mov	r4,r0			! delay slot: r0 = value to shift
#endif
LOCAL(lshrsi3_table):
	rts				// >> 0
	nop
LOCAL(lshrsi_1):			// >> 1
	rts
	shlr	r0
LOCAL(lshrsi_2):			// >> 2
	rts
	shlr2	r0
	bra	LOCAL(lshrsi_1)		// >> 3
	shlr2	r0
	bra	LOCAL(lshrsi_2)		// >> 4
	shlr2	r0
	bra	LOCAL(lshrsi_5)		// >> 5
	shlr	r0
	bra	LOCAL(lshrsi_6)		// >> 6
	shlr2	r0
	bra	LOCAL(lshrsi_7)		// >> 7
	shlr	r0
LOCAL(lshrsi_8):			// >> 8
	rts
	shlr8	r0
	bra	LOCAL(lshrsi_8)		// >> 9
	shlr	r0
	bra	LOCAL(lshrsi_8)		// >> 10
	shlr2	r0
	bra	LOCAL(lshrsi_11)	// >> 11
	shlr	r0
	bra	LOCAL(lshrsi_12)	// >> 12
	shlr2	r0
	bra	LOCAL(lshrsi_13)	// >> 13
	shlr	r0
	bra	LOCAL(lshrsi_14)	// >> 14
	shlr8	r0
	bra	LOCAL(lshrsi_15)	// >> 15
	shlr8	r0
LOCAL(lshrsi_16):			// >> 16
	rts
	shlr16	r0
	bra	LOCAL(lshrsi_16)	// >> 17
	shlr	r0
	bra	LOCAL(lshrsi_16)	// >> 18
	shlr2	r0
	bra	LOCAL(lshrsi_19)	// >> 19
	shlr	r0
	bra	LOCAL(lshrsi_20)	// >> 20
	shlr2	r0
	bra	LOCAL(lshrsi_21)	// >> 21
	shlr	r0
	bra	LOCAL(lshrsi_22)	// >> 22
	shlr16	r0
	bra	LOCAL(lshrsi_23)	// >> 23
	shlr16	r0
	bra	LOCAL(lshrsi_16)	// >> 24
	shlr8	r0
	bra	LOCAL(lshrsi_25)	// >> 25
	shlr	r0
	bra	LOCAL(lshrsi_26)	// >> 26
	shlr2	r0
	bra	LOCAL(lshrsi_27)	// >> 27
	shlr	r0
	bra	LOCAL(lshrsi_28)	// >> 28
	shlr2	r0
	bra	LOCAL(lshrsi_29)	// >> 29
	shlr16	r0
	bra	LOCAL(lshrsi_30)	// >> 30
	shlr16	r0
	shll	r0			// >> 31
	rts
	movt	r0

LOCAL(lshrsi_7):
	shlr2	r0
LOCAL(lshrsi_5):
LOCAL(lshrsi_6):
	shlr2	r0
	rts				! delay slot is the shlr2 just below
LOCAL(lshrsi_13):
	shlr2	r0
LOCAL(lshrsi_12):
LOCAL(lshrsi_11):
	shlr8	r0
	rts				! delay slot is the shlr2 just below
LOCAL(lshrsi_21):
	shlr2	r0
LOCAL(lshrsi_20):
LOCAL(lshrsi_19):
	shlr16	r0
	rts				! delay slot is the shlr2 just below
LOCAL(lshrsi_28):
LOCAL(lshrsi_27):
	shlr2	r0
LOCAL(lshrsi_26):
LOCAL(lshrsi_25):
	shlr16	r0
	rts
	shlr8	r0

LOCAL(lshrsi_22):
LOCAL(lshrsi_14):
	shll2	r0			! back off two bits before the final shlr8
	rts
	shlr8	r0

LOCAL(lshrsi_23):
LOCAL(lshrsi_15):
	shll	r0			! back off one bit before the final shlr8
	rts
	shlr8	r0

LOCAL(lshrsi_29):
	shll	r0
LOCAL(lshrsi_30):
	shll2	r0
	rts
	shlr16	r0

	ENDFUNC(GLOBAL(lshrsi3))
	ENDFUNC(GLOBAL(lshrsi3_r0))
#endif

#ifdef L_movmem
	.text
	.balign	4
	.global	GLOBAL(movmem)
	HIDDEN_FUNC(GLOBAL(movmem))
	HIDDEN_ALIAS(movstr,movmem)
	/* This would be a lot simpler if r6 contained the byte count
	   minus 64, and we wouldn't be called here for a byte count of 64.  */
	/* Register use visible in this routine: r4 is the destination
	   base, r5 the source base, r6 the count (scaled by four on
	   entry below) and r0 the copy temporary.  pr is saved on the
	   stack because the copy ladder is entered with bsr.
	   NOTE(review): the caller-side meaning of r6 is fixed by the
	   compiler patterns and is not visible here.  */
GLOBAL(movmem):
	sts.l	pr,@-r15
	shll2	r6
	bsr	GLOBAL(movmemSI52+2)	! skip the first load of movmemSI52 ...
	mov.l	@(48,r5),r0		! ... because it is done in this delay slot
	.balign	4
LOCAL(movmem_loop):	/* Reached with rts */
	mov.l	@(60,r5),r0
	add	#-64,r6
	mov.l	r0,@(60,r4)
	tst	r6,r6
	mov.l	@(56,r5),r0
	bt	LOCAL(movmem_done)
	mov.l	r0,@(56,r4)
	cmp/pl	r6
	mov.l	@(52,r5),r0
	add	#64,r5
	mov.l	r0,@(52,r4)
	add	#64,r4
	bt	GLOBAL(movmemSI52)
! done all the large groups, do the remainder
! jump to movmem+
	mova	GLOBAL(movmemSI4)+4,r0
	add	r6,r0
	jmp	@r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
	lds.l	@r15+,pr
	mov.l	r0,@(56,r4)
	mov.l	@(52,r5),r0
	rts
	mov.l	r0,@(52,r4)
	.balign	4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
! Unrolled longword copy ladder.  Entering at movmemSIn copies the n
! bytes at @(0,r5) .. @(n-4,r5) to the same offsets from r4, highest
! offset first, using r0 as the temporary, and returns with rts.

	.global	GLOBAL(movmemSI64)
	HIDDEN_FUNC(GLOBAL(movmemSI64))
	HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movmemSI60)
	HIDDEN_FUNC(GLOBAL(movmemSI60))
	HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movmemSI56)
	HIDDEN_FUNC(GLOBAL(movmemSI56))
	HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movmemSI52)
	HIDDEN_FUNC(GLOBAL(movmemSI52))
	HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movmemSI48)
	HIDDEN_FUNC(GLOBAL(movmemSI48))
	HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movmemSI44)
	HIDDEN_FUNC(GLOBAL(movmemSI44))
	HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movmemSI40)
	HIDDEN_FUNC(GLOBAL(movmemSI40))
	HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movmemSI36)
	HIDDEN_FUNC(GLOBAL(movmemSI36))
	HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movmemSI32)
	HIDDEN_FUNC(GLOBAL(movmemSI32))
	HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movmemSI28)
	HIDDEN_FUNC(GLOBAL(movmemSI28))
	HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movmemSI24)
	HIDDEN_FUNC(GLOBAL(movmemSI24))
	HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movmemSI20)
	HIDDEN_FUNC(GLOBAL(movmemSI20))
	HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movmemSI16)
	HIDDEN_FUNC(GLOBAL(movmemSI16))
	HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movmemSI12)
	HIDDEN_FUNC(GLOBAL(movmemSI12))
	HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movmemSI8)
	HIDDEN_FUNC(GLOBAL(movmemSI8))
	HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movmemSI4)
	HIDDEN_FUNC(GLOBAL(movmemSI4))
	HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
	mov.l	@(0,r5),r0
	rts
	mov.l	r0,@(0,r4)		! delay slot: store the last longword

	ENDFUNC(GLOBAL(movmemSI64))
	ENDFUNC(GLOBAL(movmemSI60))
	ENDFUNC(GLOBAL(movmemSI56))
	ENDFUNC(GLOBAL(movmemSI52))
	ENDFUNC(GLOBAL(movmemSI48))
	ENDFUNC(GLOBAL(movmemSI44))
	ENDFUNC(GLOBAL(movmemSI40))
	ENDFUNC(GLOBAL(movmemSI36))
	ENDFUNC(GLOBAL(movmemSI32))
	ENDFUNC(GLOBAL(movmemSI28))
	ENDFUNC(GLOBAL(movmemSI24))
	ENDFUNC(GLOBAL(movmemSI20))
	ENDFUNC(GLOBAL(movmemSI16))
	ENDFUNC(GLOBAL(movmemSI12))
	ENDFUNC(GLOBAL(movmemSI8))
	ENDFUNC(GLOBAL(movmemSI4))
	ENDFUNC(GLOBAL(movmem))
#endif

#ifdef L_movmem_i4
	.text
	.global	GLOBAL(movmem_i4_even)
	.global	GLOBAL(movmem_i4_odd)
	.global	GLOBAL(movmemSI12_i4)

	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)

! movmem_i4_even / movmem_i4_odd: software-pipelined longword copy.
! Loads come from @r5 with post-increment into r0-r3, stores go to
! offsets from r4, and r6 is a loop counter decremented with dt.
! NOTE(review): what r6 counts, and when the compiler picks the even or
! the odd entry, is defined by the compiler patterns - confirm there.

	.p2align	5
L_movmem_2mod4_end:
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)

	.p2align	2

GLOBAL(movmem_i4_even):
	mov.l	@r5+,r0
	bra	L_movmem_start_even
	mov.l	@r5+,r1

GLOBAL(movmem_i4_odd):
	mov.l	@r5+,r1
	add	#-4,r4			! bias r4 so the stores below line up
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

L_movmem_loop:
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	L_movmem_2mod4_end
	mov.l	@r5+,r1
	add	#16,r4
L_movmem_start_even:
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	L_movmem_loop
	mov.l	r2,@(8,r4)
	rts
	mov.l	r3,@(12,r4)

	ENDFUNC(GLOBAL(movmem_i4_even))
	ENDFUNC(GLOBAL(movmem_i4_odd))

! movmemSI12_i4: copy three longwords from @r5 to @r4 through r0-r2.

	.p2align	4
GLOBAL(movmemSI12_i4):
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)

	ENDFUNC(GLOBAL(movmemSI12_i4))
#endif

#ifdef L_mulsi3


	.global	GLOBAL(mulsi3)
	HIDDEN_FUNC(GLOBAL(mulsi3))

! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! Also written: r1, r2, r3, macl and the T bit.
!

GLOBAL(mulsi3):
	mulu.w	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	hiset
	rts			! yes - then we have the answer
	sts	macl,r0

hiset:	sts	macl,r0		! r0 = bb*dd
	mulu.w	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu.w	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2		! r2 = aa*dd + cc*bb
	shll16	r2
	rts
	add	r2,r0		! delay slot: add the cross terms, scaled by 65536

	ENDFUNC(GLOBAL(mulsi3))
#endif
#endif /* ! __SH5__ */
#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Renesas SH
#if defined (__SH4__) || defined (__SH2A__)
!! args in r4 and r5, result in fpul, clobber dr0, dr2

	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	lds	r4,fpul
	float	fpul,dr0		! dr0 = (double) dividend
	lds	r5,fpul
	float	fpul,dr2		! dr2 = (double) divisor
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul		! delay slot: convert the quotient back to integer

	ENDFUNC(GLOBAL(sdivsi3_i4))
#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(sdivsi3_i4)
	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
	sts.l	fpscr,@-r15		! save the FPSCR of the caller
	mov	#8,r2
	swap.w	r2,r2			! r2 = 0x00080000
	lds	r2,fpscr		! NOTE(review): presumably selects double precision - confirm
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr		! delay slot: restore the saved FPSCR

	ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ || __SH2A__  */
#endif

#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit

	.global	GLOBAL(sdivsi3)
#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns divsi3_i1 and
   divsi3_i1_media.

int __sdivsi3 (i, j)
     int i, j;
{
  register unsigned long long r18 asm ("r18");
  register unsigned long long r19 asm ("r19");
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r1 asm ("r1") = 1;
  register int r2 asm ("r2") = i >> 31;
  register int r3 asm ("r3") = j >> 31;

  r2 = r2 ? r2 : r1;
  r3 = r3 ? r3 : r1;
  r18 = i * r2;
  r19 = j * r3;
  r2 *= r3;

  r19 <<= 31;
  r1 <<= 31;
  do
    if (r18 >= r19)
      r0 |= r1, r18 -= r19;
  while (r19 >>= 1, r1 >>= 1);

  return r2 * (int)r0;
}
*/
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontadd), tr2
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r1
	shari.l	r4, 31, r2
	shari.l	r5, 31, r3
	cmveq	r2, r1, r2
	cmveq	r3, r1, r3
	muls.l	r4, r2, r18
	muls.l	r5, r3, r19
	muls.l	r2, r3, r2
	shlli	r19, 31, r19
	shlli	r1, 31, r1
LOCAL(sdivsi3_loop):
	bgtu	r19, r18, tr2
	or	r0, r1, r0
	sub	r18, r19, r18
LOCAL(sdivsi3_dontadd):
	shlri	r1, 1, r1
	shlri	r19, 1, r19
	bnei	r1, 0, tr1
	muls.l	r0, r2, r0
	add.l	r0, r63, r0
	blink	tr0, r63
#elif 0 /* ! 0 */
	// inputs: r4,r5
	// clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
	// result in r0
GLOBAL(sdivsi3):
	// can create absolute value without extra latency,
	// but dependent on proper sign extension of inputs:
	// shari.l r5,31,r2
	// xor r5,r2,r20
	// sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
	shari.l	r5,31,r2
	ori	r2,1,r2
	muls.l	r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
	movi	0xffffffffffffbb0c,r19 // shift count equiv 76
	shari.l	r4,31,r3
	nsb	r20,r0
	shlld	r20,r0,r25
	shlri	r25,48,r25
	sub	r19,r25,r1
	mmulfx.w	r1,r1,r2
	mshflo.w	r1,r63,r1
	// If r4 was to be used in-place instead of r21, could use this sequence
	// to compute absolute:
	// sub r63,r4,r19 // compute absolute value of r4
	// shlri r4,32,r3 // into lower 32 bit of r4, keeping
	// mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
	ori	r3,1,r3
	mmulfx.w	r25,r2,r2
	sub	r19,r0,r0
	muls.l	r4,r3,r21
	msub.w	r1,r2,r2
	addi	r2,-2,r1
	mulu.l	r21,r1,r19
	mmulfx.w	r2,r2,r2
	shlli	r1,15,r1
	shlrd	r19,r0,r19
	mulu.l	r19,r20,r3
	mmacnfx.wl	r25,r2,r1
	ptabs	r18,tr0
	sub	r21,r3,r25

	mulu.l	r25,r1,r2
	addi	r0,14,r0
	xor	r4,r5,r18
	shlrd	r2,r0,r2
	mulu.l	r2,r20,r3
	add	r19,r2,r19
	shari.l	r18,31,r18
	sub	r25,r3,r25

	mulu.l	r25,r1,r2
	sub	r25,r20,r25
	add	r19,r18,r19
	shlrd	r2,r0,r2
	mulu.l	r2,r20,r3
	addi	r25,1,r25
	add	r19,r2,r19

	cmpgt	r25,r3,r25
	add.l	r19,r25,r0
	xor	r0,r18,r0
	blink	tr0,r63
#else /* ! 0 && ! 0 */

	// inputs: r4,r5
	// clobbered: r1,r18,r19,r20,r21,r25,tr0
	// result in r0
	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
#ifndef __pic__
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3): /* this is the shcompact entry point */
	// The special SHmedia entry point sdivsi3_1 prevents accidental linking
	// with the SHcompact implementation, which clobbers tr1 / tr2.
	.global	GLOBAL(sdivsi3_1)
GLOBAL(sdivsi3_1):
	.global	GLOBAL(div_table_internal)
	movi	(GLOBAL(div_table_internal) >> 16) & 65535, r20
	shori	GLOBAL(div_table_internal) & 65535, r20
#endif
	.global	GLOBAL(sdivsi3_2)
	// div_table in r20
	// clobbered: r1,r18,r19,r21,r25,tr0
GLOBAL(sdivsi3_2):
	nsb	r5, r1
	shlld	r5, r1, r25	// normalize; [-2 ..1, 1..2) in s2.62
	shari	r25, 58, r21	// extract 5(6) bit index (s2.4 with hole -1..1)
	ldx.ub	r20, r21, r19	// u0.8
	shari	r25, 32, r25	// normalize to s2.30
	shlli	r21, 1, r21
	muls.l	r25, r19, r19	// s2.38
	ldx.w	r20, r21, r21	// s2.14
	ptabs	r18, tr0
	shari	r19, 24, r19	// truncate to s2.14
	sub	r21, r19, r19	// some 11 bit inverse in s1.14
	muls.l	r19, r19, r21	// u0.28
	sub	r63, r1, r1
	addi	r1, 92, r1
	muls.l	r25, r21, r18	// s2.58
	shlli	r19, 45, r19	// multiply by two and convert to s2.58
	/* bubble */
	sub	r19, r18, r18
	shari	r18, 28, r18	// some 22 bit inverse in s1.30
	muls.l	r18, r25, r0	// s2.60
	muls.l	r18, r4, r25	// s32.30
	/* bubble */
	shari	r0, 16, r19	// s-16.44
	muls.l	r19, r18, r19	// s-16.74
	shari	r25, 63, r0
	shari	r4, 14, r18	// s19.-14
	shari	r19, 30, r19	// s-16.44
	muls.l	r19, r18, r19	// s15.30
	xor	r21, r0, r21	// You could also use the constant 1 << 27.
	add	r21, r25, r21
	sub	r21, r19, r21
	shard	r21, r1, r21
	sub	r21, r0, r0
	blink	tr0, r63
#ifndef __pic__
	ENDFUNC(GLOBAL(sdivsi3))
#endif
	ENDFUNC(GLOBAL(sdivsi3_2))
#endif
#elif defined __SHMEDIA__
/* m5compact-nofpu */
	// clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontsub), tr0
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18,tr2
	shari.l	r4,31,r18	// r18 = sign mask of the dividend
	shari.l	r5,31,r19	// r19 = sign mask of the divisor
	xor	r4,r18,r20
	xor	r5,r19,r21
	sub.l	r20,r18,r20	// r20 = (r4 ^ mask) - mask
	sub.l	r21,r19,r21	// r21 = (r5 ^ mask) - mask
	xor	r18,r19,r19	// r19 = combined sign mask, applied at the end
	shlli	r21,32,r25
	addi	r25,-1,r21
	addz.l	r20,r63,r20
LOCAL(sdivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(sdivsi3_dontsub):
	addi.l	r25,-1,r25
	bnei	r25,-32,tr1
	xor	r20,r19,r20
	sub.l	r20,r19,r0
	blink	tr2,r63
	ENDFUNC(GLOBAL(sdivsi3))
#else /* ! __SHMEDIA__ */
	FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
	mov	r4,r1			! r1 = dividend
	mov	r5,r0			! r0 = divisor

	tst	r0,r0
	bt	div0			! divisor is zero: return 0
	! NOTE(review): the setup below looks like the standard signed
	! 32 / 32 divide sequence from the SH programming manual -
	! confirm against the manual before altering it.
	mov	#0,r2
	div0s	r2,r1
	subc	r3,r3
	subc	r2,r1
	div0s	r0,r3
	rotcl	r1
	.rept	32			! 32 identical div1 / rotcl steps
	div1	r0,r3
	rotcl	r1
	.endr
	addc	r2,r1
	rts
	mov	r1,r0			! delay slot: result goes to r0


div0:	rts
	mov	#0,r0

	ENDFUNC(GLOBAL(sdivsi3))
#endif /* ! __SHMEDIA__ */
#endif
#ifdef L_udivsi3_i4

	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Renesas SH
#if defined (__SH4__) || defined (__SH2A__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
!! and t bit
/* Unsigned 32-bit division done in double-precision floating point.
   Bit 31 of each operand is flipped before the int-to-double conversion
   and the constant at L1 (2147483648) is added back afterwards, so each
   double holds the operand read as an unsigned number.
   When r5 is 0 or 1 the `trivial' path returns r4 unchanged.  */

	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5
	bf trivial		/* r5 is not above 1 */
	rotr r1			/* r1: 1 rotated right once = 0x80000000 */
	xor r1,r4		/* flip bit 31 of the dividend */
	lds r4,fpul
	mova L1,r0
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4		/* dr4 = the double stored at L1 */
#else
	fmov.s @r0+,DR40	/* same load, done as two single moves */
	fmov.s @r0,DR41
#endif
	float fpul,dr0
	xor r1,r5		/* flip bit 31 of the divisor */
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0		/* dr0 = dividend as an unsigned quantity */
	fadd dr4,dr2		/* dr2 = divisor as an unsigned quantity */
	fdiv dr2,dr0
	rts
	ftrc dr0,fpul		/* executed with the rts: truncated quotient */

trivial:
	rts
	lds r4,fpul		/* executed with the rts: result = dividend */

	.align 2
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
L1:
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
#if ! __SH5__ || __SH5__ == 32
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* SHmedia version: both operands are zero-extended to 64 bits, converted
   to double, divided, and the quotient is truncated back to an integer.  */
	.mode	SHmedia
	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	addz.l	r4,r63,r20	/* zero-extended dividend */
	addz.l	r5,r63,r21	/* zero-extended divisor */
	fmov.qd	r20,dr0
	fmov.qd	r21,dr32
	ptabs	r18,tr0		/* tr0 = return target */
	float.qd dr0,dr0
	float.qd dr32,dr32
	fdiv.d	dr0,dr32,dr0
	ftrc.dq dr0,dr32
	fmov.s fr33,fr32	/* NOTE(review): presumably fr32 is what the
				   caller reads as fpul - confirm */
	blink tr0,r63

	ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Same algorithm as the double-precision-default variant, except that
   fpscr is saved on the stack, replaced by the word stored at L1 for the
   duration of the computation, and restored on the way out.
   When r5 is 0 or 1 the `trivial' path returns r4 without touching fpscr.  */

	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov #1,r1
	cmp/hi r1,r5
	bf trivial		/* r5 is not above 1 */
	sts.l fpscr,@-r15	/* push the caller's fpscr */
	mova L1,r0
	lds.l @r0+,fpscr	/* fpscr = first word at L1; r0 now points at
				   the double that follows it */
	rotr r1			/* r1: 1 rotated right once = 0x80000000 */
	xor r1,r4		/* flip bit 31 of the dividend */
	lds r4,fpul
#ifdef FMOVD_WORKS
	fmov.d @r0+,dr4		/* dr4 = 2147483648 as a double */
#else
	fmov.s @r0+,DR40
	fmov.s @r0,DR41
#endif
	float fpul,dr0
	xor r1,r5		/* flip bit 31 of the divisor */
	lds r5,fpul
	float fpul,dr2
	fadd dr4,dr0
	fadd dr4,dr2
	fdiv dr2,dr0
	ftrc dr0,fpul		/* truncated quotient */
	rts
	lds.l @r15+,fpscr	/* executed with the rts: pop the saved fpscr */

#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
trivial:
	rts
	lds r4,fpul		/* executed with the rts: result = dividend */

	.align 2
L1:
	/* First the fpscr value used during the division, then the bias.  */
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH4__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */

!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
	.global	GLOBAL(udivsi3)
	HIDDEN_FUNC(GLOBAL(udivsi3))

#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.
unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
/* Disabled reference version: a direct transcription of the C loop above.  */
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2
	or	r0, r18, r0
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1
	blink	tr0, r63
#else
/* Unsigned 32-bit division, SHmedia version.
   NOTE(review): the sequence appears to build a fixed-point reciprocal
   estimate of the normalised divisor and then refine the quotient in
   three multiply / subtract rounds - confirm before relying on this.  */
GLOBAL(udivsi3):
	// inputs: r4,r5
	// clobbered: r18,r19,r20,r21,r22,r25,tr0
	// result in r0.
	addz.l	r5,r63,r22	/* r22 = zero-extended divisor */
	nsb	r22,r0
	shlld	r22,r0,r25	/* divisor shifted left by the nsb count */
	shlri	r25,48,r25
	movi	0xffffffffffffbb0c,r20 // shift count equiv 76
	sub	r20,r25,r21
	mmulfx.w r21,r21,r19
	mshflo.w r21,r63,r21
	ptabs	r18,tr0		/* tr0 = return target; r18 is reused below */
	mmulfx.w r25,r19,r19
	sub	r20,r0,r0
	/* bubble */
	msub.w	r21,r19,r19
	addi	r19,-2,r21 /* It would be nice for scheduling to do this add to r21
			      before the msub.w, but we need a different value for
			      r19 to keep errors under control.  */
	mulu.l	r4,r21,r18
	mmulfx.w r19,r19,r19
	shlli	r21,15,r21
	shlrd	r18,r0,r18
	mulu.l	r18,r22,r20
	mmacnfx.wl r25,r19,r21
	/* bubble */
	sub	r4,r20,r25

	mulu.l	r25,r21,r19
	addi	r0,14,r0
	/* bubble */
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	add	r18,r19,r18
	/* bubble */
	sub.l	r25,r20,r25

	mulu.l	r25,r21,r19
	addz.l	r25,r63,r25
	sub	r25,r22,r25
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	addi	r25,1,r25
	add	r18,r19,r18

	cmpgt	r25,r20,r25	/* r25 = 0 or 1: final correction */
	add.l	r18,r25,r0	/* r0 = quotient */
	blink	tr0,r63
#endif
#elif defined (__SHMEDIA__)
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.  */
/* In:	r4 = dividend, r5 = divisor, r18 = return address.
   Out:	r0 = quotient.
   The quotient bits accumulate in the low half of r20 while the running
   remainder occupies the high half.  */
	// clobbered: r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontsub), tr0
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18,tr2		/* tr2 = return target */
	shlli	r5,32,r25	/* r25 = divisor << 32 */
	addi	r25,-1,r21	/* r21 = (divisor << 32) - 1 */
	addz.l	r4,r63,r20	/* zero-extended dividend */
LOCAL(udivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0	/* no subtraction while r21 >= r20 */
	sub	r20,r21,r20	/* subtracting r21 also adds 1 at bit 0:
				   that is the new quotient bit */
LOCAL(udivsi3_dontsub):
	addi.l	r25,-1,r25	/* the low word of r25 counts -1, -2, ... */
	bnei	r25,-32,tr1	/* ... so the loop body runs 32 times */
	add.l	r20,r63,r0	/* r0 = low word of r20 = quotient */
	blink	tr2,r63
#else /* ! defined (__SHMEDIA__) */
/* SHcompact version built from div1 steps.
   In:	r4 = dividend, r5 = divisor.
   Out:	r0 = quotient.
   A divisor that fits in 16 bits is handled as two 16-step divisions;
   any other divisor takes the 16-step LOCAL(large_divisor) path.  */

/* LOCAL(div8) performs 8 div1 steps, LOCAL(div7) performs 7; the last
   div1 of each is executed together with the rts.  */
LOCAL(div8):
	div1 r5,r4
LOCAL(div7):
	div1 r5,r4; div1 r5,r4; div1 r5,r4
	div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

/* Four div1 steps; the quotient bits of the first three are rotated into
   r0 here, the fourth by the rotcl that follows each call.  */
LOCAL(divx4):
	div1 r5,r4; rotcl r0
	div1 r5,r4; rotcl r0
	div1 r5,r4; rotcl r0
	rts; div1 r5,r4

GLOBAL(udivsi3):
	sts.l pr,@-r15		/* pr is needed for the bsr calls below */
	extu.w r5,r0
	cmp/eq r5,r0		/* T = divisor fits in 16 bits */
#ifdef __sh1__
	bf LOCAL(large_divisor)
#else
	bf/s LOCAL(large_divisor)
#endif
	div0u			/* runs on both paths when bf/s is used */
	swap.w r4,r0
	shlr16 r4		/* r4 = upper half of the dividend */
	bsr LOCAL(div8)
	shll16 r5		/* divisor moved to the upper half */
	bsr LOCAL(div7)
	div1 r5,r4		/* 8 + 1 + 7 = 16 steps */
	xtrct r4,r0
	xtrct r0,r4
	bsr LOCAL(div8)
	swap.w r4,r4
	bsr LOCAL(div7)
	div1 r5,r4		/* second group of 16 steps */
	lds.l @r15+,pr
	xtrct r4,r0
	swap.w r0,r0
	rotcl r0
	rts
	shlr16 r5		/* executed with the rts: undo the shll16 */

LOCAL(large_divisor):
#ifdef __sh1__
	div0u			/* the plain bf above did not execute it */
#endif
	mov #0,r0
	xtrct r4,r0
	xtrct r0,r4
	bsr LOCAL(divx4)
	rotcl r0
	bsr LOCAL(divx4)
	rotcl r0
	bsr LOCAL(divx4)
	rotcl r0
	bsr LOCAL(divx4)
	rotcl r0
	lds.l @r15+,pr
	rts
	rotcl r0		/* executed with the rts: last quotient bit */

#endif /* ! __SHMEDIA__ */
	/* One ENDFUNC for whichever variant was assembled above, matching the
	   unconditional HIDDEN_FUNC at the top of this section.  */
	ENDFUNC(GLOBAL(udivsi3))
#endif /* L_udivsi3 */

#ifdef L_udivdi3
#ifdef __SHMEDIA__
/* Unsigned 64-bit division.
   In:	r2 = dividend, r3 = divisor, r18 = return address.
   Out:	r2 = quotient.
   udivdi3_internal is a hidden alias of the same entry point.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(udivdi3)
	FUNC(GLOBAL(udivdi3))
GLOBAL(udivdi3):
	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
	shlri r3,1,r4
	nsb r4,r22
	shlld r3,r22,r6
	shlri r6,49,r5
	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.
*/
	/* On arrival here: r22 = nsb (r3 >> 1), r6 = r3 << r22,
	   r5 = r6 >> 49 and r21 holds the constant loaded just above.  */
	sub r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta LOCAL(large_divisor),tr0
	addi r20,32,r9		/* r9 = 32 - r22 */
	msub.w r1,r4,r1
	madd.w r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri r6,32,r7		/* r7 = upper word of the shifted divisor */
	bgt/u r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri r2,32+14,r19
	addi r22,-31,r0
	msub.w r1,r4,r1

	mulu.l r1,r7,r4
	addi r1,-3,r5
	mulu.l r5,r19,r5
	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l r5,r3,r8
	mshalds.l r1,r21,r1
	shari r4,26,r4
	shlld r8,r0,r8
	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub r2,r8,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri r2,22,r21
	mulu.l r21,r1,r21
	shlld r5,r0,r8		/* r8 accumulates the quotient */
	addi r20,30-22,r0
	shlrd r21,r0,r21
	mulu.l r21,r3,r5
	add r8,r21,r8
	mcmpgt.l r21,r63,r21 // See Note 1
	addi r20,30,r0
	mshfhi.l r63,r21,r21
	sub r2,r5,r2
	andc r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l r2,r1,r7
	ptabs r18,tr0		/* tr0 = return target */
	addi r2,1,r2
	shlrd r7,r0,r7
	mulu.l r7,r3,r5
	add r8,r7,r8
	sub r2,r3,r2
	cmpgt r2,r5,r5		/* r5 = 0 or 1: final correction */
	add r8,r5,r2		/* r2 = quotient */
	/* could test r3 here to check for divide by zero.  */
	blink tr0,r63

/* Reached through the bgt/u above, i.e. when r9 = 32 - r22 is positive.  */
LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd r2,r9,r25		/* r25 = dividend >> r9 */
	shlri r25,32,r8
	msub.w r1,r4,r1

	mulu.l r1,r7,r4
	addi r1,-3,r5
	mulu.l r5,r8,r5
	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
	shlri r5,14-1,r8
	mulu.l r8,r7,r5
	mshalds.l r1,r21,r1
	shari r4,26,r4
	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri r25,22,r21
	mulu.l r21,r1,r21
	pta LOCAL(no_lo_adj),tr0
	addi r22,32,r0
	shlri r21,40,r21
	mulu.l r21,r7,r5
	add r8,r21,r8		/* r8 accumulates the quotient */
	shlld r2,r0,r2
	sub r25,r5,r25
	bgtu/u r7,r25,tr0 // no_lo_adj
	addi r8,1,r8		/* one more divisor still fits: adjust */
	sub r25,r7,r25
LOCAL(no_lo_adj):
	mextr4 r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l r8,r6,r5
	ptabs r18,tr0		/* tr0 = return target */
	/* bubble */
	cmpgtu r5,r2,r5		/* r5 = 1 when the estimate is one too large */
	sub r8,r5,r2		/* r2 = quotient */
	blink tr0,r63
	ENDFUNC(GLOBAL(udivdi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.
*/
#endif /* __SHMEDIA__ */
#endif /* L_udivdi3 */

#ifdef L_divdi3
#ifdef __SHMEDIA__
/* Signed 64-bit division layered on udivdi3_internal.
   In:	r2 = dividend, r3 = divisor, r18 = return address.
   Out:	r2 = quotient.
   Both operands are replaced by their magnitudes.  When their signs agree
   the unsigned routine is entered as a tail call; otherwise it is called,
   its result is negated, and control returns through tr1.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(divdi3)
	FUNC(GLOBAL(divdi3))
GLOBAL(divdi3):
	pta GLOBAL(udivdi3_internal),tr0
	shari r2,63,r22		/* r22 = sign mask of the dividend */
	shari r3,63,r23		/* r23 = sign mask of the divisor */
	xor r2,r22,r2
	xor r3,r23,r3
	sub r2,r22,r2		/* r2 = magnitude of the dividend */
	sub r3,r23,r3		/* r3 = magnitude of the divisor */
	beq/u r22,r23,tr0	/* equal signs: tail call, r18 untouched */
	ptabs r18,tr1		/* remember the real return target */
	blink tr0,r18		/* call; r18 = address of the next insn */
	sub r63,r2,r2		/* negate the quotient */
	blink tr1,r63
	ENDFUNC(GLOBAL(divdi3))
#endif /* __SHMEDIA__ */
#endif /* L_divdi3 */

#ifdef L_umoddi3
#ifdef __SHMEDIA__
/* Unsigned 64-bit remainder.
   In:	r2 = dividend, r3 = divisor, r18 = return address.
   Out:	r2 = remainder.
   umoddi3_internal is a hidden alias of the same entry point.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(umoddi3)
	FUNC(GLOBAL(umoddi3))
GLOBAL(umoddi3):
	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
	shlri r3,1,r4
	nsb r4,r22
	shlld r3,r22,r6		/* r6 = divisor shifted left by r22 */
	shlri r6,49,r5
	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta LOCAL(large_divisor),tr0
	addi r20,32,r9		/* r9 = 32 - r22 */
	msub.w r1,r4,r1
	madd.w r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri r6,32,r7		/* r7 = upper word of the shifted divisor */
	bgt/u r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri r2,32+14,r19
	addi r22,-31,r0
	msub.w r1,r4,r1

	mulu.l r1,r7,r4
	addi r1,-3,r5
	mulu.l r5,r19,r5
	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l r5,r3,r5
	mshalds.l r1,r21,r1
	shari r4,26,r4
	shlld r5,r0,r5
	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub r2,r5,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri r2,22,r21
	mulu.l r21,r1,r21
	addi r20,30-22,r0
	/* bubble */ /* could test r3 here to check for divide by zero.  */
	shlrd r21,r0,r21
	mulu.l r21,r3,r5
	mcmpgt.l r21,r63,r21 // See Note 1
	addi r20,30,r0
	mshfhi.l r63,r21,r21
	sub r2,r5,r2
	andc r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l r2,r1,r7
	ptabs r18,tr0		/* tr0 = return target */
	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
	shlrd r7,r0,r7
	mulu.l r7,r3,r5
	/* bubble */
	addi r8,1,r7
	cmpgt r7,r5,r7
	cmvne r7,r8,r2		/* pick rest - r3 when the compare is true */
	sub r2,r5,r2		/* r2 = remainder */
	blink tr0,r63

/* Reached through the bgt/u above, i.e. when r9 = 32 - r22 is positive.  */
LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd r2,r9,r25		/* r25 = dividend >> r9 */
	shlri r25,32,r8
	msub.w r1,r4,r1

	mulu.l r1,r7,r4
	addi r1,-3,r5
	mulu.l r5,r8,r5
	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                 the case may be, %0000000000000000 000.11111111111, still */
	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
	shlri r5,14-1,r8
	mulu.l r8,r7,r5
	mshalds.l r1,r21,r1
	shari r4,26,r4
	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri r25,22,r21
	mulu.l r21,r1,r21
	pta LOCAL(no_lo_adj),tr0
	addi r22,32,r0
	shlri r21,40,r21
	mulu.l r21,r7,r5
	add r8,r21,r8
	shlld r2,r0,r2
	sub r25,r5,r25
	bgtu/u r7,r25,tr0 // no_lo_adj
	addi r8,1,r8
	sub r25,r7,r25
LOCAL(no_lo_adj):
	mextr4 r2,r25,r2

	/* large_divisor: only needs a few adjustments.
*/
	mulu.l r8,r6,r5
	ptabs r18,tr0		/* tr0 = return target */
	add r2,r6,r7		/* r7 = rest plus the shifted divisor */
	cmpgtu r5,r2,r8
	cmvne r8,r7,r2		/* use r7 when r5 is above the rest */
	sub r2,r5,r2
	shlrd r2,r22,r2		/* shift right by r22, the count used to form r6 */
	blink tr0,r63
	ENDFUNC(GLOBAL(umoddi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.  */
#endif /* __SHMEDIA__ */
#endif /* L_umoddi3 */

#ifdef L_moddi3
#ifdef __SHMEDIA__
/* Signed 64-bit remainder layered on umoddi3_internal.
   In:	r2 = dividend, r3 = divisor, r18 = return address.
   Out:	r2 = remainder, negated when the dividend was negative.
   Both operands are replaced by their magnitudes.  A non-negative
   dividend enters the unsigned routine as a tail call.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(moddi3)
	FUNC(GLOBAL(moddi3))
GLOBAL(moddi3):
	pta GLOBAL(umoddi3_internal),tr0
	shari r2,63,r22		/* r22 = sign mask of the dividend */
	shari r3,63,r23		/* r23 = sign mask of the divisor */
	xor r2,r22,r2
	xor r3,r23,r3
	sub r2,r22,r2		/* r2 = magnitude of the dividend */
	sub r3,r23,r3		/* r3 = magnitude of the divisor */
	beq/u r22,r63,tr0	/* dividend not negative: tail call */
	ptabs r18,tr1		/* remember the real return target */
	blink tr0,r18		/* call; r18 = address of the next insn */
	sub r63,r2,r2		/* negate the remainder */
	blink tr1,r63
	ENDFUNC(GLOBAL(moddi3))
#endif /* __SHMEDIA__ */
#endif /* L_moddi3 */

#ifdef L_set_fpscr
#if !defined (__SH2A_NOFPU__)
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
	.mode	SHcompact
#endif
/* set_fpscr: load fpscr from r4, then store two variants of that value
   into the two words of fpscr_values.
   In:	r4 = new fpscr value.
   Overwrites r0, r1, r2, r3; r12 is saved and restored in PIC builds.
   After the swap.w the immediates 8 and 16 act on bits 19 and 20 of the
   original value.  NOTE(review): presumably the PR and SZ mode bits -
   confirm against the FPU manual.  */
	.global	GLOBAL(set_fpscr)
	HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
	lds r4,fpscr
#ifdef __PIC__
	mov.l	r12,@-r15	/* r12 is borrowed for the GOT lookup */
#ifdef __vxworks
	mov.l	LOCAL(set_fpscr_L0_base),r12
	mov.l	LOCAL(set_fpscr_L0_index),r0
	mov.l	@r12,r12
	mov.l	@(r0,r12),r12
#else
	mova	LOCAL(set_fpscr_L0),r0
	mov.l	LOCAL(set_fpscr_L0),r12
	add	r0,r12
#endif
	mov.l	LOCAL(set_fpscr_L1),r0
	mov.l	@(r0,r12),r1	/* r1 = address of fpscr_values, from the GOT */
	mov.l	@r15+,r12
#else
	mov.l LOCAL(set_fpscr_L1),r1	/* r1 = address of fpscr_values */
#endif
	swap.w r4,r0		/* bits 16-23 are now in the low byte */
	or #24,r0		/* set bits 19 and 20 */
#ifndef FMOVD_WORKS
	xor #16,r0		/* and clear bit 20 again */
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w r0,r3
	mov.l r3,@(4,r1)	/* first variant goes to the second word */
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r2
	mov.l r2,@r1		/* first variant goes to the first word */
#endif
#ifndef FMOVD_WORKS
	xor #8,r0		/* clear bit 19 */
#else
	xor #24,r0		/* clear bits 19 and 20 */
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w r0,r2
	rts
	mov.l r2,@r1		/* executed with the rts: second variant */
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w r0,r3
	rts
	mov.l r3,@(4,r1)	/* executed with the rts: second variant */
#endif
	.align 2
#ifdef __PIC__
#ifdef __vxworks
LOCAL(set_fpscr_L0_base):
	.long ___GOTT_BASE__
LOCAL(set_fpscr_L0_index):
	.long ___GOTT_INDEX__
#else
LOCAL(set_fpscr_L0):
	.long _GLOBAL_OFFSET_TABLE_
#endif
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values)
#endif

	ENDFUNC(GLOBAL(set_fpscr))
#ifndef NO_FPSCR_VALUES
#ifdef __ELF__
        .comm   GLOBAL(fpscr_values),8,4
#else
        .comm   GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* NO_FPSCR_VALUES */
#endif /* SH2E / SH3E / SH4 */
#endif /* __SH2A_NOFPU__ */
#endif /* L_set_fpscr */
#ifdef L_ic_invalidate
#if __SH5__ == 32
/* init_trampoline stores r2 at offset 8 and r3 at offset 12 of the block
   addressed by r0, and a 64-bit constant (chosen by endianness) at
   offset 0.  There is no return instruction of its own: execution
   continues into ic_invalidate below.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(init_trampoline)
	HIDDEN_FUNC(GLOBAL(init_trampoline))
GLOBAL(init_trampoline):
	st.l	r0,8,r2
#ifdef __LITTLE_ENDIAN__
	movi	9,r20
	shori	0x402b,r20
	shori	0xd101,r20
	shori	0xd002,r20
#else
	movi	0xffffffffffffd002,r20
	shori	0xd101,r20
	shori	0x402b,r20
	shori	9,r20
#endif
	st.q	r0,0,r20
	st.l	r0,12,r3
	ENDFUNC(GLOBAL(init_trampoline))
/* ic_invalidate (SHmedia): applies ocbwb and icbi to the address in r0,
   then returns through r18.  */
	.global	GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	r0,0
	synco
	icbi	r0, 0
	ptabs	r18, tr0
	synci
	blink	tr0, r63
	ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4A__)
/* ic_invalidate (SH4A): applies ocbwb and icbi to the address in r4.  */
	.global GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	@r4
	synco
	icbi	@r4
	rts
	  nop
	ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
	/* For system code, we use ic_invalidate_line_i, but user code
	   needs a different mechanism.  A kernel call is generally not
	   available, and it would also be slow.  Different SH4 variants use
	   different sizes and associativities of the Icache.  We use a small
	   bit of dispatch code that can be put hidden in every shared object,
	   which calls the actual processor-specific invalidation code in a
	   separate module.
	   Or if you have operating system support, the OS could mmap the
	   processor-specific code from a single page, since it is highly
	   repetitive.
*/
/* ic_invalidate (SH4 dispatch stub).
   In:	r4 = address whose instruction-cache line is to be invalidated.
   Loads the address of ic_invalidate_array into r1 (through the GOT in
   PIC builds), writes the data-cache block at r4 back with ocbwb, then
   jumps to  r1 + ((r4 - r1) & mask)  where the mask is the word stored
   at offset 8 of the array.  r0 is loaded from offset 4 of the array on
   the way out and r4 holds r4 - r1.
   Overwrites r0, r1, r4 (and r2 in PIC builds).  */
	.global GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
#ifdef __pic__
#ifdef __vxworks
	mov.l	1f,r1
	mov.l	2f,r0
	mov.l	@r1,r1
	mov.l	0f,r2
	mov.l	@(r0,r1),r0
#else
	mov.l	1f,r1
	mova	1f,r0
	mov.l	0f,r2
	add	r1,r0
#endif
	mov.l	@(r0,r2),r1	/* r1 = address of ic_invalidate_array */
#else
	mov.l	0f,r1		/* r1 = address of ic_invalidate_array */
#endif
	ocbwb	@r4
	mov.l	@(8,r1),r0	/* r0 = mask word from the array */
	sub	r1,r4
	and	r4,r0
	add	r1,r0		/* r0 = entry inside the array */
	jmp	@r0
	mov.l	@(4,r1),r0	/* executed with the jmp */
	.align	2
#ifndef __pic__
0:	.long   GLOBAL(ic_invalidate_array)
#else /* __pic__ */
	.global GLOBAL(ic_invalidate_array)
0:	.long   GLOBAL(ic_invalidate_array)@GOT
#ifdef __vxworks
1:	.long	___GOTT_BASE__
2:	.long	___GOTT_INDEX__
#else
1:	.long   _GLOBAL_OFFSET_TABLE_
#endif
#endif /* __pic__ */
	/* Placed after the conditional so that PIC and non-PIC builds both
	   record a size for the symbol.  */
	ENDFUNC(GLOBAL(ic_invalidate))
#endif /* SH4 */
#endif /* L_ic_invalidate */

#ifdef L_ic_invalidate_array
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
	/* Entered with r1 and r4 as left by an SH4 ic_invalidate stub; adding
	   them gives the address handed to icbi.  */
	.global GLOBAL(ic_invalidate_array)
	FUNC(GLOBAL(ic_invalidate_array))
GLOBAL(ic_invalidate_array):
	add	r1,r4
	synco
	icbi	@r4
	rts
	  nop
	.align 2
	.long 0
	ENDFUNC(GLOBAL(ic_invalidate_array))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
	.global GLOBAL(ic_invalidate_array)
	.p2align 5
	FUNC(GLOBAL(ic_invalidate_array))
/* This must be aligned to the beginning of a cache line.
*/
GLOBAL(ic_invalidate_array):
/* WAYS and WAY_SIZE can be predefined by the build; the defaults below
   are used when WAYS is not defined.  The body is generated with .rept;
   which of the three layouts is emitted depends on WAYS.  */
#ifndef WAYS
#define WAYS 4
#define WAY_SIZE 0x4000
#endif
#if WAYS == 1
	/* Per block: rts / nop followed by seven data words.  */
	.rept	WAY_SIZE * WAYS / 32
	rts
	nop
	.rept	7
	.long	WAY_SIZE - 32
	.endr
	.endr
#elif WAYS <= 6
	/* Per block: a braf with an r0 adjustment, two data words, then
	   WAYS-2 further braf hops and 7-WAYS returns.  */
	.rept	WAY_SIZE * WAYS / 32
	braf	r0
	add	#-8,r0
	.long	WAY_SIZE + 8
	.long	WAY_SIZE - 32
	.rept	WAYS-2
	braf	r0
	nop
	.endr
	.rept	7 - WAYS
	rts
	nop
	.endr
	.endr
#else /* WAYS > 6 */
	/* This variant needs two different pages for mmap-ing.  */
	.rept	WAYS-1
	.rept	WAY_SIZE / 32
	braf	r0
	nop
	.long	WAY_SIZE
	.rept 6
	.long	WAY_SIZE - 32
	.endr
	.endr
	.endr
	.rept	WAY_SIZE / 32
	rts
	.rept	15
	nop
	.endr
	.endr
#endif /* WAYS */
	ENDFUNC(GLOBAL(ic_invalidate_array))
#endif /* SH4 */
#endif /* L_ic_invalidate_array */

#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
/* Dispatch table of GCC_shcompact_call_trampoline: 16-bit offsets of the
   handlers relative to LOCAL(ct_main_label).  The trampoline indexes it
   with the nsb count of the cookie in r1, using a base biased by 31
   entries.  */
	.section	.rodata
	.align	1
LOCAL(ct_main_table):
.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2

     /* This function loads 64-bit general-purpose registers from the
	stack, from a memory address contained in them or from an FP
	register, according to a cookie passed in r1.  Its execution
	time is linear on the number of registers that actually have
	to be copied.  See sh.h for details on the actual bit pattern.

	The function to be called is passed in r0.  If a 32-bit return
	value is expected, the actual function will be tail-called,
	otherwise the return address will be stored in r10 (that the
	caller should expect to be clobbered) and the return value
	will be expanded into r2/r3 upon return.  */

	.global	GLOBAL(GCC_shcompact_call_trampoline)
	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
GLOBAL(GCC_shcompact_call_trampoline):
	ptabs/l	r0, tr0	/* Prepare to call the actual function.
*/
	/* r0 = address of ct_main_table biased by 31 entries, built in two
	   16-bit halves.  */
	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
	pt/l	LOCAL(ct_loop), tr1
	addz.l	r1, r63, r1	/* zero-extend the cookie */
	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
/* Main loop.  Every handler clears its own field of the cookie in r1 and
   branches back here through tr1.  */
LOCAL(ct_loop):
	nsb	r1, r28
	shlli	r28, 1, r29	/* two bytes per table entry */
	ldx.w	r0, r29, r30	/* r30 = handler offset from ct_main_label */
LOCAL(ct_main_label):
	ptrel/l	r30, tr2
	blink	tr2, r63
LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
	/* It must be dr0, so just do it.  */
	fmov.dq	dr0, r2
	movi	7, r30
	shlli	r30, 29, r31
	andc	r1, r31, r1	/* clear cookie bits 29-31 */
	blink	tr1, r63
LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
	/* It is either dr0 or dr2.  */
	movi	7, r30
	shlri	r1, 26, r32
	shlli	r30, 26, r31
	andc	r1, r31, r1	/* clear cookie bits 26-28 */
	fmov.dq	dr0, r3
	beqi/l	r32, 4, tr1	/* field value 4: dr0 was the right one */
	fmov.dq	dr2, r3
	blink	tr1, r63
LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
	/* Computed branch: two cookie bits, scaled by 8, select one of the
	   fmov.dq / blink pairs that follow ct_r4_fp_copy.  */
	shlri	r1, 23 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
LOCAL(ct_r4_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 23, r31
	andc	r1, r31, r1	/* clear cookie bits 23-25 */
	blink	tr2, r63
LOCAL(ct_r4_fp_copy):
	fmov.dq	dr0, r4
	blink	tr1, r63
	fmov.dq	dr2, r4
	blink	tr1, r63
	fmov.dq	dr4, r4
	blink	tr1, r63
LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
	/* Same computed branch, keyed on the field at bits 20-22.  */
	shlri	r1, 20 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
LOCAL(ct_r5_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 20, r31
	andc	r1, r31, r1	/* clear cookie bits 20-22 */
	blink	tr2, r63
LOCAL(ct_r5_fp_copy):
	fmov.dq	dr0, r5
	blink	tr1, r63
	fmov.dq	dr2, r5
	blink	tr1, r63
	fmov.dq	dr4, r5
	blink	tr1, r63
	fmov.dq	dr6, r5
	blink	tr1, r63
LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
	/* It must be dr8.  */
	fmov.dq	dr8, r6
	movi	15, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1	/* clear cookie bits 16-19 */
	blink	tr1, r63
LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
	shlri	r1, 16 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
LOCAL(ct_r6_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1	/* clear cookie bits 16-18 */
	blink	tr2, r63
LOCAL(ct_r6_fp_copy):
	fmov.dq	dr0, r6
	blink	tr1, r63
	fmov.dq	dr2, r6
	blink	tr1, r63
	fmov.dq	dr4, r6
	blink	tr1, r63
	fmov.dq	dr6, r6
	blink	tr1, r63
LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 12, r31
	shlri	r1, 12, r32
	andc	r1, r31, r1	/* clear cookie bits 12-15 */
	fmov.dq	dr8, r7
	beqi/l	r32, 8, tr1	/* field value 8: dr8 was the right one */
	fmov.dq	dr10, r7
	blink	tr1, r63
LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
	shlri	r1, 12 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
LOCAL(ct_r7_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 12, r31
	andc	r1, r31, r1	/* clear cookie bits 12-14 */
	blink	tr2, r63
LOCAL(ct_r7_fp_copy):
	fmov.dq	dr0, r7
	blink	tr1, r63
	fmov.dq	dr2, r7
	blink	tr1, r63
	fmov.dq	dr4, r7
	blink	tr1, r63
	fmov.dq	dr6, r7
	blink	tr1, r63
LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 8, r31
	andi	r1, 1 << 8, r32	/* bit 8 chooses between the two */
	andc	r1, r31, r1	/* clear cookie bits 8-11 */
	fmov.dq	dr8, r8
	beq/l	r32, r63, tr1	/* bit 8 clear: dr8 was the right one */
	fmov.dq	dr10, r8
	blink	tr1, r63
LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.
*/
	/* Computed branch: two cookie bits, scaled by 8, select one of the
	   fmov.dq / blink pairs that follow ct_r8_fp_copy.  */
	shlri	r1, 8 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
LOCAL(ct_r8_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 8, r31
	andc	r1, r31, r1	/* clear cookie bits 8-10 */
	blink	tr2, r63
LOCAL(ct_r8_fp_copy):
	fmov.dq	dr0, r8
	blink	tr1, r63
	fmov.dq	dr2, r8
	blink	tr1, r63
	fmov.dq	dr4, r8
	blink	tr1, r63
	fmov.dq	dr6, r8
	blink	tr1, r63
LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 4, r31
	andi	r1, 1 << 4, r32	/* bit 4 chooses between the two */
	andc	r1, r31, r1	/* clear cookie bits 4-7 */
	fmov.dq	dr8, r9
	beq/l	r32, r63, tr1	/* bit 4 clear: dr8 was the right one */
	fmov.dq	dr10, r9
	blink	tr1, r63
LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
	shlri	r1, 4 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
LOCAL(ct_r9_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 4, r31
	andc	r1, r31, r1	/* clear cookie bits 4-6 */
	blink	tr2, r63
LOCAL(ct_r9_fp_copy):
	fmov.dq	dr0, r9
	blink	tr1, r63
	fmov.dq	dr2, r9
	blink	tr1, r63
	fmov.dq	dr4, r9
	blink	tr1, r63
	fmov.dq	dr6, r9
	blink	tr1, r63
/* Memory-load handlers.  Each one tests the two-bit field of its register.
   When both bits are set it branches to the matching ct_rN_load stub,
   which loads only that register and goes back to the main loop.
   Otherwise it stores rN + 8 in the next register, loads rN from the
   address held in rN, and falls through to the handler of the next
   register.  */
LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
	pt/l	LOCAL(ct_r2_load), tr2
	movi	3, r30
	shlli	r30, 29, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r2, 8, r3
	ldx.q	r2, r63, r2
	/* Fall through.  */
LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
	pt/l	LOCAL(ct_r3_load), tr2
	movi	3, r30
	shlli	r30, 26, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r3, 8, r4
	ldx.q	r3, r63, r3
LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
	pt/l	LOCAL(ct_r4_load), tr2
	movi	3, r30
	shlli	r30, 23, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r4, 8, r5
	ldx.q	r4, r63, r4
LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
	pt/l	LOCAL(ct_r5_load), tr2
	movi	3, r30
	shlli	r30, 20, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r5, 8, r6
	ldx.q	r5, r63, r5
LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
	pt/l	LOCAL(ct_r6_load), tr2
	movi	3 << 16, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r6, 8, r7
	ldx.q	r6, r63, r6
LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
	pt/l	LOCAL(ct_r7_load), tr2
	movi	3 << 12, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r7, 8, r8
	ldx.q	r7, r63, r7
LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
	pt/l	LOCAL(ct_r8_load), tr2
	movi	3 << 8, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r8, 8, r9
	ldx.q	r8, r63, r8
LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
	/* r9 is the last register, so go straight to the final check.  */
	pt/l	LOCAL(ct_check_tramp), tr2
	ldx.q	r9, r63, r9
	blink	tr2, r63
/* Single-register load stubs used by the handlers above.  */
LOCAL(ct_r2_load):
	ldx.q	r2, r63, r2
	blink	tr1, r63
LOCAL(ct_r3_load):
	ldx.q	r3, r63, r3
	blink	tr1, r63
LOCAL(ct_r4_load):
	ldx.q	r4, r63, r4
	blink	tr1, r63
LOCAL(ct_r5_load):
	ldx.q	r5, r63, r5
	blink	tr1, r63
LOCAL(ct_r6_load):
	ldx.q	r6, r63, r6
	blink	tr1, r63
LOCAL(ct_r7_load):
	ldx.q	r7, r63, r7
	blink	tr1, r63
LOCAL(ct_r8_load):
	ldx.q	r8, r63, r8
	blink	tr1, r63
/* Pop handlers: load one 64-bit value from the address in r15, advance
   r15 by 8, clear the cookie bit and return to the main loop.  */
LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r2
	shlli	r30, 29, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r3
	shlli	r30, 26, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r4_pop):	/* Pop r4 from the stack.
*/ 2577 movi 1, r30 2578 ldx.q r15, r63, r4 2579 shlli r30, 23, r31 2580 addi.l r15, 8, r15 2581 andc r1, r31, r1 2582 blink tr1, r63 2583LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ 2584 movi 1, r30 2585 ldx.q r15, r63, r5 2586 shlli r30, 20, r31 2587 addi.l r15, 8, r15 2588 andc r1, r31, r1 2589 blink tr1, r63 2590LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ 2591 movi 1, r30 2592 ldx.q r15, r63, r6 2593 shlli r30, 16, r31 2594 addi.l r15, 8, r15 2595 andc r1, r31, r1 2596 blink tr1, r63 2597LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ 2598 ldx.q r15, r63, r7 2599 movi 1 << 12, r31 2600 addi.l r15, 8, r15 2601 andc r1, r31, r1 2602 blink tr1, r63 2603LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ 2604 ldx.q r15, r63, r8 2605 movi 1 << 8, r31 2606 addi.l r15, 8, r15 2607 andc r1, r31, r1 2608 blink tr1, r63 2609LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ 2610 andi r1, 7 << 1, r30 2611 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 2612 shlli r30, 2, r31 2613 shori LOCAL(ct_end_of_pop_seq) & 65535, r32 2614 sub.l r32, r31, r33 2615 ptabs/l r33, tr2 2616 blink tr2, r63 2617LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ 2618 ldx.q r15, r63, r3 2619 addi.l r15, 8, r15 2620 ldx.q r15, r63, r4 2621 addi.l r15, 8, r15 2622 ldx.q r15, r63, r5 2623 addi.l r15, 8, r15 2624 ldx.q r15, r63, r6 2625 addi.l r15, 8, r15 2626 ldx.q r15, r63, r7 2627 addi.l r15, 8, r15 2628 ldx.q r15, r63, r8 2629 addi.l r15, 8, r15 2630LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ 2631 ldx.q r15, r63, r9 2632 addi.l r15, 8, r15 2633LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ 2634LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ 2635 pt/u LOCAL(ct_ret_wide), tr2 2636 andi r1, 1, r1 2637 bne/u r1, r63, tr2 2638LOCAL(ct_call_func): /* Just branch to the function. */ 2639 blink tr0, r63 2640LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its 2641 64-bit return value. 
*/ 2642 add.l r18, r63, r10 /* Keep our own return address (r18) in r10 across the call. */ 2643 blink tr0, r18 /* The call itself: branch through tr0, linking in r18. */ 2644 ptabs r10, tr0 /* Return target is the address saved in r10 above. */ 2645#if __LITTLE_ENDIAN__ 2646 shari r2, 32, r3 /* r3 <- upper 32 bits of the 64-bit result. */ 2647 add.l r2, r63, r2 /* r2 <- lower 32 bits (32-bit add with r63). */ 2648#else 2649 add.l r2, r63, r3 /* r3 <- lower 32 bits (32-bit add with r63). */ 2650 shari r2, 32, r2 /* r2 <- upper 32 bits of the 64-bit result. */ 2651#endif 2652 blink tr0, r63 2653 2654 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) 2655#endif /* L_shcompact_call_trampoline */ 2656 2657#ifdef L_shcompact_return_trampoline 2658 /* This function does the converse of the code in `ret_wide' 2659 above. It is tail-called by SHcompact functions returning 2660 64-bit non-floating-point values, to pack the 32-bit values in 2661 r2 and r3 into r2. */ 2662 2663 .mode SHmedia 2664 .section .text..SHmedia32, "ax" 2665 .align 2 2666 .global GLOBAL(GCC_shcompact_return_trampoline) 2667 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) 2668GLOBAL(GCC_shcompact_return_trampoline): 2669 ptabs/l r18, tr0 /* Prepare the return branch through r18. */ 2670#if __LITTLE_ENDIAN__ 2671 addz.l r2, r63, r2 /* Little-endian: r2 supplies the low 32 bits... */ 2672 shlli r3, 32, r3 /* ...and r3 is moved up to become the high 32 bits. */ 2673#else 2674 addz.l r3, r63, r3 /* Big-endian: r3 supplies the low 32 bits... */ 2675 shlli r2, 32, r2 /* ...and r2 is moved up to become the high 32 bits. */ 2676#endif 2677 or r3, r2, r2 /* Merge both halves; the packed value is returned in r2. */ 2678 blink tr0, r63 2679 2680 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) 2681#endif /* L_shcompact_return_trampoline */ 2682 2683#ifdef L_shcompact_incoming_args 2684 .section .rodata 2685 .align 1 /* Dispatch table of 16-bit entries, each an offset from ia_main_label. GCC_shcompact_incoming_args fetches an entry with ldx.w and branches to it with ptrel. */ 2686LOCAL(ia_main_table): 2687.word 1 /* Invalid, just loop */ 2688.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) 2689.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) 2690.word 1 /* Invalid, just loop */ 2691.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) 2692.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) 2693.word 1 /* Invalid, just loop */ 2694.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) 2695.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) 2696.word 1 /* Invalid, just loop */ 2697.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) 2698.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) 2699.word 1 /* Invalid, just loop */ 2700.word 1 /* Invalid, just loop */ 2701.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) 
2702.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) 2703.word 1 /* Invalid, just loop */ 2704.word 1 /* Invalid, just loop */ 2705.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) 2706.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) 2707.word 1 /* Invalid, just loop */ 2708.word 1 /* Invalid, just loop */ 2709.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) 2710.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) 2711.word 1 /* Invalid, just loop */ 2712.word 1 /* Invalid, just loop */ 2713.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) 2714.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) 2715.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) 2716.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) 2717.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) 2718.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) 2719.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) 2720 .mode SHmedia 2721 .section .text..SHmedia32, "ax" 2722 .align 2 2723 2724 /* This function stores 64-bit general-purpose registers back in 2725 the stack, and loads the address in which each register 2726 was stored into itself. The lower 32 bits of r17 hold the address 2727 to begin storing, and the upper 32 bits of r17 hold the cookie. 2728 Its execution time is linear in the 2729 number of registers that actually have to be copied, and it is 2730 optimized for structures larger than 64 bits, as opposed to 2731 individual `long long' arguments. See sh.h for details on the 2732 actual bit pattern. */ 2733 2734 .global GLOBAL(GCC_shcompact_incoming_args) 2735 FUNC(GLOBAL(GCC_shcompact_incoming_args)) 2736GLOBAL(GCC_shcompact_incoming_args): 2737 ptabs/l r18, tr0 /* Prepare to return. */ 2738 shlri r17, 32, r0 /* Load the cookie. 
*/ 2739 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* r43 <- ia_main_table biased by -31*2: upper 16 bits here, lower 16 bits via the shori below. */ 2740 pt/l LOCAL(ia_loop), tr1 /* tr1 <- ia_loop, the re-dispatch point used by every handler. */ 2741 add.l r17, r63, r17 /* Keep only the 32-bit store address in r17. */ 2742 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 2743LOCAL(ia_loop): /* Dispatch on what is left of the cookie: the biased table is indexed with 2 * nsb(r0). */ 2744 nsb r0, r36 2745 shlli r36, 1, r37 2746 ldx.w r43, r37, r38 2747LOCAL(ia_main_label): 2748 ptrel/l r38, tr2 /* Table entries are offsets from ia_main_label. */ 2749 blink tr2, r63 2750LOCAL(ia_r2_ld): /* Store r2 and load its address. */ 2751 movi 3, r38 2752 shlli r38, 29, r39 /* r39 = 3 << 29, the mask of r2's two cookie bits. */ 2753 and r0, r39, r40 /* r40 = current value of that field. */ 2754 andc r0, r39, r0 /* Clear the field so the next dispatch sees later registers. */ 2755 stx.q r17, r63, r2 2756 add.l r17, r63, r2 2757 addi.l r17, 8, r17 2758 beq/u r39, r40, tr1 /* Both field bits were set: back to ia_loop. Otherwise fall through and treat r3 the same way. */ 2759LOCAL(ia_r3_ld): /* Store r3 and load its address. */ 2760 movi 3, r38 2761 shlli r38, 26, r39 /* Same pattern as ia_r2_ld, field mask 3 << 26. */ 2762 and r0, r39, r40 2763 andc r0, r39, r0 2764 stx.q r17, r63, r3 2765 add.l r17, r63, r3 2766 addi.l r17, 8, r17 2767 beq/u r39, r40, tr1 2768LOCAL(ia_r4_ld): /* Store r4 and load its address. */ 2769 movi 3, r38 2770 shlli r38, 23, r39 /* Field mask 3 << 23. */ 2771 and r0, r39, r40 2772 andc r0, r39, r0 2773 stx.q r17, r63, r4 2774 add.l r17, r63, r4 2775 addi.l r17, 8, r17 2776 beq/u r39, r40, tr1 2777LOCAL(ia_r5_ld): /* Store r5 and load its address. */ 2778 movi 3, r38 2779 shlli r38, 20, r39 /* Field mask 3 << 20. */ 2780 and r0, r39, r40 2781 andc r0, r39, r0 2782 stx.q r17, r63, r5 2783 add.l r17, r63, r5 2784 addi.l r17, 8, r17 2785 beq/u r39, r40, tr1 2786LOCAL(ia_r6_ld): /* Store r6 and load its address. */ 2787 movi 3, r38 2788 shlli r38, 16, r39 /* Field mask 3 << 16. */ 2789 and r0, r39, r40 2790 andc r0, r39, r0 2791 stx.q r17, r63, r6 2792 add.l r17, r63, r6 2793 addi.l r17, 8, r17 2794 beq/u r39, r40, tr1 2795LOCAL(ia_r7_ld): /* Store r7 and load its address. */ 2796 movi 3 << 12, r39 2797 and r0, r39, r40 2798 andc r0, r39, r0 2799 stx.q r17, r63, r7 2800 add.l r17, r63, r7 2801 addi.l r17, 8, r17 2802 beq/u r39, r40, tr1 2803LOCAL(ia_r8_ld): /* Store r8 and load its address. 
*/ 2804 movi 3 << 8, r39 2805 and r0, r39, r40 2806 andc r0, r39, r0 2807 stx.q r17, r63, r8 2808 add.l r17, r63, r8 2809 addi.l r17, 8, r17 2810 beq/u r39, r40, tr1 2811LOCAL(ia_r9_ld): /* Store r9 and load its address. */ 2812 stx.q r17, r63, r9 2813 add.l r17, r63, r9 2814 blink tr0, r63 /* r9 is the last register handled: return through tr0. */ 2815LOCAL(ia_r2_push): /* Push r2 onto the stack. */ 2816 movi 1, r38 2817 shlli r38, 29, r39 2818 andc r0, r39, r0 /* Clear bit 29 of the cookie, then store r2 at r17 and advance r17 by 8. */ 2819 stx.q r17, r63, r2 2820 addi.l r17, 8, r17 2821 blink tr1, r63 /* Re-dispatch at ia_loop. */ 2822LOCAL(ia_r3_push): /* Push r3 onto the stack. */ 2823 movi 1, r38 2824 shlli r38, 26, r39 2825 andc r0, r39, r0 2826 stx.q r17, r63, r3 2827 addi.l r17, 8, r17 2828 blink tr1, r63 2829LOCAL(ia_r4_push): /* Push r4 onto the stack. */ 2830 movi 1, r38 2831 shlli r38, 23, r39 2832 andc r0, r39, r0 2833 stx.q r17, r63, r4 2834 addi.l r17, 8, r17 2835 blink tr1, r63 2836LOCAL(ia_r5_push): /* Push r5 onto the stack. */ 2837 movi 1, r38 2838 shlli r38, 20, r39 2839 andc r0, r39, r0 2840 stx.q r17, r63, r5 2841 addi.l r17, 8, r17 2842 blink tr1, r63 2843LOCAL(ia_r6_push): /* Push r6 onto the stack. */ 2844 movi 1, r38 2845 shlli r38, 16, r39 2846 andc r0, r39, r0 2847 stx.q r17, r63, r6 2848 addi.l r17, 8, r17 2849 blink tr1, r63 2850LOCAL(ia_r7_push): /* Push r7 onto the stack. */ 2851 movi 1 << 12, r39 2852 andc r0, r39, r0 2853 stx.q r17, r63, r7 2854 addi.l r17, 8, r17 2855 blink tr1, r63 2856LOCAL(ia_r8_push): /* Push r8 onto the stack. */ 2857 movi 1 << 8, r39 2858 andc r0, r39, r0 2859 stx.q r17, r63, r8 2860 addi.l r17, 8, r17 2861 blink tr1, r63 2862LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ 2863 andi r0, 7 << 1, r38 /* r38 = cookie bits 1-3, i.e. a 3-bit count times 2. */ 2864 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 2865 shlli r38, 2, r39 /* r39 = count * 8 bytes; presumably one store/advance instruction pair per count step (confirm against the SHmedia encoding size). */ 2866 shori LOCAL(ia_end_of_push_seq) & 65535, r40 2867 sub.l r40, r39, r41 /* Entry point = ia_end_of_push_seq - r39. */ 2868 ptabs/l r41, tr2 2869 blink tr2, r63 2870LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. NOTE(review): the label name looks like a typo for ia_start_of_push_seq (compare ct_start_of_pop_seq); it is not referenced anywhere in this part of the file. 
*/ 2871 stx.q r17, r63, r3 2872 addi.l r17, 8, r17 2873 stx.q r17, r63, r4 2874 addi.l r17, 8, r17 2875 stx.q r17, r63, r5 2876 addi.l r17, 8, r17 2877 stx.q r17, r63, r6 2878 addi.l r17, 8, r17 2879 stx.q r17, r63, r7 2880 addi.l r17, 8, r17 2881 stx.q r17, r63, r8 2882 addi.l r17, 8, r17 2883LOCAL(ia_r9_push): /* Push r9 onto the stack. */ 2884 stx.q r17, r63, r9 2885LOCAL(ia_return): /* Return. */ 2886 blink tr0, r63 2887LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ 2888 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) 2889#endif /* L_shcompact_incoming_args */ 2890#endif 2891#if __SH5__ 2892#ifdef L_nested_trampoline 2893#if __SH5__ == 32 2894 .section .text..SHmedia32,"ax" 2895#else 2896 .text 2897#endif 2898 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ 2899 .global GLOBAL(GCC_nested_trampoline) 2900 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) /* Trampoline template. It obtains a base address from the PC (ptrel with a zero offset, read back with gettr), then loads two words relative to that base: the branch target at offset 24, and a value left in r1 from offset 32 (__SH5__ == 64) or 28 (otherwise). NOTE(review): r0 is presumably the address of the trampoline copy itself and r1 the static chain; confirm against the code that fills in the trampoline. */ 2901GLOBAL(GCC_nested_trampoline): 2902 .mode SHmedia 2903 ptrel/u r63, tr0 2904 gettr tr0, r0 2905#if __SH5__ == 64 2906 ld.q r0, 24, r1 2907#else 2908 ld.l r0, 24, r1 2909#endif 2910 ptabs/l r1, tr1 /* tr1 <- the word loaded from offset 24: where we branch to. */ 2911#if __SH5__ == 64 2912 ld.q r0, 32, r1 2913#else 2914 ld.l r0, 28, r1 2915#endif 2916 blink tr1, r63 /* Branch without linking; r1 still holds the second loaded word. */ 2917 2918 ENDFUNC(GLOBAL(GCC_nested_trampoline)) 2919#endif /* L_nested_trampoline */ 2920#endif /* __SH5__ */ 2921#if __SH5__ == 32 2922#ifdef L_push_pop_shmedia_regs 2923 .section .text..SHmedia32,"ax" 2924 .mode SHmedia 2925 .align 2 2926#ifndef __SH4_NOFPU__ 2927 .global GLOBAL(GCC_push_shmedia_regs) 2928 FUNC(GLOBAL(GCC_push_shmedia_regs)) 2929GLOBAL(GCC_push_shmedia_regs): 2930 addi.l r15, -14*8, r15 /* Reserve 14 eight-byte slots and save the even-numbered dr36-dr62 in them. There is no return here: execution continues into the general-register save that follows, which reserves 27 more slots. */ 2931 fst.d r15, 13*8, dr62 2932 fst.d r15, 12*8, dr60 2933 fst.d r15, 11*8, dr58 2934 fst.d r15, 10*8, dr56 2935 fst.d r15, 9*8, dr54 2936 fst.d r15, 8*8, dr52 2937 fst.d r15, 7*8, dr50 2938 fst.d r15, 6*8, dr48 2939 fst.d r15, 5*8, dr46 2940 fst.d r15, 4*8, dr44 2941 fst.d r15, 3*8, dr42 2942 fst.d r15, 2*8, dr40 2943 fst.d r15, 1*8, dr38 2944 fst.d r15, 0*8, dr36 
2945#else /* ! __SH4_NOFPU__ */ 2946 .global GLOBAL(GCC_push_shmedia_regs_nofpu) 2947 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) 2948GLOBAL(GCC_push_shmedia_regs_nofpu): 2949#endif /* ! __SH4_NOFPU__ */ 2950 ptabs/l r18, tr0 /* Prepare to return through r18. */ 2951 addi.l r15, -27*8, r15 /* Reserve 27 eight-byte slots for the general and target registers. */ 2952 gettr tr7, r62 /* tr5-tr7 are read into r60-r62 and stored in slots 24-26; the previous contents of r60-r62 are overwritten, not saved. */ 2953 gettr tr6, r61 2954 gettr tr5, r60 2955 st.q r15, 26*8, r62 2956 st.q r15, 25*8, r61 2957 st.q r15, 24*8, r60 2958 st.q r15, 23*8, r59 /* Slots 8-23: r44-r59. */ 2959 st.q r15, 22*8, r58 2960 st.q r15, 21*8, r57 2961 st.q r15, 20*8, r56 2962 st.q r15, 19*8, r55 2963 st.q r15, 18*8, r54 2964 st.q r15, 17*8, r53 2965 st.q r15, 16*8, r52 2966 st.q r15, 15*8, r51 2967 st.q r15, 14*8, r50 2968 st.q r15, 13*8, r49 2969 st.q r15, 12*8, r48 2970 st.q r15, 11*8, r47 2971 st.q r15, 10*8, r46 2972 st.q r15, 9*8, r45 2973 st.q r15, 8*8, r44 2974 st.q r15, 7*8, r35 /* Slots 0-7: r28-r35. */ 2975 st.q r15, 6*8, r34 2976 st.q r15, 5*8, r33 2977 st.q r15, 4*8, r32 2978 st.q r15, 3*8, r31 2979 st.q r15, 2*8, r30 2980 st.q r15, 1*8, r29 2981 st.q r15, 0*8, r28 2982 blink tr0, r63 2983#ifndef __SH4_NOFPU__ 2984 ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) 2985#else 2986 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) 2987#endif 2988#ifndef __SH4_NOFPU__ 2989 .global GLOBAL(GCC_pop_shmedia_regs) 2990 FUNC(GLOBAL(GCC_pop_shmedia_regs)) 2991GLOBAL(GCC_pop_shmedia_regs): 2992 pt .L0, tr1 2993 movi 41*8, r0 /* r0 = bytes released at .L0: 14 floating-point slots plus 27 general slots. */ 2994 fld.d r15, 40*8, dr62 /* The floating-point slots sit above the 27 general slots, hence offsets 27*8 to 40*8. */ 2995 fld.d r15, 39*8, dr60 2996 fld.d r15, 38*8, dr58 2997 fld.d r15, 37*8, dr56 2998 fld.d r15, 36*8, dr54 2999 fld.d r15, 35*8, dr52 3000 fld.d r15, 34*8, dr50 3001 fld.d r15, 33*8, dr48 3002 fld.d r15, 32*8, dr46 3003 fld.d r15, 31*8, dr44 3004 fld.d r15, 30*8, dr42 3005 fld.d r15, 29*8, dr40 3006 fld.d r15, 28*8, dr38 3007 fld.d r15, 27*8, dr36 3008 blink tr1, r63 /* Continue at .L0, skipping the movi 27*8 that precedes it. */ 3009#else /* ! __SH4_NOFPU__ */ 3010 .global GLOBAL(GCC_pop_shmedia_regs_nofpu) 3011 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) 3012GLOBAL(GCC_pop_shmedia_regs_nofpu): 3013#endif /* ! 
__SH4_NOFPU__ */ 3014 movi 27*8, r0 /* No-FPU entry: only the 27 general slots are released. */ 3015.L0: /* Common tail; r0 holds the number of bytes to release. */ 3016 ptabs r18, tr0 3017 ld.q r15, 26*8, r62 3018 ld.q r15, 25*8, r61 3019 ld.q r15, 24*8, r60 3020 ptabs r62, tr7 /* Restore tr5-tr7 from slots 24-26; r60-r62 are left holding those same values. */ 3021 ptabs r61, tr6 3022 ptabs r60, tr5 3023 ld.q r15, 23*8, r59 /* Slots 8-23: r44-r59. */ 3024 ld.q r15, 22*8, r58 3025 ld.q r15, 21*8, r57 3026 ld.q r15, 20*8, r56 3027 ld.q r15, 19*8, r55 3028 ld.q r15, 18*8, r54 3029 ld.q r15, 17*8, r53 3030 ld.q r15, 16*8, r52 3031 ld.q r15, 15*8, r51 3032 ld.q r15, 14*8, r50 3033 ld.q r15, 13*8, r49 3034 ld.q r15, 12*8, r48 3035 ld.q r15, 11*8, r47 3036 ld.q r15, 10*8, r46 3037 ld.q r15, 9*8, r45 3038 ld.q r15, 8*8, r44 3039 ld.q r15, 7*8, r35 /* Slots 0-7: r28-r35. */ 3040 ld.q r15, 6*8, r34 3041 ld.q r15, 5*8, r33 3042 ld.q r15, 4*8, r32 3043 ld.q r15, 3*8, r31 3044 ld.q r15, 2*8, r30 3045 ld.q r15, 1*8, r29 3046 ld.q r15, 0*8, r28 3047 add.l r15, r0, r15 /* Release the whole save area. */ 3048 blink tr0, r63 3049 3050#ifndef __SH4_NOFPU__ 3051 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) 3052#else 3053 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) 3054#endif 3055#endif /* L_push_pop_shmedia_regs */ 3056#endif /* __SH5__ == 32 */ 3057 3058#ifdef L_div_table 3059#if __SH5__ 3060#if defined(__pic__) && defined(__SHMEDIA__) 3061 .global GLOBAL(sdivsi3) 3062 FUNC(GLOBAL(sdivsi3)) 3063#if __SH5__ == 32 3064 .section .text..SHmedia32,"ax" 3065#else 3066 .text 3067#endif 3068#if 0 3069/* ??? FIXME: Presumably due to a linker bug, exporting data symbols 3070 in a text section does not work (at least for shared libraries): 3071 the linker sets the LSB of the address as if this was SHmedia code. 
*/ 3072#define TEXT_DATA_BUG 3073#endif 3074 .align 2 3075 // inputs: r4,r5 3076 // clobbered: r1,r18,r19,r20,r21,r25,tr0 3077 // result in r0 3078 .global GLOBAL(sdivsi3) 3079GLOBAL(sdivsi3): /* tr0 is first borrowed to materialize the table address, which gettr then copies into r20. */ 3080#ifdef TEXT_DATA_BUG 3081 ptb datalabel Local_div_table,tr0 3082#else 3083 ptb GLOBAL(div_table_internal),tr0 3084#endif 3085 nsb r5, r1 /* r1 = nsb(r5), the normalizing shift count. */ 3086 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 3087 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) 3088 /* bubble */ 3089 gettr tr0,r20 3090 ldx.ub r20, r21, r19 /* Byte factor at table[index]. */ // u0.8 3091 shari r25, 32, r25 // normalize to s2.30 3092 shlli r21, 1, r21 /* Double the index for the 16-bit constants. */ 3093 muls.l r25, r19, r19 // s2.38 3094 ldx.w r20, r21, r21 // s2.14 3095 ptabs r18, tr0 /* Take the return address from r18 now; r18 is reused as scratch below. */ 3096 shari r19, 24, r19 // truncate to s2.14 3097 sub r21, r19, r19 // some 11 bit inverse in s1.14 3098 muls.l r19, r19, r21 // u0.28 3099 sub r63, r1, r1 3100 addi r1, 92, r1 /* r1 = 92 - nsb(r5): the shift count for the final shard. */ 3101 muls.l r25, r21, r18 // s2.58 3102 shlli r19, 45, r19 // multiply by two and convert to s2.58 3103 /* bubble */ 3104 sub r19, r18, r18 3105 shari r18, 28, r18 // some 22 bit inverse in s1.30 3106 muls.l r18, r25, r0 // s2.60 3107 muls.l r18, r4, r25 // s32.30 3108 /* bubble */ 3109 shari r0, 16, r19 // s-16.44 3110 muls.l r19, r18, r19 // s-16.74 3111 shari r25, 63, r0 /* r0 = 0 or -1, from the sign of the s32.30 product. */ 3112 shari r4, 14, r18 // s19.-14 3113 shari r19, 30, r19 // s-16.44 3114 muls.l r19, r18, r19 // s15.30 3115 xor r21, r0, r21 // You could also use the constant 1 << 27. 3116 add r21, r25, r21 3117 sub r21, r19, r21 3118 shard r21, r1, r21 3119 sub r21, r0, r0 /* The quotient is left in r0. */ 3120 blink tr0, r63 3121 ENDFUNC(GLOBAL(sdivsi3)) 3122/* This table has been generated by divtab.c . 3123Defects for bias -330: 3124 Max defect: 6.081536e-07 at -1.000000e+00 3125 Min defect: 2.849516e-08 at 1.030651e+00 3126 Max 2nd step defect: 9.606539e-12 at -1.000000e+00 3127 Min 2nd step defect: 0.000000e+00 at 0.000000e+00 3128 Defect at 1: 1.238659e-07 3129 Defect at -2: 1.061708e-07 */ 3130#else /* ! __pic__ || ! 
__SHMEDIA__ */ 3131 .section .rodata 3132#endif /* __pic__ */ 3133#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) 3134 .balign 2 3135 .type Local_div_table,@object 3136 .size Local_div_table,128 3137/* negative division constants */ 3138 .word -16638 3139 .word -17135 3140 .word -17737 3141 .word -18433 3142 .word -19103 3143 .word -19751 3144 .word -20583 3145 .word -21383 3146 .word -22343 3147 .word -23353 3148 .word -24407 3149 .word -25582 3150 .word -26863 3151 .word -28382 3152 .word -29965 3153 .word -31800 3154/* negative division factors */ 3155 .byte 66 3156 .byte 70 3157 .byte 75 3158 .byte 81 3159 .byte 87 3160 .byte 93 3161 .byte 101 3162 .byte 109 3163 .byte 119 3164 .byte 130 3165 .byte 142 3166 .byte 156 3167 .byte 172 3168 .byte 192 3169 .byte 214 3170 .byte 241 3171 .skip 16 3172Local_div_table: 3173 .skip 16 3174/* positive division factors */ 3175 .byte 241 3176 .byte 214 3177 .byte 192 3178 .byte 172 3179 .byte 156 3180 .byte 142 3181 .byte 130 3182 .byte 119 3183 .byte 109 3184 .byte 101 3185 .byte 93 3186 .byte 87 3187 .byte 81 3188 .byte 75 3189 .byte 70 3190 .byte 66 3191/* positive division constants */ 3192 .word 31801 3193 .word 29966 3194 .word 28383 3195 .word 26864 3196 .word 25583 3197 .word 24408 3198 .word 23354 3199 .word 22344 3200 .word 21384 3201 .word 20584 3202 .word 19752 3203 .word 19104 3204 .word 18434 3205 .word 17738 3206 .word 17136 3207 .word 16639 3208 .section .rodata 3209#endif /* TEXT_DATA_BUG */ 3210 .balign 2 3211 .type GLOBAL(div_table),@object 3212 .size GLOBAL(div_table),128 3213/* negative division constants */ 3214 .word -16638 3215 .word -17135 3216 .word -17737 3217 .word -18433 3218 .word -19103 3219 .word -19751 3220 .word -20583 3221 .word -21383 3222 .word -22343 3223 .word -23353 3224 .word -24407 3225 .word -25582 3226 .word -26863 3227 .word -28382 3228 .word -29965 3229 .word -31800 3230/* negative division factors */ 3231 .byte 66 3232 .byte 70 3233 .byte 75 3234 .byte 81 
3235 .byte 87 3236 .byte 93 3237 .byte 101 3238 .byte 109 3239 .byte 119 3240 .byte 130 3241 .byte 142 3242 .byte 156 3243 .byte 172 3244 .byte 192 3245 .byte 214 3246 .byte 241 3247 .skip 16 3248 .global GLOBAL(div_table) 3249GLOBAL(div_table): 3250 HIDDEN_ALIAS(div_table_internal,div_table) 3251 .skip 16 3252/* positive division factors */ 3253 .byte 241 3254 .byte 214 3255 .byte 192 3256 .byte 172 3257 .byte 156 3258 .byte 142 3259 .byte 130 3260 .byte 119 3261 .byte 109 3262 .byte 101 3263 .byte 93 3264 .byte 87 3265 .byte 81 3266 .byte 75 3267 .byte 70 3268 .byte 66 3269/* positive division constants */ 3270 .word 31801 3271 .word 29966 3272 .word 28383 3273 .word 26864 3274 .word 25583 3275 .word 24408 3276 .word 23354 3277 .word 22344 3278 .word 21384 3279 .word 20584 3280 .word 19752 3281 .word 19104 3282 .word 18434 3283 .word 17738 3284 .word 17136 3285 .word 16639 3286 3287#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) 3288/* This code uses shld, thus is not suitable for SH1 / SH2. */ 3289 3290/* Signed / unsigned division without use of FPU, optimized for SH4. 3291 Uses a lookup table for divisors in the range -128 .. +128, and 3292 div1 with case distinction for larger divisors in three more ranges. 3293 The code is lumped together with the table to allow the use of mova. 
*/ 3294#ifdef __LITTLE_ENDIAN__ 3295#define L_LSB 0 3296#define L_LSWMSB 1 3297#define L_MSWLSB 2 3298#else 3299#define L_LSB 3 3300#define L_LSWMSB 2 3301#define L_MSWLSB 1 3302#endif 3303 3304 .balign 4 3305 .global GLOBAL(udivsi3_i4i) 3306 FUNC(GLOBAL(udivsi3_i4i)) /* Unsigned divide: dividend in r4, divisor in r5, quotient returned in r0. r4 and r5 are pushed and popped again on every path shown here. */ 3307GLOBAL(udivsi3_i4i): 3308 mov.w LOCAL(c128_w), r1 /* r1 = 128. */ 3309 div0u 3310 mov r4,r0 3311 shlr8 r0 3312 cmp/hi r1,r5 /* T = divisor > 128 (unsigned). */ 3313 extu.w r5,r1 3314 bf LOCAL(udiv_le128) /* Divisor <= 128: table-driven path. */ 3315 cmp/eq r5,r1 3316 bf LOCAL(udiv_ge64k) /* Divisor differs from its low 16 bits, i.e. it is >= 64K. */ 3317 shlr r0 3318 mov r5,r1 /* Keep the unshifted divisor so it can be saved below. */ 3319 shll16 r5 3320 mov.l r4,@-r15 3321 div1 r5,r0 3322 mov.l r1,@-r15 3323 div1 r5,r0 3324 div1 r5,r0 3325 bra LOCAL(udiv_25) /* Join the div1 sequence shared with sdivsi3_i4i. */ 3326 div1 r5,r0 3327 3328LOCAL(div_le128): 3329 mova LOCAL(div_table_ix),r0 3330 bra LOCAL(div_le128_2) 3331 mov.b @(r0,r5),r1 3332LOCAL(udiv_le128): 3333 mov.l r4,@-r15 3334 mova LOCAL(div_table_ix),r0 3335 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], the offset into the inverse table. */ 3336 mov.l r5,@-r15 3337LOCAL(div_le128_2): 3338 mova LOCAL(div_table_inv),r0 3339 mov.l @(r0,r1),r1 /* r1 = normalized inverse selected by that offset. */ 3340 mov r5,r0 3341 tst #0xfe,r0 /* T = ((divisor & 0xfe) == 0), i.e. the divisor is 0 or 1. */ 3342 mova LOCAL(div_table_clz),r0 3343 dmulu.l r1,r4 /* mach:macl = inverse * dividend. */ 3344 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], used as the final shld count. */ 3345 bt/s LOCAL(div_by_1) 3346 mov r4,r0 3347 mov.l @r15+,r5 3348 sts mach,r0 3349 /* clrt */ 3350 addc r4,r0 /* Presumably accounts for the implicit leading 1 of the inverse (see the div_table_inv comment); confirm. */ 3351 mov.l @r15+,r4 3352 rotcr r0 3353 rts 3354 shld r1,r0 3355 3356LOCAL(div_by_1_neg): 3357 neg r4,r0 3358LOCAL(div_by_1): /* The quotient is already in r0; restore r5 and r4 and return. */ 3359 mov.l @r15+,r5 3360 rts 3361 mov.l @r15+,r4 3362 3363LOCAL(div_ge64k): 3364 bt/s LOCAL(div_r8) 3365 div0u 3366 shll8 r5 3367 bra LOCAL(div_ge64k_2) 3368 div1 r5,r0 3369LOCAL(udiv_ge64k): 3370 cmp/hi r0,r5 /* T = divisor > (dividend >> 8). */ 3371 mov r5,r1 3372 bt LOCAL(udiv_r8) 3373 shll8 r5 3374 mov.l r4,@-r15 3375 div1 r5,r0 3376 mov.l r1,@-r15 3377LOCAL(div_ge64k_2): 3378 div1 r5,r0 3379 mov.l LOCAL(zero_l),r1 3380 .rept 4 3381 div1 r5,r0 3382 .endr 3383 mov.l r1,@-r15 /* Push a zero word; quotient bytes are stored into it with mov.b. */ 3384 div1 r5,r0 3385 mov.w LOCAL(m256_w),r1 /* r1 <- the m256_w constant (.word 0xff00). */ 3386 div1 r5,r0 3387 mov.b r0,@(L_LSWMSB,r15) 3388 xor r4,r0 3389 and r1,r0 3390 bra LOCAL(div_ge64k_end) 3391 xor r4,r0 3392 3393LOCAL(div_r8): 3394 shll16 r4 3395 bra LOCAL(div_r8_2) 3396 shll8 r4 
3397LOCAL(udiv_r8): 3398 mov.l r4,@-r15 3399 shll16 r4 3400 clrt 3401 shll8 r4 3402 mov.l r5,@-r15 3403LOCAL(div_r8_2): 3404 rotcl r4 3405 mov r0,r1 3406 div1 r5,r1 3407 mov r4,r0 3408 rotcl r0 3409 mov r5,r4 /* The divisor is copied to r4 because r5 is restored from the stack before the last div1. */ 3410 div1 r5,r1 3411 .rept 5 3412 rotcl r0; div1 r5,r1 3413 .endr 3414 rotcl r0 3415 mov.l @r15+,r5 3416 div1 r4,r1 3417 mov.l @r15+,r4 3418 rts 3419 rotcl r0 3420 3421 ENDFUNC(GLOBAL(udivsi3_i4i)) 3422 3423 .global GLOBAL(sdivsi3_i4i) 3424 FUNC(GLOBAL(sdivsi3_i4i)) 3425 /* This is link-compatible with a GLOBAL(sdivsi3) call, 3426 but we effectively clobber only r1. */ 3427GLOBAL(sdivsi3_i4i): 3428 mov.l r4,@-r15 3429 cmp/pz r5 /* T = divisor >= 0. */ 3430 mov.w LOCAL(c128_w), r1 3431 bt/s LOCAL(pos_divisor) 3432 cmp/pz r4 /* T = dividend >= 0. */ 3433 mov.l r5,@-r15 3434 neg r5,r5 /* Negative divisor: saved above, now replaced by its negation. */ 3435 bt/s LOCAL(neg_result) /* Dividend >= 0 with a negative divisor: take the path that negates the quotient. */ 3436 cmp/hi r1,r5 /* T = divisor magnitude > 128. */ 3437 neg r4,r4 /* Both operands were negative: negate the dividend too and use the positive-result path. */ 3438LOCAL(pos_result): 3439 extu.w r5,r0 3440 bf LOCAL(div_le128) 3441 cmp/eq r5,r0 3442 mov r4,r0 3443 shlr8 r0 3444 bf/s LOCAL(div_ge64k) 3445 cmp/hi r0,r5 3446 div0u 3447 shll16 r5 3448 div1 r5,r0 3449 div1 r5,r0 3450 div1 r5,r0 3451LOCAL(udiv_25): /* Entry point shared with udivsi3_i4i. */ 3452 mov.l LOCAL(zero_l),r1 3453 div1 r5,r0 3454 div1 r5,r0 3455 mov.l r1,@-r15 /* Push a zero word; quotient bytes are stored into it with mov.b. */ 3456 .rept 3 3457 div1 r5,r0 3458 .endr 3459 mov.b r0,@(L_MSWLSB,r15) 3460 xtrct r4,r0 3461 swap.w r0,r0 3462 .rept 8 3463 div1 r5,r0 3464 .endr 3465 mov.b r0,@(L_LSWMSB,r15) 3466LOCAL(div_ge64k_end): 3467 .rept 8 3468 div1 r5,r0 3469 .endr 3470 mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
3471 extu.b r0,r0 3472 mov.l @r15+,r5 3473 or r4,r0 /* Merge the bytes collected in the scratch word with the low byte in r0. */ 3474 mov.l @r15+,r4 3475 rts 3476 rotcl r0 3477 3478LOCAL(div_le128_neg): /* Same table-driven sequence as div_le128_2, with the quotient negated on return. */ 3479 tst #0xfe,r0 3480 mova LOCAL(div_table_ix),r0 3481 mov.b @(r0,r5),r1 3482 mova LOCAL(div_table_inv),r0 3483 bt/s LOCAL(div_by_1_neg) 3484 mov.l @(r0,r1),r1 3485 mova LOCAL(div_table_clz),r0 3486 dmulu.l r1,r4 3487 mov.b @(r0,r5),r1 3488 mov.l @r15+,r5 3489 sts mach,r0 3490 /* clrt */ 3491 addc r4,r0 3492 mov.l @r15+,r4 3493 rotcr r0 3494 shld r1,r0 3495 rts 3496 neg r0,r0 3497 3498LOCAL(pos_divisor): 3499 mov.l r5,@-r15 3500 bt/s LOCAL(pos_result) 3501 cmp/hi r1,r5 3502 neg r4,r4 /* Negative dividend with a positive divisor: divide its negation and negate the quotient. */ 3503LOCAL(neg_result): 3504 extu.w r5,r0 3505 bf LOCAL(div_le128_neg) 3506 cmp/eq r5,r0 3507 mov r4,r0 3508 shlr8 r0 3509 bf/s LOCAL(div_ge64k_neg) 3510 cmp/hi r0,r5 3511 div0u 3512 mov.l LOCAL(zero_l),r1 3513 shll16 r5 3514 div1 r5,r0 3515 mov.l r1,@-r15 3516 .rept 7 3517 div1 r5,r0 3518 .endr 3519 mov.b r0,@(L_MSWLSB,r15) 3520 xtrct r4,r0 3521 swap.w r0,r0 3522 .rept 8 3523 div1 r5,r0 3524 .endr 3525 mov.b r0,@(L_LSWMSB,r15) 3526LOCAL(div_ge64k_neg_end): 3527 .rept 8 3528 div1 r5,r0 3529 .endr 3530 mov.l @r15+,r4 ! zero-extension and swap using LS unit. 
3531 extu.b r0,r1 3532 mov.l @r15+,r5 3533 or r4,r1 3534LOCAL(div_r8_neg_end): 3535 mov.l @r15+,r4 3536 rotcl r1 3537 rts 3538 neg r1,r0 3539 3540LOCAL(div_ge64k_neg): 3541 bt/s LOCAL(div_r8_neg) 3542 div0u 3543 shll8 r5 3544 mov.l LOCAL(zero_l),r1 3545 .rept 6 3546 div1 r5,r0 3547 .endr 3548 mov.l r1,@-r15 3549 div1 r5,r0 3550 mov.w LOCAL(m256_w),r1 3551 div1 r5,r0 3552 mov.b r0,@(L_LSWMSB,r15) 3553 xor r4,r0 3554 and r1,r0 3555 bra LOCAL(div_ge64k_neg_end) 3556 xor r4,r0 3557 3558LOCAL(c128_w): 3559 .word 128 3560 3561LOCAL(div_r8_neg): 3562 clrt 3563 shll16 r4 3564 mov r4,r1 3565 shll8 r1 3566 mov r5,r4 3567 .rept 7 3568 rotcl r1; div1 r5,r0 3569 .endr 3570 mov.l @r15+,r5 3571 rotcl r1 3572 bra LOCAL(div_r8_neg_end) 3573 div1 r4,r0 3574 3575LOCAL(m256_w): 3576 .word 0xff00 3577/* This table has been generated by divtab-sh4.c. */ 3578 .balign 4 3579LOCAL(div_table_clz): 3580 .byte 0 3581 .byte 1 3582 .byte 0 3583 .byte -1 3584 .byte -1 3585 .byte -2 3586 .byte -2 3587 .byte -2 3588 .byte -2 3589 .byte -3 3590 .byte -3 3591 .byte -3 3592 .byte -3 3593 .byte -3 3594 .byte -3 3595 .byte -3 3596 .byte -3 3597 .byte -4 3598 .byte -4 3599 .byte -4 3600 .byte -4 3601 .byte -4 3602 .byte -4 3603 .byte -4 3604 .byte -4 3605 .byte -4 3606 .byte -4 3607 .byte -4 3608 .byte -4 3609 .byte -4 3610 .byte -4 3611 .byte -4 3612 .byte -4 3613 .byte -5 3614 .byte -5 3615 .byte -5 3616 .byte -5 3617 .byte -5 3618 .byte -5 3619 .byte -5 3620 .byte -5 3621 .byte -5 3622 .byte -5 3623 .byte -5 3624 .byte -5 3625 .byte -5 3626 .byte -5 3627 .byte -5 3628 .byte -5 3629 .byte -5 3630 .byte -5 3631 .byte -5 3632 .byte -5 3633 .byte -5 3634 .byte -5 3635 .byte -5 3636 .byte -5 3637 .byte -5 3638 .byte -5 3639 .byte -5 3640 .byte -5 3641 .byte -5 3642 .byte -5 3643 .byte -5 3644 .byte -5 3645 .byte -6 3646 .byte -6 3647 .byte -6 3648 .byte -6 3649 .byte -6 3650 .byte -6 3651 .byte -6 3652 .byte -6 3653 .byte -6 3654 .byte -6 3655 .byte -6 3656 .byte -6 3657 .byte -6 3658 .byte -6 3659 .byte 
-6 3660 .byte -6 3661 .byte -6 3662 .byte -6 3663 .byte -6 3664 .byte -6 3665 .byte -6 3666 .byte -6 3667 .byte -6 3668 .byte -6 3669 .byte -6 3670 .byte -6 3671 .byte -6 3672 .byte -6 3673 .byte -6 3674 .byte -6 3675 .byte -6 3676 .byte -6 3677 .byte -6 3678 .byte -6 3679 .byte -6 3680 .byte -6 3681 .byte -6 3682 .byte -6 3683 .byte -6 3684 .byte -6 3685 .byte -6 3686 .byte -6 3687 .byte -6 3688 .byte -6 3689 .byte -6 3690 .byte -6 3691 .byte -6 3692 .byte -6 3693 .byte -6 3694 .byte -6 3695 .byte -6 3696 .byte -6 3697 .byte -6 3698 .byte -6 3699 .byte -6 3700 .byte -6 3701 .byte -6 3702 .byte -6 3703 .byte -6 3704 .byte -6 3705 .byte -6 3706 .byte -6 3707 .byte -6 3708/* Lookup table translating positive divisor to index into table of 3709 normalized inverse. N.B. the '0' entry is also the last entry of the 3710 previous table, and causes an unaligned access for division by zero. */ 3711LOCAL(div_table_ix): 3712 .byte -6 3713 .byte -128 3714 .byte -128 3715 .byte 0 3716 .byte -128 3717 .byte -64 3718 .byte 0 3719 .byte 64 3720 .byte -128 3721 .byte -96 3722 .byte -64 3723 .byte -32 3724 .byte 0 3725 .byte 32 3726 .byte 64 3727 .byte 96 3728 .byte -128 3729 .byte -112 3730 .byte -96 3731 .byte -80 3732 .byte -64 3733 .byte -48 3734 .byte -32 3735 .byte -16 3736 .byte 0 3737 .byte 16 3738 .byte 32 3739 .byte 48 3740 .byte 64 3741 .byte 80 3742 .byte 96 3743 .byte 112 3744 .byte -128 3745 .byte -120 3746 .byte -112 3747 .byte -104 3748 .byte -96 3749 .byte -88 3750 .byte -80 3751 .byte -72 3752 .byte -64 3753 .byte -56 3754 .byte -48 3755 .byte -40 3756 .byte -32 3757 .byte -24 3758 .byte -16 3759 .byte -8 3760 .byte 0 3761 .byte 8 3762 .byte 16 3763 .byte 24 3764 .byte 32 3765 .byte 40 3766 .byte 48 3767 .byte 56 3768 .byte 64 3769 .byte 72 3770 .byte 80 3771 .byte 88 3772 .byte 96 3773 .byte 104 3774 .byte 112 3775 .byte 120 3776 .byte -128 3777 .byte -124 3778 .byte -120 3779 .byte -116 3780 .byte -112 3781 .byte -108 3782 .byte -104 3783 .byte -100 3784 .byte 
-96 3785 .byte -92 3786 .byte -88 3787 .byte -84 3788 .byte -80 3789 .byte -76 3790 .byte -72 3791 .byte -68 3792 .byte -64 3793 .byte -60 3794 .byte -56 3795 .byte -52 3796 .byte -48 3797 .byte -44 3798 .byte -40 3799 .byte -36 3800 .byte -32 3801 .byte -28 3802 .byte -24 3803 .byte -20 3804 .byte -16 3805 .byte -12 3806 .byte -8 3807 .byte -4 3808 .byte 0 3809 .byte 4 3810 .byte 8 3811 .byte 12 3812 .byte 16 3813 .byte 20 3814 .byte 24 3815 .byte 28 3816 .byte 32 3817 .byte 36 3818 .byte 40 3819 .byte 44 3820 .byte 48 3821 .byte 52 3822 .byte 56 3823 .byte 60 3824 .byte 64 3825 .byte 68 3826 .byte 72 3827 .byte 76 3828 .byte 80 3829 .byte 84 3830 .byte 88 3831 .byte 92 3832 .byte 96 3833 .byte 100 3834 .byte 104 3835 .byte 108 3836 .byte 112 3837 .byte 116 3838 .byte 120 3839 .byte 124 3840 .byte -128 3841/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ 3842 .balign 4 3843LOCAL(zero_l): 3844 .long 0x0 3845 .long 0xF81F81F9 3846 .long 0xF07C1F08 3847 .long 0xE9131AC0 3848 .long 0xE1E1E1E2 3849 .long 0xDAE6076C 3850 .long 0xD41D41D5 3851 .long 0xCD856891 3852 .long 0xC71C71C8 3853 .long 0xC0E07039 3854 .long 0xBACF914D 3855 .long 0xB4E81B4F 3856 .long 0xAF286BCB 3857 .long 0xA98EF607 3858 .long 0xA41A41A5 3859 .long 0x9EC8E952 3860 .long 0x9999999A 3861 .long 0x948B0FCE 3862 .long 0x8F9C18FA 3863 .long 0x8ACB90F7 3864 .long 0x86186187 3865 .long 0x81818182 3866 .long 0x7D05F418 3867 .long 0x78A4C818 3868 .long 0x745D1746 3869 .long 0x702E05C1 3870 .long 0x6C16C16D 3871 .long 0x68168169 3872 .long 0x642C8591 3873 .long 0x60581606 3874 .long 0x5C9882BA 3875 .long 0x58ED2309 3876LOCAL(div_table_inv): 3877 .long 0x55555556 3878 .long 0x51D07EAF 3879 .long 0x4E5E0A73 3880 .long 0x4AFD6A06 3881 .long 0x47AE147B 3882 .long 0x446F8657 3883 .long 0x41414142 3884 .long 0x3E22CBCF 3885 .long 0x3B13B13C 3886 .long 0x38138139 3887 .long 0x3521CFB3 3888 .long 0x323E34A3 3889 .long 0x2F684BDB 3890 .long 0x2C9FB4D9 3891 .long 0x29E4129F 3892 .long 
0x27350B89 3893 .long 0x24924925 3894 .long 0x21FB7813 3895 .long 0x1F7047DD 3896 .long 0x1CF06ADB 3897 .long 0x1A7B9612 3898 .long 0x18118119 3899 .long 0x15B1E5F8 3900 .long 0x135C8114 3901 .long 0x11111112 3902 .long 0xECF56BF 3903 .long 0xC9714FC 3904 .long 0xA6810A7 3905 .long 0x8421085 3906 .long 0x624DD30 3907 .long 0x4104105 3908 .long 0x2040811 3909 /* maximum error: 0.987342 scaled: 0.921875*/ 3910 3911 ENDFUNC(GLOBAL(sdivsi3_i4i)) 3912#endif /* SH3 / SH4 */ 3913 3914#endif /* L_div_table */ 3915 3916#ifdef L_udiv_qrnnd_16 3917#if !__SHMEDIA__ 3918 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) /* Register roles, as listed in the notes that follow: r0 = n1, r4 = n0, r5 = d, r6 = d1 on entry; r0 = rn and r1 = qn on exit; r2 holds __m. NOTE(review): the entry/exit split is inferred from the names; confirm against the caller. */ 3919 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ 3920 /* n1 < d, but n1 might be larger than d1. */ 3921 .global GLOBAL(udiv_qrnnd_16) 3922 .balign 8 3923GLOBAL(udiv_qrnnd_16): 3924 div0u 3925 cmp/hi r6,r0 3926 bt .Lots /* n1 > d1 (unsigned): handled at .Lots. */ 3927 .rept 16 3928 div1 r6,r0 3929 .endr 3930 extu.w r0,r1 3931 bt 0f 3932 add r6,r0 39330: rotcl r1 3934 mulu.w r1,r5 /* 16-bit multiply of r1 by r5; the product is read from macl into r2 (__m). */ 3935 xtrct r4,r0 3936 swap.w r0,r0 3937 sts macl,r2 3938 cmp/hs r2,r0 /* T = r0 >= __m (unsigned), tested before the subtraction. */ 3939 sub r2,r0 3940 bt 0f /* No borrow: qn and rn are final. */ 3941 addc r5,r0 /* First correction: add d back and decrement qn. */ 3942 add #-1,r1 3943 bt 0f /* A carry out of that add ends the correction. */ 39441: add #-1,r1 /* Second correction: qn is decremented again and d is added to rn once more. */ 3945 rts 3946 add r5,r0 3947 .balign 8 3948.Lots: 3949 sub r5,r0 3950 swap.w r4,r1 3951 xtrct r0,r1 3952 clrt 3953 mov r1,r0 3954 addc r5,r0 3955 mov #-1,r1 /* SL1 is a macro defined outside this part of the file (lib1funcs.h is included near the top). NOTE(review): presumably a bf to 1b paired with shlr16 r1; confirm. */ 3956 SL1(bf, 1b, 3957 shlr16 r1) 39580: rts 3959 nop 3960 ENDFUNC(GLOBAL(udiv_qrnnd_16)) 3961#endif /* !__SHMEDIA__ */ 3962#endif /* L_udiv_qrnnd_16 */ 3963