1/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002 2 Free Software Foundation, Inc. 3 4This file is free software; you can redistribute it and/or modify it 5under the terms of the GNU General Public License as published by the 6Free Software Foundation; either version 2, or (at your option) any 7later version. 8 9In addition to the permissions in the GNU General Public License, the 10Free Software Foundation gives you unlimited permission to link the 11compiled version of this file into combinations with other programs, 12and to distribute those combinations without any restriction coming 13from the use of this file. (The General Public License restrictions 14do apply in other respects; for example, they cover modification of 15the file, and distribution when not linked into a combine 16executable.) 17 18This file is distributed in the hope that it will be useful, but 19WITHOUT ANY WARRANTY; without even the implied warranty of 20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21General Public License for more details. 22 23You should have received a copy of the GNU General Public License 24along with this program; see the file COPYING. If not, write to 25the Free Software Foundation, 59 Temple Place - Suite 330, 26Boston, MA 02111-1307, USA. */ 27 28!! libgcc routines for the Hitachi / SuperH SH CPUs. 29!! Contributed by Steve Chamberlain. 30!! sac@cygnus.com 31 32!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines 33!! recoded in assembly by Toshiyasu Morita 34!! 
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

/* Helper macros shared by every routine in this file.

   LOCAL(X)    spelling of a file-local label (".L_" prefix on ELF,
               "L_" elsewhere).
   FUNC(X,Y)   on ELF, emit ".type X,Y" and ".hidden X"; empty elsewhere.
   ENDFUNC(X)  on ELF, emit ".size X,.-X", i.e. the symbol size is the
               distance from X to the place ENDFUNC is written; empty
               elsewhere.  It must therefore follow the last byte that
               belongs to X.
   GLOBAL(X)   __USER_LABEL_PREFIX__ followed by "__" and X.  */
#ifdef __ELF__
#define LOCAL(X)	.L_##X
#define FUNC(X,Y)	.type X,Y; .hidden X
#define ENDFUNC(X)	.size X,.-X
#else
#define LOCAL(X)	L_##X
#define FUNC(X,Y)
#define ENDFUNC(X)
#endif

#define	CONCAT(A,B)	A##B
#define	GLOBAL0(U,X)	CONCAT(U,__##X)
#define	GLOBAL(X)	GLOBAL0(__USER_LABEL_PREFIX__,X)

/* FMOVD_WORKS selects the single fmov.d load of the 2^31 constant in the
   udivsi3_i4 routines instead of two fmov.s loads.  */
#if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
#define FMOVD_WORKS
#endif

#if ! __SH5__
#ifdef L_ashiftrt

!
! GLOBAL(ashiftrt_r4_N), N = 0 .. 32
!
! Arithmetic right shift of r4 by the constant N, performed in place.
!
! Entry:
!
!	r4: Value to shift
!
! Exit:
!
!	r4: Value shifted right arithmetically by N
!
! The entry points form a ladder: entering at label N executes the
! remaining shar instructions and falls through to the next shortcut
! (byte or word extraction for 24 and 16) or to the final rts.
! The T bit is altered by shar / rotcl / subc.
!
	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	FUNC(GLOBAL(ashiftrt_r4_0),function)
	FUNC(GLOBAL(ashiftrt_r4_1),function)
	FUNC(GLOBAL(ashiftrt_r4_2),function)
	FUNC(GLOBAL(ashiftrt_r4_3),function)
	FUNC(GLOBAL(ashiftrt_r4_4),function)
	FUNC(GLOBAL(ashiftrt_r4_5),function)
	FUNC(GLOBAL(ashiftrt_r4_6),function)
	FUNC(GLOBAL(ashiftrt_r4_7),function)
	FUNC(GLOBAL(ashiftrt_r4_8),function)
	FUNC(GLOBAL(ashiftrt_r4_9),function)
	FUNC(GLOBAL(ashiftrt_r4_10),function)
	FUNC(GLOBAL(ashiftrt_r4_11),function)
	FUNC(GLOBAL(ashiftrt_r4_12),function)
	FUNC(GLOBAL(ashiftrt_r4_13),function)
	FUNC(GLOBAL(ashiftrt_r4_14),function)
	FUNC(GLOBAL(ashiftrt_r4_15),function)
	FUNC(GLOBAL(ashiftrt_r4_16),function)
	FUNC(GLOBAL(ashiftrt_r4_17),function)
	FUNC(GLOBAL(ashiftrt_r4_18),function)
	FUNC(GLOBAL(ashiftrt_r4_19),function)
	FUNC(GLOBAL(ashiftrt_r4_20),function)
	FUNC(GLOBAL(ashiftrt_r4_21),function)
	FUNC(GLOBAL(ashiftrt_r4_22),function)
	FUNC(GLOBAL(ashiftrt_r4_23),function)
	FUNC(GLOBAL(ashiftrt_r4_24),function)
	FUNC(GLOBAL(ashiftrt_r4_25),function)
	FUNC(GLOBAL(ashiftrt_r4_26),function)
	FUNC(GLOBAL(ashiftrt_r4_27),function)
	FUNC(GLOBAL(ashiftrt_r4_28),function)
	FUNC(GLOBAL(ashiftrt_r4_29),function)
	FUNC(GLOBAL(ashiftrt_r4_30),function)
	FUNC(GLOBAL(ashiftrt_r4_31),function)
	FUNC(GLOBAL(ashiftrt_r4_32),function)

	.align	1
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4		! T = sign bit of the input
	rts
	subc	r4,r4		! (delay slot) r4 = 0 - T, i.e. 0 or -1

GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
GLOBAL(ashiftrt_r4_24):
	shlr16	r4
	shlr8	r4		! top byte now in bits 7..0
	rts
	exts.b	r4,r4		! (delay slot) sign-extend it

GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
GLOBAL(ashiftrt_r4_16):
	shlr16	r4		! top half now in bits 15..0
	rts
	exts.w	r4,r4		! (delay slot) sign-extend it

GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4		! (delay slot) last single-bit shift

GLOBAL(ashiftrt_r4_0):
	rts
	nop

	! Every entry point runs to the end of the ladder, so all sizes are
	! taken here (directives only, no code is emitted).
	ENDFUNC(GLOBAL(ashiftrt_r4_0))
	ENDFUNC(GLOBAL(ashiftrt_r4_1))
	ENDFUNC(GLOBAL(ashiftrt_r4_2))
	ENDFUNC(GLOBAL(ashiftrt_r4_3))
	ENDFUNC(GLOBAL(ashiftrt_r4_4))
	ENDFUNC(GLOBAL(ashiftrt_r4_5))
	ENDFUNC(GLOBAL(ashiftrt_r4_6))
	ENDFUNC(GLOBAL(ashiftrt_r4_7))
	ENDFUNC(GLOBAL(ashiftrt_r4_8))
	ENDFUNC(GLOBAL(ashiftrt_r4_9))
	ENDFUNC(GLOBAL(ashiftrt_r4_10))
	ENDFUNC(GLOBAL(ashiftrt_r4_11))
	ENDFUNC(GLOBAL(ashiftrt_r4_12))
	ENDFUNC(GLOBAL(ashiftrt_r4_13))
	ENDFUNC(GLOBAL(ashiftrt_r4_14))
	ENDFUNC(GLOBAL(ashiftrt_r4_15))
	ENDFUNC(GLOBAL(ashiftrt_r4_16))
	ENDFUNC(GLOBAL(ashiftrt_r4_17))
	ENDFUNC(GLOBAL(ashiftrt_r4_18))
	ENDFUNC(GLOBAL(ashiftrt_r4_19))
	ENDFUNC(GLOBAL(ashiftrt_r4_20))
	ENDFUNC(GLOBAL(ashiftrt_r4_21))
	ENDFUNC(GLOBAL(ashiftrt_r4_22))
	ENDFUNC(GLOBAL(ashiftrt_r4_23))
	ENDFUNC(GLOBAL(ashiftrt_r4_24))
	ENDFUNC(GLOBAL(ashiftrt_r4_25))
	ENDFUNC(GLOBAL(ashiftrt_r4_26))
	ENDFUNC(GLOBAL(ashiftrt_r4_27))
	ENDFUNC(GLOBAL(ashiftrt_r4_28))
	ENDFUNC(GLOBAL(ashiftrt_r4_29))
	ENDFUNC(GLOBAL(ashiftrt_r4_30))
	ENDFUNC(GLOBAL(ashiftrt_r4_31))
	ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif

#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a run-time count.
!
! Entry:
!
!	r4: Value to shift
!	r5: Shifts (only the low five bits are used)
!
! Exit:
!
!	r0: Result
!
! Destroys:
!
!	r5 (overwritten with the jump-table offset);
!	the T bit may be altered by the shift instructions
!

	.global	GLOBAL(ashrsi3)
	FUNC(GLOBAL(ashrsi3),function)
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = byte offset of handler from table
#ifdef __sh1__
	add	r5,r0			! r0 = absolute handler address
	jmp	@r0
#else
	! braf is relative to the address of braf + 4, which is exactly
	! where the table starts: nothing may be inserted between the
	! delay slot below and LOCAL(ashrsi3_table).
	braf	r5
#endif
	mov	r4,r0			! (delay slot) result register = value

	.align	2
	! Offsets are read with a sign-extending mov.b, so every handler
	! must stay within 127 bytes of the table start.
LOCAL(ashrsi3_table):
	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

LOCAL(ashrsi3_31):
	rotcl	r0		! T = sign bit
	rts
	subc	r0,r0		! (delay slot) r0 = 0 - T, i.e. 0 or -1

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0
	shlr8	r0
	rts
	exts.b	r0,r0

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0
	rts
	exts.w	r0,r0

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0

LOCAL(ashrsi3_0):
	rts
	nop

	! Size taken after the table and the shift ladder so that the
	! symbol covers the whole routine, not just the dispatch stub.
	ENDFUNC(GLOBAL(ashrsi3))

#endif

#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Entry:
!
!	r4: Value to shift
!
!	r5: Shifts (only the low five bits are used)
!
! Exit:
!
!	r0: Result
!
! Destroys:
!
!	r5 (overwritten with the jump-table offset);
!	the T bit may be altered by the single-bit shll steps
!
	.global	GLOBAL(ashlsi3)
	FUNC(GLOBAL(ashlsi3),function)
	.align	2
GLOBAL(ashlsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = byte offset of handler from table
#ifdef __sh1__
	add	r5,r0			! r0 = absolute handler address
	jmp	@r0
#else
	! braf is relative to the address of braf + 4, which is exactly
	! where the table starts: nothing may be inserted between the
	! delay slot below and LOCAL(ashlsi3_table).
	braf	r5
#endif
	mov	r4,r0			! (delay slot) result register = value

	.align	2
	! Offsets are read with a sign-extending mov.b, so every handler
	! must stay within 127 bytes of the table start.
LOCAL(ashlsi3_table):
	.byte		LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte		LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

	! Each group below handles four counts that differ by 2: the labels
	! peel off shll2 steps and fall through to a common tail that applies
	! the remaining fixed shift (1, 2, 8, 16 or a combination).

LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0

LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0

LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0

LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0

LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0

LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0

LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_0):
	rts
	nop

	! Size taken after the table and the shift ladder so that the
	! symbol covers the whole routine, not just the dispatch stub.
	ENDFUNC(GLOBAL(ashlsi3))

#endif

#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Entry:
!
!	r4: Value to shift
!	r5: Shifts (only the low five bits are used)
!
! Exit:
!
!	r0: Result
!
! Destroys:
!
!	r5 (overwritten with the jump-table offset);
!	the T bit may be altered by the single-bit shlr steps
!
! lshrsi3: logical right shift of r4 by (r5 & 31), result in r0.
! r5 is overwritten with the jump-table offset during dispatch.
	.global	GLOBAL(lshrsi3)
	FUNC(GLOBAL(lshrsi3),function)
	.align	2
GLOBAL(lshrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = byte offset of handler from table
#ifdef __sh1__
	add	r5,r0			! r0 = absolute handler address
	jmp	@r0
#else
	! braf is relative to the address of braf + 4, which is exactly
	! where the table starts: nothing may be inserted between the
	! delay slot below and LOCAL(lshrsi3_table).
	braf	r5
#endif
	mov	r4,r0			! (delay slot) result register = value

	.align	2
	! Offsets are read with a sign-extending mov.b, so every handler
	! must stay within 127 bytes of the table start.
LOCAL(lshrsi3_table):
	.byte		LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte		LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

	! Each group below handles four counts that differ by 2: the labels
	! peel off shlr2 steps and fall through to a common tail that applies
	! the remaining fixed shift (1, 2, 8, 16 or a combination).

LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0

LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0

LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0

LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0

LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0

LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0

LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_0):
	rts
	nop

	! Size taken after the table and the shift ladder so that the
	! symbol covers the whole routine, not just the dispatch stub.
	ENDFUNC(GLOBAL(lshrsi3))

#endif

#ifdef L_movstr
	.text
! done all the large groups, do the remainder

!
! jump to movstr+
!
! Tail of GLOBAL(movstr): reached once the 64-byte loop is finished.
! Both pointers are advanced past the last full group, then control is
! transferred into the movstrSI<n> ladder at GLOBAL(movstrSI0) + 4 * r6.
! r6 is zero or negative here (the loop exits on cmp/pl failing), so the
! target lies at or before movstrSI0 and the ladder copies the remainder.
! NOTE(review): the exact encoding of the count in r6 is set up by the
! caller and is not visible in this file.
LOCAL(movstr_done):
	add	#64,r5
	mova	GLOBAL(movstrSI0),r0
	shll2	r6			! each ladder step is 4 bytes of code
	add	r6,r0
	jmp	@r0
	add	#64,r4			! (delay slot)

! GLOBAL(movstrSI<n>), n = 4 .. 64 step 4
!
! Copy n bytes as longwords from @r5 to @r4, highest offset first.
! Each entry copies the longword at offset n-4 and falls through to the
! next smaller entry; movstrSI0 just returns.  Clobbers r0.
	.align	4
	.global	GLOBAL(movstrSI64)
	FUNC(GLOBAL(movstrSI64),function)
GLOBAL(movstrSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movstrSI60)
	FUNC(GLOBAL(movstrSI60),function)
GLOBAL(movstrSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movstrSI56)
	FUNC(GLOBAL(movstrSI56),function)
GLOBAL(movstrSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movstrSI52)
	FUNC(GLOBAL(movstrSI52),function)
GLOBAL(movstrSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movstrSI48)
	FUNC(GLOBAL(movstrSI48),function)
GLOBAL(movstrSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movstrSI44)
	FUNC(GLOBAL(movstrSI44),function)
GLOBAL(movstrSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movstrSI40)
	FUNC(GLOBAL(movstrSI40),function)
GLOBAL(movstrSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movstrSI36)
	FUNC(GLOBAL(movstrSI36),function)
GLOBAL(movstrSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movstrSI32)
	FUNC(GLOBAL(movstrSI32),function)
GLOBAL(movstrSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movstrSI28)
	FUNC(GLOBAL(movstrSI28),function)
GLOBAL(movstrSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movstrSI24)
	FUNC(GLOBAL(movstrSI24),function)
GLOBAL(movstrSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movstrSI20)
	FUNC(GLOBAL(movstrSI20),function)
GLOBAL(movstrSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movstrSI16)
	FUNC(GLOBAL(movstrSI16),function)
GLOBAL(movstrSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movstrSI12)
	FUNC(GLOBAL(movstrSI12),function)
GLOBAL(movstrSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movstrSI8)
	FUNC(GLOBAL(movstrSI8),function)
GLOBAL(movstrSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movstrSI4)
	FUNC(GLOBAL(movstrSI4),function)
GLOBAL(movstrSI4):
	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)
GLOBAL(movstrSI0):
	FUNC(GLOBAL(movstrSI0),function)
	rts
	nop
	! All ladder entries end at the rts above.
	ENDFUNC(GLOBAL(movstrSI64))
	ENDFUNC(GLOBAL(movstrSI60))
	ENDFUNC(GLOBAL(movstrSI56))
	ENDFUNC(GLOBAL(movstrSI52))
	ENDFUNC(GLOBAL(movstrSI48))
	ENDFUNC(GLOBAL(movstrSI44))
	ENDFUNC(GLOBAL(movstrSI40))
	ENDFUNC(GLOBAL(movstrSI36))
	ENDFUNC(GLOBAL(movstrSI32))
	ENDFUNC(GLOBAL(movstrSI28))
	ENDFUNC(GLOBAL(movstrSI24))
	ENDFUNC(GLOBAL(movstrSI20))
	ENDFUNC(GLOBAL(movstrSI16))
	ENDFUNC(GLOBAL(movstrSI12))
	ENDFUNC(GLOBAL(movstrSI8))
	ENDFUNC(GLOBAL(movstrSI4))
	ENDFUNC(GLOBAL(movstrSI0))

	.align	4

! GLOBAL(movstr)
!
! Block copy in groups of 16 longwords (64 bytes).
!
! Entry:
!
!	r4: Destination address
!	r5: Source address
!	r6: Loop / remainder count; decremented by 16 per group copied
!
! Destroys:
!
!	r0, r4, r5, r6, T bit
!
	.global	GLOBAL(movstr)
	FUNC(GLOBAL(movstr),function)
GLOBAL(movstr):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)

	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)

	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)

	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)

	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)

	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)

	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)

	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)

	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)

	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)

	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)

	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)

	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)

	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)

	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)

	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)

	add	#-16,r6
	cmp/pl	r6			! T = (r6 > 0): more full groups left
	bf	LOCAL(movstr_done)

	add	#64,r5
	bra	GLOBAL(movstr)
	add	#64,r4			! (delay slot)
	ENDFUNC(GLOBAL(movstr))
#endif

#ifdef L_movstr_i4
	.text
	.global	GLOBAL(movstr_i4_even)
! movstr_i4_even / movstr_i4_odd / movstrSI12_i4
!
! Longword block copies from @r5 to @r4.
!
! Entry:
!
!	r4: Destination address
!	r5: Source address (read with post-increment)
!	r6: Loop count, consumed by dt (movstr_i4_even / _odd only)
!
! Destroys:
!
!	r0-r3, r4, r5, r6, T bit
!
! NOTE(review): the relation between r6 and the byte count is fixed by
! the caller and is not visible in this file.
	.global	GLOBAL(movstr_i4_odd)
	.global	GLOBAL(movstrSI12_i4)
	FUNC(GLOBAL(movstr_i4_even),function)
	FUNC(GLOBAL(movstr_i4_odd),function)
	FUNC(GLOBAL(movstrSI12_i4),function)

	.p2align	5
	! Loop exit taken from the first dt: r0/r1 were already loaded,
	! r4 has not been advanced yet, hence offsets 16 and 20.
LOCAL(movstr_2mod4_end):
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)

	.p2align	2

GLOBAL(movstr_i4_odd):
	mov.l	@r5+,r1
	add	#-4,r4			! bias r4 so the loop offsets line up
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

LOCAL(movstr_loop):
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	LOCAL(movstr_2mod4_end)
	mov.l	@r5+,r1			! (delay slot)
	add	#16,r4
LOCAL(movstr_start_even):
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	LOCAL(movstr_loop)
	mov.l	r2,@(8,r4)		! (delay slot)
	rts
	mov.l	r3,@(12,r4)		! (delay slot)
	ENDFUNC(GLOBAL(movstr_i4_odd))

GLOBAL(movstr_i4_even):
	mov.l	@r5+,r0
	bra	LOCAL(movstr_start_even)
	mov.l	@r5+,r1			! (delay slot)
	ENDFUNC(GLOBAL(movstr_i4_even))

	.p2align	4
	! Fixed 12-byte copy; r5 and r4 are left unchanged.
GLOBAL(movstrSI12_i4):
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)		! (delay slot)
	ENDFUNC(GLOBAL(movstrSI12_i4))
#endif

#ifdef L_mulsi3


	.global	GLOBAL(mulsi3)
	FUNC(GLOBAL(mulsi3),function)

! GLOBAL(mulsi3): 32 x 32 -> 32 bit multiply from 16-bit partial products.
!
! r4 =	     aabb
! r5 =	     ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! Destroys: r1, r2, r3, macl, T bit

GLOBAL(mulsi3):
	mulu.w	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	LOCAL(mulsi3_hiset)
	rts			! yes - then we have the answer
	sts	macl,r0

LOCAL(mulsi3_hiset):
	sts	macl,r0		! r0 = bb*dd
	mulu.w	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu.w	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2		! r2 = aa*dd + cc*bb
	shll16	r2		! only the low 16 bits of the sum matter
	rts
	add	r2,r0		! (delay slot)
	ENDFUNC(GLOBAL(mulsi3))


#endif
#endif /* ! __SH5__ */
#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!!
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2
!! Signed divide through the FPU: both operands are converted to double,
!! divided, and the quotient is truncated back to an integer in fpul.

	.global	GLOBAL(sdivsi3_i4)
	FUNC(GLOBAL(sdivsi3_i4),function)
GLOBAL(sdivsi3_i4):
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul		! (delay slot)
	ENDFUNC(GLOBAL(sdivsi3_i4))

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
!! Same algorithm as above, but fpscr is saved on the stack, replaced by
!! 0x00080000 for the duration of the computation, and restored on exit.

#if ! __SH5__ || __SH5__ == 32
#if __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(sdivsi3_i4)
	FUNC(GLOBAL(sdivsi3_i4),function)
GLOBAL(sdivsi3_i4):
	sts.l	fpscr,@-r15
	mov	#8,r2
	swap.w	r2,r2			! r2 = 0x00080000
	lds	r2,fpscr
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr		! (delay slot) restore caller fpscr
	ENDFUNC(GLOBAL(sdivsi3_i4))

#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ */
#endif

#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0 clobber r1,r2,r3

	.global	GLOBAL(sdivsi3)
	FUNC(GLOBAL(sdivsi3), function)
#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns divsi3_i1 and
   divsi3_i1_media.

int __sdivsi3 (i, j)
     int i, j;
{
  register unsigned long long r18 asm ("r18");
  register unsigned long long r19 asm ("r19");
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r1 asm ("r1") = 1;
  register int r2 asm ("r2") = i >> 31;
  register int r3 asm ("r3") = j >> 31;

  r2 = r2 ? r2 : r1;
  r3 = r3 ? r3 : r1;
  r18 = i * r2;
  r19 = j * r3;
  r2 *= r3;

  r19 <<= 31;
  r1 <<= 31;
  do
    if (r18 >= r19)
      r0 |= r1, r18 -= r19;
  while (r19 >>= 1, r1 >>= 1);

  return r2 * (int)r0;
}
*/
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontadd), tr2
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r1
	shari.l	r4, 31, r2
	shari.l	r5, 31, r3
	cmveq	r2, r1, r2
	cmveq	r3, r1, r3
	muls.l	r4, r2, r18
	muls.l	r5, r3, r19
	muls.l	r2, r3, r2
	shlli	r19, 31, r19
	shlli	r1, 31, r1
LOCAL(sdivsi3_loop):
	bgtu	r19, r18, tr2
	or	r0, r1, r0
	sub	r18, r19, r18
LOCAL(sdivsi3_dontadd):
	shlri	r1, 1, r1
	shlri	r19, 1, r19
	bnei	r1, 0, tr1
	muls.l	r0, r2, r0
	add.l	r0, r63, r0
	blink	tr0, r63
#else /* ! 0 */
 // inputs: r4,r5
 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
 // result in r0
GLOBAL(sdivsi3):
 // can create absolute value without extra latency,
 // but dependent on proper sign extension of inputs:
 // shari.l r5,31,r2
 // xor r5,r2,r20
 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
	shari.l	r5,31,r2
	ori	r2,1,r2
	muls.l	r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
	movi	0xffffffffffffbb0c,r19 // shift count equiv 76
	shari.l	r4,31,r3
	nsb	r20,r0
	shlld	r20,r0,r25
	shlri	r25,48,r25
	sub	r19,r25,r1
	mmulfx.w	r1,r1,r2
	mshflo.w	r1,r63,r1
	// If r4 was to be used in-place instead of r21, could use this sequence
	// to compute absolute:
	// sub r63,r4,r19 // compute absolute value of r4
	// shlri r4,32,r3 // into lower 32 bit of r4, keeping
	// mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
	ori	r3,1,r3
	mmulfx.w	r25,r2,r2
	sub	r19,r0,r0
	muls.l	r4,r3,r21
	msub.w	r1,r2,r2
	addi	r2,-2,r1
	mulu.l	r21,r1,r19
	mmulfx.w	r2,r2,r2
	shlli	r1,15,r1
	shlrd	r19,r0,r19
	mulu.l	r19,r20,r3
	mmacnfx.wl	r25,r2,r1
	ptabs	r18,tr0
	sub	r21,r3,r25

	mulu.l	r25,r1,r2
	addi	r0,14,r0
	xor	r4,r5,r18
	shlrd	r2,r0,r2
	mulu.l	r2,r20,r3
	add	r19,r2,r19
	shari.l	r18,31,r18
	sub	r25,r3,r25

	mulu.l	r25,r1,r2
	sub	r25,r20,r25
	add	r19,r18,r19
	shlrd	r2,r0,r2
	mulu.l	r2,r20,r3
	addi	r25,1,r25
	add	r19,r2,r19

	cmpgt	r25,r3,r25
	add.l	r19,r25,r0
	xor	r0,r18,r0
	blink	tr0,r63
#endif
#elif defined __SHMEDIA__
/* m5compact-nofpu */
 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(sdivsi3):
	pt/l	LOCAL(sdivsi3_dontsub), tr0
	pt/l	LOCAL(sdivsi3_loop), tr1
	ptabs/l	r18,tr2
	shari.l	r4,31,r18
	shari.l	r5,31,r19
	xor	r4,r18,r20
	xor	r5,r19,r21
	sub.l	r20,r18,r20
	sub.l	r21,r19,r21
	xor	r18,r19,r19
	shlli	r21,32,r25
	addi	r25,-1,r21
	addz.l	r20,r63,r20
LOCAL(sdivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(sdivsi3_dontsub):
	addi.l	r25,-1,r25
	bnei	r25,-32,tr1
	xor	r20,r19,r20
	sub.l	r20,r19,r0
	blink	tr2,r63
#else /* ! __SHMEDIA__ */
	! SH compact version: one div0s set-up followed by 32 div1 steps,
	! one quotient bit per step rotated into r1.
	! A zero divisor returns 0 without trapping.
GLOBAL(sdivsi3):
	mov	r4,r1			! r1 = dividend, becomes the quotient
	mov	r5,r0			! r0 = divisor

	tst	r0,r0
	bt	LOCAL(sdivsi3_div0)
	mov	#0,r2
	div0s	r2,r1
	subc	r3,r3
	subc	r2,r1
	div0s	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	addc	r2,r1
	rts
	mov	r1,r0			! (delay slot) result to r0


LOCAL(sdivsi3_div0):
	rts
	mov	#0,r0			! (delay slot) divide by zero yields 0
	ENDFUNC(GLOBAL(sdivsi3))

#endif /* ! __SHMEDIA__ */
#endif /* ! __SH4__ */
#endif
#ifdef L_udivsi3_i4

	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!!
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
!!
!! Unsigned divide through the FPU.  float only converts signed values,
!! so the top bit of each operand is flipped before the conversion and
!! 2^31 (the double stored at the literal below) is added back afterwards.
!! Divisors 0 and 1 take the shortcut, which returns the dividend unchanged.

	.global	GLOBAL(udivsi3_i4)
	FUNC(GLOBAL(udivsi3_i4),function)
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5			! T = (r5 > 1), unsigned
	bf	LOCAL(udivsi3_i4_trivial)
	rotr	r1			! r1 = 0x80000000
	xor	r1,r4			! flip the top bit of the dividend
	lds	r4,fpul
	mova	LOCAL(udivsi3_i4_two31),r0
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s	@r0+,fr5
	fmov.s	@r0,fr4
#else
	fmov.s	@r0+,fr4
	fmov.s	@r0,fr5
#endif
#endif
	float	fpul,dr0
	xor	r1,r5			! flip the top bit of the divisor
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0			! undo the bias on the dividend
	fadd	dr4,dr2			! undo the bias on the divisor
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul		! (delay slot)

LOCAL(udivsi3_i4_trivial):
	rts
	lds	r4,fpul			! (delay slot) result = dividend

	.align 2
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
LOCAL(udivsi3_i4_two31):
	.double 2147483648

	! Size taken after the shortcut and the literal so that the symbol
	! covers everything that belongs to the routine.
	ENDFUNC(GLOBAL(udivsi3_i4))

#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
#if ! __SH5__ || __SH5__ == 32
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
	.mode	SHmedia
	.global	GLOBAL(udivsi3_i4)
	FUNC(GLOBAL(udivsi3_i4),function)
GLOBAL(udivsi3_i4):
	addz.l	r4,r63,r20
	addz.l	r5,r63,r21
	fmov.qd	r20,dr0
	fmov.qd	r21,dr32
	ptabs	r18,tr0
	float.qd dr0,dr0
	float.qd dr32,dr32
	fdiv.d	dr0,dr32,dr0
	ftrc.dq	dr0,dr32
	fmov.s	fr33,fr32
	blink	tr0,r63
#endif /* ! __SH5__ || __SH5__ == 32 */
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!!
args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 1334 1335 .global GLOBAL(udivsi3_i4) 1336 FUNC(GLOBAL(udivsi3_i4),function) 1337GLOBAL(udivsi3_i4): 1338 mov #1,r1 1339 cmp/hi r1,r5 1340 bf trivial 1341 sts.l fpscr,@-r15 1342 mova L1,r0 1343 lds.l @r0+,fpscr 1344 rotr r1 1345 xor r1,r4 1346 lds r4,fpul 1347#ifdef FMOVD_WORKS 1348 fmov.d @r0+,dr4 1349#else 1350#ifdef __LITTLE_ENDIAN__ 1351 fmov.s @r0+,fr5 1352 fmov.s @r0,fr4 1353#else 1354 fmov.s @r0+,fr4 1355 fmov.s @r0,fr5 1356#endif 1357#endif 1358 float fpul,dr0 1359 xor r1,r5 1360 lds r5,fpul 1361 float fpul,dr2 1362 fadd dr4,dr0 1363 fadd dr4,dr2 1364 fdiv dr2,dr0 1365 ftrc dr0,fpul 1366 rts 1367 lds.l @r15+,fpscr 1368 ENDFUNC(GLOBAL(udivsi3_i4)) 1369 1370#ifdef FMOVD_WORKS 1371 .align 3 ! make double below 8 byte aligned. 1372#endif 1373trivial: 1374 rts 1375 lds r4,fpul 1376 1377 .align 2 1378L1: 1379#ifndef FMOVD_WORKS 1380 .long 0x80000 1381#else 1382 .long 0x180000 1383#endif 1384 .double 2147483648 1385 1386#endif /* ! __SH4__ */ 1387#endif 1388 1389#ifdef L_udivsi3 1390/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with 1391 sh3e code. */ 1392#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__) 1393 1394!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit 1395 .global GLOBAL(udivsi3) 1396 FUNC(GLOBAL(udivsi3),function) 1397 1398#if __SHMEDIA__ 1399#if __SH5__ == 32 1400 .section .text..SHmedia32,"ax" 1401#else 1402 .text 1403#endif 1404 .align 2 1405#if 0 1406/* The assembly code that follows is a hand-optimized version of the C 1407 code that follows. Note that the registers that are modified are 1408 exactly those listed as clobbered in the patterns udivsi3_i1 and 1409 udivsi3_i1_media. 

unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
/* Disabled shift-and-subtract version: r19 holds the divisor shifted
   left by 31, r18 the quotient bit being tried, r0 collects the
   quotient and r4 the running remainder.  */
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2		/* divisor too big: skip this bit */
	or	r0, r18, r0
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1		/* loop until the bit mask runs out */
	blink	tr0, r63
#else
/* Version actually assembled for SHmedia.  It works from a fixed-point
   reciprocal estimate of the divisor followed by correction steps; the
   instruction order is tuned for scheduling and must be kept.  */
GLOBAL(udivsi3):
	// inputs: r4,r5
	// clobbered: r18,r19,r20,r21,r22,r25,tr0
	// result in r0.
	addz.l	r5,r63,r22
	nsb	r22,r0
	shlld	r22,r0,r25
	shlri	r25,48,r25
	movi	0xffffffffffffbb0c,r20 // shift count equiv 76
	sub	r20,r25,r21
	mmulfx.w r21,r21,r19
	mshflo.w r21,r63,r21
	ptabs	r18,tr0
	mmulfx.w r25,r19,r19
	sub	r20,r0,r0
	/* bubble */
	msub.w	r21,r19,r19
	addi	r19,-2,r21 /* It would be nice for scheduling to do this add to r21
			      before the msub.w, but we need a different value for
			      r19 to keep errors under control.
*/
	/* First quotient estimate (r18) and the rest it leaves (r25).  */
	mulu.l	r4,r21,r18
	mmulfx.w r19,r19,r19
	shlli	r21,15,r21
	shlrd	r18,r0,r18
	mulu.l	r18,r22,r20
	mmacnfx.wl r25,r19,r21
	/* bubble */
	sub	r4,r20,r25

	/* Second step: divide the rest, accumulate into r18.  */
	mulu.l	r25,r21,r19
	addi	r0,14,r0
	/* bubble */
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	add	r18,r19,r18
	/* bubble */
	sub.l	r25,r20,r25

	/* Third step plus the final 0/1 adjustment from cmpgt.  */
	mulu.l	r25,r21,r19
	addz.l	r25,r63,r25
	sub	r25,r22,r25
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	addi	r25,1,r25
	add	r18,r19,r18

	cmpgt	r25,r20,r25
	add.l	r18,r25,r0
	blink	tr0,r63
#endif
	/* Matches the FUNC emitted ahead of the SHmedia variants.  */
	ENDFUNC(GLOBAL(udivsi3))
#elif defined (__SHMEDIA__)
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.  */
	// clobbered: r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontsub), tr0
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18,tr2
	shlli	r5,32,r25		/* divisor moved to the upper half */
	addi	r25,-1,r21
	addz.l	r4,r63,r20		/* zero-extended dividend */
LOCAL(udivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(udivsi3_dontsub):
	addi.l	r25,-1,r25		/* r25 doubles as the loop counter */
	bnei	r25,-32,tr1		/* 32 iterations in total */
	add.l	r20,r63,r0
	blink	tr2,r63
	ENDFUNC(GLOBAL(udivsi3))
#else /* !
defined (__SHMEDIA__) */
/* Helpers for the SHcompact divide below.  div8 runs eight div1 steps
   and div7 seven; the final div1 of each sits in the rts delay slot.  */
LOCAL(div8):
	div1	r5,r4
LOCAL(div7):
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	div1	r5,r4
	rts
	div1	r5,r4

/* Four div1 steps; the first three are each followed by a rotcl on r0,
   the fourth runs in the rts delay slot.  */
LOCAL(divx4):
	div1	r5,r4
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	div1	r5,r4
	rotcl	r0
	rts
	div1	r5,r4

/* Unsigned 32-bit division built from div1 steps.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   pr is saved on the stack around the bsr calls.  Divisors that fit in
   16 bits take the path below; all others go to large_divisor.  */
GLOBAL(udivsi3):
	sts.l	pr,@-r15
	extu.w	r5,r0
	cmp/eq	r5,r0		! T = divisor fits in 16 bits
#ifdef __sh1__
	bf	LOCAL(large_divisor)
#else
	bf/s	LOCAL(large_divisor)
#endif
	div0u			! delay slot of bf/s when not __sh1__
	swap.w	r4,r0
	shlr16	r4
	bsr	LOCAL(div8)
	shll16	r5
	bsr	LOCAL(div7)
	div1	r5,r4
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(div8)
	swap.w	r4,r4
	bsr	LOCAL(div7)
	div1	r5,r4
	lds.l	@r15+,pr
	xtrct	r4,r0
	swap.w	r0,r0
	rotcl	r0
	rts
	shlr16	r5		! delay slot: undo the shll16 on the divisor

LOCAL(large_divisor):
#ifdef __sh1__
	div0u			! the plain bf above has no delay slot
#endif
	mov	#0,r0
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	lds.l	@r15+,pr
	rts
	rotcl	r0

	/* Closed here rather than before large_divisor, so the symbol
	   size covers both paths of the function.  */
	ENDFUNC(GLOBAL(udivsi3))

#endif /* ! __SHMEDIA__ */
#endif /* __SH4__ */
#endif /* L_udivsi3 */

#ifdef L_udivdi3
#ifdef __SHMEDIA__
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(udivdi3)
	FUNC(GLOBAL(udivdi3),function)
/* Dividend arrives in r2 and divisor in r3; the quotient is returned
   in r2.  */
GLOBAL(udivdi3):
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.
*/
	/* Reciprocal estimate for the normalised divisor, refined below.
	   The branch to large_divisor is taken when r9 = 32 - r22 > 0.  */
	sub	r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r8
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	shlld	r8,r0,r8
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r8,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	shlld	r5,r0,r8
	addi	r20,30-22,r0
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	add	r8,r21,r8
	mcmpgt.l r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	addi	r2,1,r2
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	add	r8,r7,r8
	sub	r2,r3,r2
	cmpgt	r2,r5,r5
	add	r8,r5,r2
	/* could test r3 here to check for divide by zero.
*/
	blink	tr0,r63

/* Path taken when the branch on r9 above fires.  The dividend is first
   shifted right by r9 into r25 so the same reciprocal steps apply.  */
LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	/* bubble */
	cmpgtu	r5,r2,r5
	sub	r8,r5,r2
	blink	tr0,r63
	/* Pairs with the FUNC at the entry point so the ELF symbol gets
	   a size.  */
	ENDFUNC(GLOBAL(udivdi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.
*/
#endif /* __SHMEDIA__ */
#endif /* L_udivdi3 */

#ifdef L_divdi3
#ifdef __SHMEDIA__
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(divdi3)
	FUNC(GLOBAL(divdi3),function)
/* Signed 64-bit division layered on udivdi3.
   In:   r2 = dividend, r3 = divisor.
   Out:  r2 = quotient.
   r22 and r23 receive the sign masks of the operands, which are then
   replaced by their absolute values.  With equal signs the routine
   tail-branches to udivdi3; otherwise it calls udivdi3 and negates the
   result before returning through tr1.  */
GLOBAL(divdi3):
	pta	GLOBAL(udivdi3),tr0
	shari	r2,63,r22		/* r22 = 0 or -1 from the dividend sign */
	shari	r3,63,r23		/* r23 = 0 or -1 from the divisor sign */
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2		/* r2 = |dividend| */
	sub	r3,r23,r3		/* r3 = |divisor| */
	beq/u	r22,r23,tr0		/* same sign: result needs no fix-up */
	ptabs	r18,tr1			/* keep our own return address in tr1 */
	blink	tr0,r18
	sub	r63,r2,r2		/* negate the unsigned quotient */
	blink	tr1,r63
	ENDFUNC(GLOBAL(divdi3))
#endif /* __SHMEDIA__ */
#endif /* L_divdi3 */

#ifdef L_umoddi3
#ifdef __SHMEDIA__
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(umoddi3)
	FUNC(GLOBAL(umoddi3),function)
/* Unsigned 64-bit remainder; operands in r2 and r3, result in r2.  The
   opening sequence mirrors udivdi3.  */
GLOBAL(umoddi3):
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub	r21,r5,r1
	mmulfx.w r1,r1,r4
	mshflo.w r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	shlld	r5,r0,r5
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r5,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.
*/

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	addi	r20,30-22,r0
	/* bubble */ /* could test r3 here to check for divide by zero.  */
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	mcmpgt.l r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	sub	r2,r3,r8 /* re-use r8 here for rest - r3 */
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	/* bubble */
	addi	r8,1,r7
	cmpgt	r7,r5,r7
	cmvne	r7,r8,r2
	sub	r2,r5,r2
	blink	tr0,r63

/* Path taken when the branch on r9 fires; the dividend is shifted
   right by r9 into r25 before the reciprocal steps.  */
LOCAL(large_divisor):
	mmulfx.w r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
	                   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.
*/
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	add	r2,r6,r7
	cmpgtu	r5,r2,r8
	cmvne	r8,r7,r2
	sub	r2,r5,r2
	shlrd	r2,r22,r2		/* undo the normalisation shift by r22 */
	blink	tr0,r63
	/* Pairs with the FUNC at the umoddi3 entry point.  */
	ENDFUNC(GLOBAL(umoddi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.  */
#endif /* __SHMEDIA__ */
#endif /* L_umoddi3 */

#ifdef L_moddi3
#ifdef __SHMEDIA__
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(moddi3)
	FUNC(GLOBAL(moddi3),function)
/* Signed 64-bit remainder layered on umoddi3.
   In:   r2 = dividend, r3 = divisor.
   Out:  r2 = remainder.
   Both operands are replaced by their absolute values.  When the
   dividend sign mask (r22) is zero the routine tail-branches to
   umoddi3; otherwise it calls umoddi3 and negates the result.  */
GLOBAL(moddi3):
	pta	GLOBAL(umoddi3),tr0
	shari	r2,63,r22		/* r22 = 0 or -1 from the dividend sign */
	shari	r3,63,r23		/* r23 = 0 or -1 from the divisor sign */
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2		/* r2 = |dividend| */
	sub	r3,r23,r3		/* r3 = |divisor| */
	beq/u	r22,r63,tr0		/* non-negative dividend: no fix-up */
	ptabs	r18,tr1			/* keep our own return address in tr1 */
	blink	tr0,r18
	sub	r63,r2,r2		/* negate the unsigned remainder */
	blink	tr1,r63
	ENDFUNC(GLOBAL(moddi3))
#endif /* __SHMEDIA__ */
#endif /* L_moddi3 */

#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
	.mode	SHcompact
#endif
	.global	GLOBAL(set_fpscr)
	FUNC(GLOBAL(set_fpscr),function)
/* Loads fpscr from r4, then stores two variants of that value into the
   two words of fpscr_values.  r1 is pointed at fpscr_values, through
   the GOT when __PIC__ is defined.  */
GLOBAL(set_fpscr):
	lds	r4,fpscr
#ifdef __PIC__
	mov.l	r12,@-r15
	mova	LOCAL(set_fpscr_L0),r0
	mov.l	LOCAL(set_fpscr_L0),r12
	add	r0,r12
	mov.l	LOCAL(set_fpscr_L1),r0
	mov.l	@(r0,r12),r1
	mov.l	@r15+,r12
#else
	mov.l	LOCAL(set_fpscr_L1),r1
#endif
	swap.w	r4,r0		! work on the upper half-word of the value
	or	#24,r0
#ifndef FMOVD_WORKS
	xor	#16,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r3
	mov.l	r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r2
	mov.l	r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor	#8,r0
#else
	xor	#24,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r2
	rts
	mov.l	r2,@r1		! delay slot: store the second variant
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r3
	rts
	mov.l	r3,@(4,r1)	! delay slot: store the second variant
#endif
	.align 2
#ifdef __PIC__
LOCAL(set_fpscr_L0):
	.long _GLOBAL_OFFSET_TABLE_
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
	.long GLOBAL(fpscr_values)
#endif

	/* Closes set_fpscr after its literal pool.  */
	ENDFUNC(GLOBAL(set_fpscr))

#ifdef __ELF__
	.comm	GLOBAL(fpscr_values),8,4
#else
	.comm	GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */
#ifdef L_ic_invalidate
#if __SH5__ == 32
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(init_trampoline)
	/* The type annotation names init_trampoline itself.  It used to
	   name set_fpscr, which is not defined in this object.  */
	FUNC(GLOBAL(init_trampoline),function)
/* Writes a 64-bit constant at r0+0 and the words r2 and r3 at r0+8 and
   r0+12, then falls through into ic_invalidate for the same address.  */
GLOBAL(init_trampoline):
	st.l	r0,8,r2
#ifdef __LITTLE_ENDIAN__
	movi	9,r20
	shori	0x402b,r20
	shori	0xd101,r20
	shori	0xd002,r20
#else
	movi	0xffffffffffffd002,r20
	shori	0xd101,r20
	shori	0x402b,r20
	shori	9,r20
#endif
	st.q	r0,0,r20
	st.l	r0,12,r3
	.global	GLOBAL(ic_invalidate)
	FUNC(GLOBAL(ic_invalidate),function)
/* Cache maintenance for the line addressed by r0: ocbwb, synco, icbi
   and synci are issued in that order before returning.  */
GLOBAL(ic_invalidate):
	ocbwb	r0,0
	synco
	icbi	r0, 0
	ptabs	r18, tr0
	synci
	blink	tr0, r63
	ENDFUNC(GLOBAL(ic_invalidate))
	ENDFUNC(GLOBAL(init_trampoline))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
	.global GLOBAL(ic_invalidate)
	FUNC(GLOBAL(ic_invalidate),function)
/* SH4 version, address in r4: after ocbwb it jumps into a table of
   256 rts-plus-padding blocks of 32 bytes each, choosing the block
   from the address bits selected by the 0x1fe0 mask.  */
GLOBAL(ic_invalidate):
	ocbwb	@r4
	mova	0f,r0
	mov.w	1f,r1
/* Compute how many cache lines 0f is away from r4.  */
	sub	r0,r4
	and	r1,r4
/* Prepare to branch to 0f plus the cache-line offset.
*/
	add	# 0f - 1f,r4
	braf	r4
	nop
1:
	.short	0x1fe0
	.p2align 5
/* This must be aligned to the beginning of a cache line.  */
0:
	.rept	256 /* There are 256 cache lines of 32 bytes.  */
	rts
	.rept	15
	nop
	.endr
	.endr
	/* The size of ic_invalidate includes the whole rts table.  */
	ENDFUNC(GLOBAL(ic_invalidate))
#endif /* SH4 */
#endif /* L_ic_invalidate */

#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
	.section	.rodata
	.align	1
/* Dispatch table for GCC_shcompact_call_trampoline.  Every entry is
   the 16-bit distance from ct_main_label to one handler.  */
LOCAL(ct_main_table):
.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
/* The operand of the next entry continues on the following line; the
   trailing backslash lets the preprocessor join the two pieces.  */
.word	LOCAL(ct_r9_fpl) - \
datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2

     /* GCC_shcompact_call_trampoline fills 64-bit general-purpose
	registers before a call.  A cookie passed in r1 says, for each
	register, whether its value comes from the stack, from the
	memory address the register currently holds, or from an FP
	register.  The run time grows linearly with the number of
	registers that really have to be copied.  See sh.h for details
	on the actual bit pattern.

	r0 carries the address of the function to call.  When a 32-bit
	return value is expected the function is tail-called.  Otherwise
	the return address is kept in r10 (which the caller must treat
	as clobbered) and the return value is expanded into r2/r3 on
	the way back.  */

	.global	GLOBAL(GCC_shcompact_call_trampoline)
	FUNC(GLOBAL(GCC_shcompact_call_trampoline),function)
GLOBAL(GCC_shcompact_call_trampoline):
	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
	pt/l	LOCAL(ct_loop), tr1
	addz.l	r1, r63, r1
	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
	/* Each pass picks a handler from the table, indexed by twice the
	   nsb result for what is left of the cookie.  Handlers clear
	   their cookie bits and come back here through tr1.  */
LOCAL(ct_loop):
	nsb	r1, r28
	shlli	r28, 1, r29
	ldx.w	r0, r29, r30
LOCAL(ct_main_label):
	ptrel/l	r30, tr2
	blink	tr2, r63
LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
	/* It must be dr0, so just do it.
*/
	fmov.dq	dr0, r2
	movi	7, r30
	shlli	r30, 29, r31
	andc	r1, r31, r1		/* drop the r2 field of the cookie */
	blink	tr1, r63
LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
	/* It is either dr0 or dr2.  */
	movi	7, r30
	shlri	r1, 26, r32
	shlli	r30, 26, r31
	andc	r1, r31, r1
	fmov.dq	dr0, r3
	beqi/l	r32, 4, tr1
	fmov.dq	dr2, r3
	blink	tr1, r63
	/* From r4 on, the FP source is chosen by a computed branch into
	   a run of two-instruction copy stubs (8 bytes apiece).  */
LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
	shlri	r1, 23 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
LOCAL(ct_r4_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 23, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r4_fp_copy):
	fmov.dq	dr0, r4
	blink	tr1, r63
	fmov.dq	dr2, r4
	blink	tr1, r63
	fmov.dq	dr4, r4
	blink	tr1, r63
LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
	shlri	r1, 20 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
LOCAL(ct_r5_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 20, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r5_fp_copy):
	fmov.dq	dr0, r5
	blink	tr1, r63
	fmov.dq	dr2, r5
	blink	tr1, r63
	fmov.dq	dr4, r5
	blink	tr1, r63
	fmov.dq	dr6, r5
	blink	tr1, r63
LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
	/* It must be dr8.  */
	fmov.dq	dr8, r6
	movi	15, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.
*/
	shlri	r1, 16 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
LOCAL(ct_r6_fp_base):
	ptrel/l	r32, tr2
	movi	7, r30
	shlli	r30, 16, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r6_fp_copy):
	fmov.dq	dr0, r6
	blink	tr1, r63
	fmov.dq	dr2, r6
	blink	tr1, r63
	fmov.dq	dr4, r6
	blink	tr1, r63
	fmov.dq	dr6, r6
	blink	tr1, r63
LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 12, r31
	shlri	r1, 12, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r7
	beqi/l	r32, 8, tr1
	fmov.dq	dr10, r7
	blink	tr1, r63
LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
	shlri	r1, 12 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
LOCAL(ct_r7_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 12, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r7_fp_copy):
	fmov.dq	dr0, r7
	blink	tr1, r63
	fmov.dq	dr2, r7
	blink	tr1, r63
	fmov.dq	dr4, r7
	blink	tr1, r63
	fmov.dq	dr6, r7
	blink	tr1, r63
LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 8, r31
	andi	r1, 1 << 8, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r8
	beq/l	r32, r63, tr1		/* selector bit clear: dr8 stays */
	fmov.dq	dr10, r8
	blink	tr1, r63
LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.
*/
	shlri	r1, 8 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
LOCAL(ct_r8_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 8, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r8_fp_copy):
	fmov.dq	dr0, r8
	blink	tr1, r63
	fmov.dq	dr2, r8
	blink	tr1, r63
	fmov.dq	dr4, r8
	blink	tr1, r63
	fmov.dq	dr6, r8
	blink	tr1, r63
LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
	/* It is either dr8 or dr10.  */
	movi	15 << 4, r31
	andi	r1, 1 << 4, r32
	andc	r1, r31, r1
	fmov.dq	dr8, r9
	beq/l	r32, r63, tr1		/* selector bit clear: dr8 stays */
	fmov.dq	dr10, r9
	blink	tr1, r63
LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
	shlri	r1, 4 - 3, r34
	andi	r34, 3 << 3, r33
	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
LOCAL(ct_r9_fp_base):
	ptrel/l	r32, tr2
	movi	7 << 4, r31
	andc	r1, r31, r1
	blink	tr2, r63
LOCAL(ct_r9_fp_copy):
	fmov.dq	dr0, r9
	blink	tr1, r63
	fmov.dq	dr2, r9
	blink	tr1, r63
	fmov.dq	dr4, r9
	blink	tr1, r63
	fmov.dq	dr6, r9
	blink	tr1, r63
	/* Memory loads.  Each stage tests a two-bit cookie field: if both
	   bits are set it branches to the matching ct_rN_load stub, which
	   loads only that register and returns to the loop.  Otherwise it
	   points the next register 8 bytes further on, loads this one,
	   and drops into the next stage.  */
LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
	pt/l	LOCAL(ct_r2_load), tr2
	movi	3, r30
	shlli	r30, 29, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r2, 8, r3
	ldx.q	r2, r63, r2
	/* Fall through.  */
LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
	pt/l	LOCAL(ct_r3_load), tr2
	movi	3, r30
	shlli	r30, 26, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r3, 8, r4
	ldx.q	r3, r63, r3
LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
	pt/l	LOCAL(ct_r4_load), tr2
	movi	3, r30
	shlli	r30, 23, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r4, 8, r5
	ldx.q	r4, r63, r4
LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.
*/
	pt/l	LOCAL(ct_r5_load), tr2
	movi	3, r30
	shlli	r30, 20, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r5, 8, r6
	ldx.q	r5, r63, r5
LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
	pt/l	LOCAL(ct_r6_load), tr2
	movi	3 << 16, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r6, 8, r7
	ldx.q	r6, r63, r6
LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
	pt/l	LOCAL(ct_r7_load), tr2
	movi	3 << 12, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r7, 8, r8
	ldx.q	r7, r63, r7
LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
	pt/l	LOCAL(ct_r8_load), tr2
	movi	3 << 8, r31
	and	r1, r31, r32
	andc	r1, r31, r1
	beq/l	r31, r32, tr2
	addi.l	r8, 8, r9
	ldx.q	r8, r63, r8
LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
	pt/l	LOCAL(ct_check_tramp), tr2
	ldx.q	r9, r63, r9
	blink	tr2, r63
	/* Single-register load stubs: load through the register's own
	   address, then return to the dispatch loop.  */
LOCAL(ct_r2_load):
	ldx.q	r2, r63, r2
	blink	tr1, r63
LOCAL(ct_r3_load):
	ldx.q	r3, r63, r3
	blink	tr1, r63
LOCAL(ct_r4_load):
	ldx.q	r4, r63, r4
	blink	tr1, r63
LOCAL(ct_r5_load):
	ldx.q	r5, r63, r5
	blink	tr1, r63
LOCAL(ct_r6_load):
	ldx.q	r6, r63, r6
	blink	tr1, r63
LOCAL(ct_r7_load):
	ldx.q	r7, r63, r7
	blink	tr1, r63
LOCAL(ct_r8_load):
	ldx.q	r8, r63, r8
	blink	tr1, r63
	/* Stack pops: read 8 bytes at r15, advance r15 by 8, clear the
	   cookie bit for this register, return to the loop.  */
LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r2
	shlli	r30, 29, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r3
	shlli	r30, 26, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r4_pop):	/* Pop r4 from the stack.
*/
	movi	1, r30
	ldx.q	r15, r63, r4
	shlli	r30, 23, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r5
	shlli	r30, 20, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
	movi	1, r30
	ldx.q	r15, r63, r6
	shlli	r30, 16, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
	ldx.q	r15, r63, r7
	movi	1 << 12, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
	ldx.q	r15, r63, r8
	movi	1 << 8, r31
	addi.l	r15, 8, r15
	andc	r1, r31, r1
	blink	tr1, r63
LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
	/* The count field (cookie bits 1..3) times 8 bytes is subtracted
	   from ct_end_of_pop_seq; every pop below is two instructions.  */
	andi	r1, 7 << 1, r30
	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
	shlli	r30, 2, r31
	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
	sub.l	r32, r31, r33
	ptabs/l	r33, tr2
	blink	tr2, r63
LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
	ldx.q	r15, r63, r3
	addi.l	r15, 8, r15
	ldx.q	r15, r63, r4
	addi.l	r15, 8, r15
	ldx.q	r15, r63, r5
	addi.l	r15, 8, r15
	ldx.q	r15, r63, r6
	addi.l	r15, 8, r15
	ldx.q	r15, r63, r7
	addi.l	r15, 8, r15
	ldx.q	r15, r63, r8
	addi.l	r15, 8, r15
LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
	ldx.q	r15, r63, r9
	addi.l	r15, 8, r15
LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
	pt/u	LOCAL(ct_ret_wide), tr2
	andi	r1, 1, r1
	bne/u	r1, r63, tr2
LOCAL(ct_call_func):	/* Just branch to the function.  */
	blink	tr0, r63
LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its
			   64-bit return value.
*/
	add.l	r18, r63, r10		/* keep the return address in r10 */
	blink	tr0, r18
	ptabs	r10, tr0
	/* Split the 64-bit value in r2 into two 32-bit halves in r2/r3.  */
#if __LITTLE_ENDIAN__
	shari	r2, 32, r3
	add.l	r2, r63, r2
#else
	add.l	r2, r63, r3
	shari	r2, 32, r2
#endif
	blink	tr0, r63
	/* Pairs with the FUNC at the trampoline entry point.  */
	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
#endif /* L_shcompact_call_trampoline */

#ifdef L_shcompact_return_trampoline
     /* This function does the converse of the code in `ret_wide'
	above.  It is tail-called by SHcompact functions returning
	64-bit non-floating-point values, to pack the 32-bit values in
	r2 and r3 into r2.  */

	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2
	.global	GLOBAL(GCC_shcompact_return_trampoline)
	FUNC(GLOBAL(GCC_shcompact_return_trampoline),function)
GLOBAL(GCC_shcompact_return_trampoline):
	ptabs/l	r18, tr0
	/* Zero-extend the low half, shift the high half up by 32, then
	   merge; which of r2/r3 is the low half depends on endianness.  */
#if __LITTLE_ENDIAN__
	addz.l	r2, r63, r2
	shlli	r3, 32, r3
#else
	addz.l	r3, r63, r3
	shlli	r2, 32, r2
#endif
	or	r3, r2, r2
	blink	tr0, r63
	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
#endif /* L_shcompact_return_trampoline */

#ifdef L_shcompact_incoming_args
	.section	.rodata
	.align	1
/* Dispatch table for GCC_shcompact_incoming_args: 16-bit distances
   from ia_main_label to each handler.  */
LOCAL(ia_main_table):
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /*
Invalid, just loop */
.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2

     /* This function stores 64-bit general-purpose registers back in
	the stack, and loads the address in which each register
	was stored into itself.  The lower 32 bits of r17 hold the address
	to begin storing, and the upper 32 bits of r17 hold the cookie.
	Its execution time is linear on the
	number of registers that actually have to be copied, and it is
	optimized for structures larger than 64 bits, as opposed to
	individual `long long' arguments.  See sh.h for details on the
	actual bit pattern.  */

	.global	GLOBAL(GCC_shcompact_incoming_args)
	/* FUNC takes the symbol and its type; the type argument was
	   missing here, which the preprocessor rejects.  */
	FUNC(GLOBAL(GCC_shcompact_incoming_args),function)
GLOBAL(GCC_shcompact_incoming_args):
	ptabs/l	r18, tr0	/* Prepare to return.  */
	shlri	r17, 32, r0	/* Load the cookie.
*/
	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
	pt/l	LOCAL(ia_loop), tr1
	add.l	r17, r63, r17
	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
	/* Each pass picks a handler from the table, indexed by twice the
	   nsb result for what is left of the cookie in r0.  */
LOCAL(ia_loop):
	nsb	r0, r36
	shlli	r36, 1, r37
	ldx.w	r43, r37, r38
LOCAL(ia_main_label):
	ptrel/l	r38, tr2
	blink	tr2, r63
	/* The ia_rN_ld stages store the register at r17, replace the
	   register with that address, and advance r17 by 8.  When both
	   bits of the two-bit cookie field were set the stage returns to
	   the loop; otherwise it drops into the next stage.  */
LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
	movi	3, r38
	shlli	r38, 29, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r2
	add.l	r17, r63, r2
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
	movi	3, r38
	shlli	r38, 26, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r3
	add.l	r17, r63, r3
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
	movi	3, r38
	shlli	r38, 23, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r4
	add.l	r17, r63, r4
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
	movi	3, r38
	shlli	r38, 20, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r5
	add.l	r17, r63, r5
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
	movi	3, r38
	shlli	r38, 16, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r6
	add.l	r17, r63, r6
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
	movi	3 << 12, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r7
	add.l	r17, r63, r7
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r8_ld):	/* Store r8 and load its address.
*/ 2565 movi 3 << 8, r39 2566 and r0, r39, r40 2567 andc r0, r39, r0 2568 stx.q r17, r63, r8 2569 add.l r17, r63, r8 2570 addi.l r17, 8, r17 2571 beq/u r39, r40, tr1 2572LOCAL(ia_r9_ld): /* Store r9 and load its address. */ 2573 stx.q r17, r63, r9 2574 add.l r17, r63, r9 2575 blink tr0, r63 2576LOCAL(ia_r2_push): /* Push r2 onto the stack. */ 2577 movi 1, r38 2578 shlli r38, 29, r39 2579 andc r0, r39, r0 2580 stx.q r17, r63, r2 2581 addi.l r17, 8, r17 2582 blink tr1, r63 2583LOCAL(ia_r3_push): /* Push r3 onto the stack. */ 2584 movi 1, r38 2585 shlli r38, 26, r39 2586 andc r0, r39, r0 2587 stx.q r17, r63, r3 2588 addi.l r17, 8, r17 2589 blink tr1, r63 2590LOCAL(ia_r4_push): /* Push r4 onto the stack. */ 2591 movi 1, r38 2592 shlli r38, 23, r39 2593 andc r0, r39, r0 2594 stx.q r17, r63, r4 2595 addi.l r17, 8, r17 2596 blink tr1, r63 2597LOCAL(ia_r5_push): /* Push r5 onto the stack. */ 2598 movi 1, r38 2599 shlli r38, 20, r39 2600 andc r0, r39, r0 2601 stx.q r17, r63, r5 2602 addi.l r17, 8, r17 2603 blink tr1, r63 2604LOCAL(ia_r6_push): /* Push r6 onto the stack. */ 2605 movi 1, r38 2606 shlli r38, 16, r39 2607 andc r0, r39, r0 2608 stx.q r17, r63, r6 2609 addi.l r17, 8, r17 2610 blink tr1, r63 2611LOCAL(ia_r7_push): /* Push r7 onto the stack. */ 2612 movi 1 << 12, r39 2613 andc r0, r39, r0 2614 stx.q r17, r63, r7 2615 addi.l r17, 8, r17 2616 blink tr1, r63 2617LOCAL(ia_r8_push): /* Push r8 onto the stack. */ 2618 movi 1 << 8, r39 2619 andc r0, r39, r0 2620 stx.q r17, r63, r8 2621 addi.l r17, 8, r17 2622 blink tr1, r63 2623LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ 2624 andi r0, 7 << 1, r38 2625 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 2626 shlli r38, 2, r39 2627 shori LOCAL(ia_end_of_push_seq) & 65535, r40 2628 sub.l r40, r39, r41 2629 ptabs/l r41, tr2 2630 blink tr2, r63 2631LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. 
*/ 2632 stx.q r17, r63, r3 2633 addi.l r17, 8, r17 2634 stx.q r17, r63, r4 2635 addi.l r17, 8, r17 2636 stx.q r17, r63, r5 2637 addi.l r17, 8, r17 2638 stx.q r17, r63, r6 2639 addi.l r17, 8, r17 2640 stx.q r17, r63, r7 2641 addi.l r17, 8, r17 2642 stx.q r17, r63, r8 2643 addi.l r17, 8, r17 2644LOCAL(ia_r9_push): /* Push r9 onto the stack. */ 2645 stx.q r17, r63, r9 2646LOCAL(ia_return): /* Return. */ 2647 blink tr0, r63 2648LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ 2649#endif /* L_shcompact_incoming_args */ 2650#endif 2651#if __SH5__ 2652#ifdef L_nested_trampoline 2653#if __SH5__ == 32 2654 .section .text..SHmedia32,"ax" 2655#else 2656 .text 2657#endif 2658 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ 2659 .global GLOBAL(GCC_nested_trampoline) 2660GLOBAL(GCC_nested_trampoline): 2661 .mode SHmedia 2662 ptrel/u r63, tr0 2663 gettr tr0, r0 2664#if __SH5__ == 64 2665 ld.q r0, 24, r1 2666#else 2667 ld.l r0, 24, r1 2668#endif 2669 ptabs/l r1, tr1 2670#if __SH5__ == 64 2671 ld.q r0, 32, r1 2672#else 2673 ld.l r0, 28, r1 2674#endif 2675 blink tr1, r63 2676#endif /* L_nested_trampoline */ 2677#endif /* __SH5__ */ 2678#if __SH5__ == 32 2679#ifdef L_push_pop_shmedia_regs 2680 .section .text..SHmedia32,"ax" 2681 .mode SHmedia 2682 .align 2 2683#ifndef __SH4_NOFPU__ 2684 .global GLOBAL(GCC_push_shmedia_regs) 2685GLOBAL(GCC_push_shmedia_regs): 2686 addi.l r15, -14*8, r15 2687 fst.d r15, 13*8, dr62 2688 fst.d r15, 12*8, dr60 2689 fst.d r15, 11*8, dr58 2690 fst.d r15, 10*8, dr56 2691 fst.d r15, 9*8, dr54 2692 fst.d r15, 8*8, dr52 2693 fst.d r15, 7*8, dr50 2694 fst.d r15, 6*8, dr48 2695 fst.d r15, 5*8, dr46 2696 fst.d r15, 4*8, dr44 2697 fst.d r15, 3*8, dr42 2698 fst.d r15, 2*8, dr40 2699 fst.d r15, 1*8, dr38 2700 fst.d r15, 0*8, dr36 2701#endif 2702 .global GLOBAL(GCC_push_shmedia_regs_nofpu) 2703GLOBAL(GCC_push_shmedia_regs_nofpu): 2704 ptabs/l r18, tr0 2705 addi.l r15, -27*8, r15 2706 gettr tr7, r62 2707 gettr tr6, r61 
2708 gettr tr5, r60 2709 st.q r15, 26*8, r62 2710 st.q r15, 25*8, r61 2711 st.q r15, 24*8, r60 2712 st.q r15, 23*8, r59 2713 st.q r15, 22*8, r58 2714 st.q r15, 21*8, r57 2715 st.q r15, 20*8, r56 2716 st.q r15, 19*8, r55 2717 st.q r15, 18*8, r54 2718 st.q r15, 17*8, r53 2719 st.q r15, 16*8, r52 2720 st.q r15, 15*8, r51 2721 st.q r15, 14*8, r50 2722 st.q r15, 13*8, r49 2723 st.q r15, 12*8, r48 2724 st.q r15, 11*8, r47 2725 st.q r15, 10*8, r46 2726 st.q r15, 9*8, r45 2727 st.q r15, 8*8, r44 2728 st.q r15, 7*8, r35 2729 st.q r15, 6*8, r34 2730 st.q r15, 5*8, r33 2731 st.q r15, 4*8, r32 2732 st.q r15, 3*8, r31 2733 st.q r15, 2*8, r30 2734 st.q r15, 1*8, r29 2735 st.q r15, 0*8, r28 2736 blink tr0, r63 2737 2738#ifndef __SH4_NOFPU__ 2739 .global GLOBAL(GCC_pop_shmedia_regs) 2740GLOBAL(GCC_pop_shmedia_regs): 2741 pt .L0, tr1 2742 movi 41*8, r0 2743 fld.d r15, 40*8, dr62 2744 fld.d r15, 39*8, dr60 2745 fld.d r15, 38*8, dr58 2746 fld.d r15, 37*8, dr56 2747 fld.d r15, 36*8, dr54 2748 fld.d r15, 35*8, dr52 2749 fld.d r15, 34*8, dr50 2750 fld.d r15, 33*8, dr48 2751 fld.d r15, 32*8, dr46 2752 fld.d r15, 31*8, dr44 2753 fld.d r15, 30*8, dr42 2754 fld.d r15, 29*8, dr40 2755 fld.d r15, 28*8, dr38 2756 fld.d r15, 27*8, dr36 2757 blink tr1, r63 2758#endif 2759 .global GLOBAL(GCC_pop_shmedia_regs_nofpu) 2760GLOBAL(GCC_pop_shmedia_regs_nofpu): 2761 movi 27*8, r0 2762.L0: 2763 ptabs r18, tr0 2764 ld.q r15, 26*8, r62 2765 ld.q r15, 25*8, r61 2766 ld.q r15, 24*8, r60 2767 ptabs r62, tr7 2768 ptabs r61, tr6 2769 ptabs r60, tr5 2770 ld.q r15, 23*8, r59 2771 ld.q r15, 22*8, r58 2772 ld.q r15, 21*8, r57 2773 ld.q r15, 20*8, r56 2774 ld.q r15, 19*8, r55 2775 ld.q r15, 18*8, r54 2776 ld.q r15, 17*8, r53 2777 ld.q r15, 16*8, r52 2778 ld.q r15, 15*8, r51 2779 ld.q r15, 14*8, r50 2780 ld.q r15, 13*8, r49 2781 ld.q r15, 12*8, r48 2782 ld.q r15, 11*8, r47 2783 ld.q r15, 10*8, r46 2784 ld.q r15, 9*8, r45 2785 ld.q r15, 8*8, r44 2786 ld.q r15, 7*8, r35 2787 ld.q r15, 6*8, r34 2788 ld.q r15, 5*8, r33 
2789 ld.q r15, 4*8, r32 2790 ld.q r15, 3*8, r31 2791 ld.q r15, 2*8, r30 2792 ld.q r15, 1*8, r29 2793 ld.q r15, 0*8, r28 2794 add.l r15, r0, r15 2795 blink tr0, r63 2796#endif /* __SH5__ == 32 */ 2797#endif /* L_push_pop_shmedia_regs */ 2798