/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

!! libgcc routines for the Renesas / SuperH SH CPUs.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

#include "lib1funcs.h"

#if ! __SH5__
#ifdef L_ashiftrt

/* ashiftrt_r4_N, for N = 0 .. 32

   Arithmetic right shift of r4 by the constant N, performed in place.

   Entry:	r4 = value to shift
   Exit:	r4 = value shifted right by N with the sign propagated
		(N = 32 shares its code with N = 31)
   Destroys:	T bit (written by the shar / rotcl / subc steps)

   Every label is an entry point into a chain of single-bit shar
   instructions that falls through into a shared tail, so a shift by N
   only executes the steps from its own label downwards.  The order of
   the labels and instructions therefore matters.  */

	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))

	.align	1

! Counts 31 and 32: the result is just the sign, replicated.
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4		! T = sign bit of r4
	rts
	subc	r4,r4		! (delay slot) r4 = -T, i.e. 0 or -1

! Counts 25..30: single-bit steps, then fall into the shift by 24.
GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
GLOBAL(ashiftrt_r4_24):
	shlr16	r4
	shlr8	r4		! logical shift right by 24 in total
	rts
	exts.b	r4,r4		! (delay slot) sign-extend the remaining byte

! Counts 17..23: single-bit steps, then fall into the shift by 16.
GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
GLOBAL(ashiftrt_r4_16):
	shlr16	r4
	rts
	exts.w	r4,r4		! (delay slot) sign-extend the remaining word

! Counts 1..15: one shar per bit; the last one sits in the rts delay slot.
GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4

! Count 0: nothing to do.
GLOBAL(ashiftrt_r4_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashiftrt_r4_0))
	ENDFUNC(GLOBAL(ashiftrt_r4_1))
	ENDFUNC(GLOBAL(ashiftrt_r4_2))
	ENDFUNC(GLOBAL(ashiftrt_r4_3))
	ENDFUNC(GLOBAL(ashiftrt_r4_4))
	ENDFUNC(GLOBAL(ashiftrt_r4_5))
	ENDFUNC(GLOBAL(ashiftrt_r4_6))
	ENDFUNC(GLOBAL(ashiftrt_r4_7))
	ENDFUNC(GLOBAL(ashiftrt_r4_8))
	ENDFUNC(GLOBAL(ashiftrt_r4_9))
	ENDFUNC(GLOBAL(ashiftrt_r4_10))
	ENDFUNC(GLOBAL(ashiftrt_r4_11))
	ENDFUNC(GLOBAL(ashiftrt_r4_12))
	ENDFUNC(GLOBAL(ashiftrt_r4_13))
	ENDFUNC(GLOBAL(ashiftrt_r4_14))
	ENDFUNC(GLOBAL(ashiftrt_r4_15))
	ENDFUNC(GLOBAL(ashiftrt_r4_16))
	ENDFUNC(GLOBAL(ashiftrt_r4_17))
	ENDFUNC(GLOBAL(ashiftrt_r4_18))
	ENDFUNC(GLOBAL(ashiftrt_r4_19))
	ENDFUNC(GLOBAL(ashiftrt_r4_20))
	ENDFUNC(GLOBAL(ashiftrt_r4_21))
	ENDFUNC(GLOBAL(ashiftrt_r4_22))
	ENDFUNC(GLOBAL(ashiftrt_r4_23))
	ENDFUNC(GLOBAL(ashiftrt_r4_24))
	ENDFUNC(GLOBAL(ashiftrt_r4_25))
	ENDFUNC(GLOBAL(ashiftrt_r4_26))
	ENDFUNC(GLOBAL(ashiftrt_r4_27))
	ENDFUNC(GLOBAL(ashiftrt_r4_28))
	ENDFUNC(GLOBAL(ashiftrt_r4_29))
	ENDFUNC(GLOBAL(ashiftrt_r4_30))
	ENDFUNC(GLOBAL(ashiftrt_r4_31))
	ENDFUNC(GLOBAL(ashiftrt_r4_32))
#endif

#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic (sign-propagating) right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shift count (only the low 5 bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5, T bit
!
! Implementation: the count, masked to 0..31, indexes a table of byte
! offsets.  Each offset selects an entry point into fall-through chains
! of shift steps, so label and instruction order below must be kept.

	.global	GLOBAL(ashrsi3)
	HIDDEN_FUNC(GLOBAL(ashrsi3))
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the handler from the table
#ifdef __sh1__
	add	r5,r0			! absolute handler address
	jmp	@r0
#else
	braf	r5			! pc-relative; the table directly follows the delay slot
#endif
	mov	r4,r0			! (delay slot) work on a copy in r0

	.align	2
LOCAL(ashrsi3_table):
	.byte	LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

! Count 31: the result is just the sign, replicated.
LOCAL(ashrsi3_31):
	rotcl	r0		! T = sign bit
	rts
	subc	r0,r0		! (delay slot) r0 = -T, i.e. 0 or -1

! Counts 25..30: single-bit steps, then fall into the shift by 24.
LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0
	shlr8	r0		! logical shift right by 24 in total
	rts
	exts.b	r0,r0		! (delay slot) sign-extend the remaining byte

! Counts 17..23: single-bit steps, then fall into the shift by 16.
LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0
	rts
	exts.w	r0,r0		! (delay slot) sign-extend the remaining word

! Counts 1..15: one shar per bit; the last one sits in the rts delay slot.
LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0

! Count 0: r0 already holds the unshifted value.
LOCAL(ashrsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashrsi3))
#endif

#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Left shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shift count (only the low 5 bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5, T bit
!
! Implementation: the count, masked to 0..31, indexes a table of byte
! offsets.  Each offset selects an entry point into a short chain that
! combines shll2 steps with a shll / shll8 / shll16 tail.

	.global	GLOBAL(ashlsi3)
	HIDDEN_FUNC(GLOBAL(ashlsi3))
	.align	2
GLOBAL(ashlsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the handler from the table
#ifdef __sh1__
	add	r5,r0			! absolute handler address
	jmp	@r0
#else
	braf	r5			! pc-relative; the table directly follows the delay slot
#endif
	mov	r4,r0			! (delay slot) work on a copy in r0

	.align	2
LOCAL(ashlsi3_table):
	.byte	LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

! Counts 2, 4, 6: shll2 steps only.
LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0

! Counts 1, 3, 5, 7: shll2 steps, then one single-bit shift.
LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0

! Counts 8, 10, 12, 14: shll2 steps, then shift by 8.
LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0

! Counts 9, 11, 13, 15: shll2 steps, then shift by 8 + 1.
LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0

! Counts 16, 18, 20, 22: shll2 steps, then shift by 16.
LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0

! Counts 17, 19, 21, 23: shll2 steps, then shift by 16 + 1.
LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0

! Counts 24, 26, 28, 30: shll2 steps, then shift by 16 + 8.
LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0

! Counts 25, 27, 29, 31: shll2 steps, then shift by 16 + 8 + 1.
LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0

! Count 0: r0 already holds the unshifted value.
LOCAL(ashlsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(ashlsi3))
#endif

#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Logical (zero-filling) right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shift count (only the low 5 bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5, T bit
!
! Implementation: the count, masked to 0..31, indexes a table of byte
! offsets.  Each offset selects an entry point into a short chain that
! combines shlr2 steps with a shlr / shlr8 / shlr16 tail.

	.global	GLOBAL(lshrsi3)
	HIDDEN_FUNC(GLOBAL(lshrsi3))
	.align	2
GLOBAL(lshrsi3):
	mov	#31,r0
	and	r0,r5			! r5 = count & 31
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5		! r5 = offset of the handler from the table
#ifdef __sh1__
	add	r5,r0			! absolute handler address
	jmp	@r0
#else
	braf	r5			! pc-relative; the table directly follows the delay slot
#endif
	mov	r4,r0			! (delay slot) work on a copy in r0

	.align	2
LOCAL(lshrsi3_table):
	.byte	LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

! Counts 2, 4, 6: shlr2 steps only.
LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0

! Counts 1, 3, 5, 7: shlr2 steps, then one single-bit shift.
LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0

! Counts 8, 10, 12, 14: shlr2 steps, then shift by 8.
LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0

! Counts 9, 11, 13, 15: shlr2 steps, then shift by 8 + 1.
LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0

! Counts 16, 18, 20, 22: shlr2 steps, then shift by 16.
LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0

! Counts 17, 19, 21, 23: shlr2 steps, then shift by 16 + 1.
LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0

! Counts 24, 26, 28, 30: shlr2 steps, then shift by 16 + 8.
LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0

! Counts 25, 27, 29, 31: shlr2 steps, then shift by 16 + 8 + 1.
LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0

! Count 0: r0 already holds the unshifted value.
LOCAL(lshrsi3_0):
	rts
	nop

	ENDFUNC(GLOBAL(lshrsi3))
#endif

#ifdef L_movmem

/* GLOBAL(movmem) and GLOBAL(movmemSI<n>), n = 4, 8, ..., 64

   Block copy helpers that move one 32-bit word per load/store pair.

   Entry:	r4 = destination address
		r5 = source address
		r6 = size control value (movmem only; see NOTE below)
   Destroys:	r0; movmem additionally changes r4, r5, r6 and T, and
		saves / restores pr on the stack.

   movmemSI<n> copies the words at offsets n-4 down to 0.  The entry
   points fall through one another down to movmemSI4, which returns, so
   their order and their fixed size of two instructions each must be
   kept: movmem computes jump targets inside this chain.

   NOTE(review): the exact encoding of r6 expected by movmem is chosen
   by the compiler and is not visible in this file; confirm it there
   before changing the loop.  */

	.text
	.balign	4
	.global	GLOBAL(movmem)
	HIDDEN_FUNC(GLOBAL(movmem))
	HIDDEN_ALIAS(movstr,movmem)
	/* This would be a lot simpler if r6 contained the byte count
	   minus 64, and we wouldn't be called here for a byte count of 64.  */
GLOBAL(movmem):
	sts.l	pr,@-r15		! save the return address; bsr overwrites pr
	shll2	r6			! scale the control value by 4
	bsr	GLOBAL(movmemSI52+2)	! enter the chain just after its first load ...
	mov.l	@(48,r5),r0		! ... which is done here, in the delay slot
	.balign	4
LOCAL(movmem_loop):	/* Reached with rts */
	mov.l	@(60,r5),r0
	add	#-64,r6
	mov.l	r0,@(60,r4)
	tst	r6,r6
	mov.l	@(56,r5),r0
	bt	LOCAL(movmem_done)	! r6 == 0: finish this group and return
	mov.l	r0,@(56,r4)
	cmp/pl	r6
	mov.l	@(52,r5),r0
	add	#64,r5
	mov.l	r0,@(52,r4)
	add	#64,r4
	bt	GLOBAL(movmemSI52)	! r6 > 0: copy the rest of the next group
! done all the large groups, do the remainder
! r6 is negative here; it selects the entry point inside the movmemSI chain
	mova	GLOBAL(movmemSI4)+4,r0
	add	r6,r0
	jmp	@r0
LOCAL(movmem_done): ! share slot insn, works out aligned.
	lds.l	@r15+,pr
	mov.l	r0,@(56,r4)
	mov.l	@(52,r5),r0
	rts
	mov.l	r0,@(52,r4)
	.balign	4
! ??? We need aliases movstr* for movmem* for the older libraries.  These
! aliases will be removed at some point in the future.
	.global	GLOBAL(movmemSI64)
	HIDDEN_FUNC(GLOBAL(movmemSI64))
	HIDDEN_ALIAS(movstrSI64,movmemSI64)
GLOBAL(movmemSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movmemSI60)
	HIDDEN_FUNC(GLOBAL(movmemSI60))
	HIDDEN_ALIAS(movstrSI60,movmemSI60)
GLOBAL(movmemSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movmemSI56)
	HIDDEN_FUNC(GLOBAL(movmemSI56))
	HIDDEN_ALIAS(movstrSI56,movmemSI56)
GLOBAL(movmemSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movmemSI52)
	HIDDEN_FUNC(GLOBAL(movmemSI52))
	HIDDEN_ALIAS(movstrSI52,movmemSI52)
GLOBAL(movmemSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movmemSI48)
	HIDDEN_FUNC(GLOBAL(movmemSI48))
	HIDDEN_ALIAS(movstrSI48,movmemSI48)
GLOBAL(movmemSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movmemSI44)
	HIDDEN_FUNC(GLOBAL(movmemSI44))
	HIDDEN_ALIAS(movstrSI44,movmemSI44)
GLOBAL(movmemSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movmemSI40)
	HIDDEN_FUNC(GLOBAL(movmemSI40))
	HIDDEN_ALIAS(movstrSI40,movmemSI40)
GLOBAL(movmemSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movmemSI36)
	HIDDEN_FUNC(GLOBAL(movmemSI36))
	HIDDEN_ALIAS(movstrSI36,movmemSI36)
GLOBAL(movmemSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movmemSI32)
	HIDDEN_FUNC(GLOBAL(movmemSI32))
	HIDDEN_ALIAS(movstrSI32,movmemSI32)
GLOBAL(movmemSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movmemSI28)
	HIDDEN_FUNC(GLOBAL(movmemSI28))
	HIDDEN_ALIAS(movstrSI28,movmemSI28)
GLOBAL(movmemSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movmemSI24)
	HIDDEN_FUNC(GLOBAL(movmemSI24))
	HIDDEN_ALIAS(movstrSI24,movmemSI24)
GLOBAL(movmemSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movmemSI20)
	HIDDEN_FUNC(GLOBAL(movmemSI20))
	HIDDEN_ALIAS(movstrSI20,movmemSI20)
GLOBAL(movmemSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movmemSI16)
	HIDDEN_FUNC(GLOBAL(movmemSI16))
	HIDDEN_ALIAS(movstrSI16,movmemSI16)
GLOBAL(movmemSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movmemSI12)
	HIDDEN_FUNC(GLOBAL(movmemSI12))
	HIDDEN_ALIAS(movstrSI12,movmemSI12)
GLOBAL(movmemSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movmemSI8)
	HIDDEN_FUNC(GLOBAL(movmemSI8))
	HIDDEN_ALIAS(movstrSI8,movmemSI8)
GLOBAL(movmemSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movmemSI4)
	HIDDEN_FUNC(GLOBAL(movmemSI4))
	HIDDEN_ALIAS(movstrSI4,movmemSI4)
GLOBAL(movmemSI4):
	mov.l	@(0,r5),r0
	rts
	mov.l	r0,@(0,r4)

	ENDFUNC(GLOBAL(movmemSI64))
	ENDFUNC(GLOBAL(movmemSI60))
	ENDFUNC(GLOBAL(movmemSI56))
	ENDFUNC(GLOBAL(movmemSI52))
	ENDFUNC(GLOBAL(movmemSI48))
	ENDFUNC(GLOBAL(movmemSI44))
	ENDFUNC(GLOBAL(movmemSI40))
	ENDFUNC(GLOBAL(movmemSI36))
	ENDFUNC(GLOBAL(movmemSI32))
	ENDFUNC(GLOBAL(movmemSI28))
	ENDFUNC(GLOBAL(movmemSI24))
	ENDFUNC(GLOBAL(movmemSI20))
	ENDFUNC(GLOBAL(movmemSI16))
	ENDFUNC(GLOBAL(movmemSI12))
	ENDFUNC(GLOBAL(movmemSI8))
	ENDFUNC(GLOBAL(movmemSI4))
	ENDFUNC(GLOBAL(movmem))
#endif

#ifdef L_movmem_i4
	.text
	.global	GLOBAL(movmem_i4_even)
	.global	GLOBAL(movmem_i4_odd)
	.global	GLOBAL(movmemSI12_i4)

	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))

HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) 867 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) 868 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) 869 870 .p2align 5 871L_movmem_2mod4_end: 872 mov.l r0,@(16,r4) 873 rts 874 mov.l r1,@(20,r4) 875 876 .p2align 2 877 878GLOBAL(movmem_i4_even): 879 mov.l @r5+,r0 880 bra L_movmem_start_even 881 mov.l @r5+,r1 882 883GLOBAL(movmem_i4_odd): 884 mov.l @r5+,r1 885 add #-4,r4 886 mov.l @r5+,r2 887 mov.l @r5+,r3 888 mov.l r1,@(4,r4) 889 mov.l r2,@(8,r4) 890 891L_movmem_loop: 892 mov.l r3,@(12,r4) 893 dt r6 894 mov.l @r5+,r0 895 bt/s L_movmem_2mod4_end 896 mov.l @r5+,r1 897 add #16,r4 898L_movmem_start_even: 899 mov.l @r5+,r2 900 mov.l @r5+,r3 901 mov.l r0,@r4 902 dt r6 903 mov.l r1,@(4,r4) 904 bf/s L_movmem_loop 905 mov.l r2,@(8,r4) 906 rts 907 mov.l r3,@(12,r4) 908 909 ENDFUNC(GLOBAL(movmem_i4_even)) 910 ENDFUNC(GLOBAL(movmem_i4_odd)) 911 912 .p2align 4 913GLOBAL(movmemSI12_i4): 914 mov.l @r5,r0 915 mov.l @(4,r5),r1 916 mov.l @(8,r5),r2 917 mov.l r0,@r4 918 mov.l r1,@(4,r4) 919 rts 920 mov.l r2,@(8,r4) 921 922 ENDFUNC(GLOBAL(movmemSI12_i4)) 923#endif 924 925#ifdef L_mulsi3 926 927 928 .global GLOBAL(mulsi3) 929 HIDDEN_FUNC(GLOBAL(mulsi3)) 930 931! r4 = aabb 932! r5 = ccdd 933! r0 = aabb*ccdd via partial products 934! 935! if aa == 0 and cc = 0 936! r0 = bb*dd 937! 938! else 939! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) 940! 941 942GLOBAL(mulsi3): 943 mulu.w r4,r5 ! multiply the lsws macl=bb*dd 944 mov r5,r3 ! r3 = ccdd 945 swap.w r4,r2 ! r2 = bbaa 946 xtrct r2,r3 ! r3 = aacc 947 tst r3,r3 ! msws zero ? 948 bf hiset 949 rts ! yes - then we have the answer 950 sts macl,r0 951 952hiset: sts macl,r0 ! r0 = bb*dd 953 mulu.w r2,r5 ! brewing macl = aa*dd 954 sts macl,r1 955 mulu.w r3,r4 ! brewing macl = cc*bb 956 sts macl,r2 957 add r1,r2 958 shll16 r2 959 rts 960 add r2,r0 961 962 ENDFUNC(GLOBAL(mulsi3)) 963#endif 964#endif /* ! __SH5__ */ 965#ifdef L_sdivsi3_i4 966 .title "SH DIVIDE" 967!! 
4 byte integer Divide code for the Renesas SH 968#ifdef __SH4__ 969!! args in r4 and r5, result in fpul, clobber dr0, dr2 970 971 .global GLOBAL(sdivsi3_i4) 972 HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) 973GLOBAL(sdivsi3_i4): 974 lds r4,fpul 975 float fpul,dr0 976 lds r5,fpul 977 float fpul,dr2 978 fdiv dr2,dr0 979 rts 980 ftrc dr0,fpul 981 982 ENDFUNC(GLOBAL(sdivsi3_i4)) 983#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) 984!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 985 986#if ! __SH5__ || __SH5__ == 32 987#if __SH5__ 988 .mode SHcompact 989#endif 990 .global GLOBAL(sdivsi3_i4) 991 HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) 992GLOBAL(sdivsi3_i4): 993 sts.l fpscr,@-r15 994 mov #8,r2 995 swap.w r2,r2 996 lds r2,fpscr 997 lds r4,fpul 998 float fpul,dr0 999 lds r5,fpul 1000 float fpul,dr2 1001 fdiv dr2,dr0 1002 ftrc dr0,fpul 1003 rts 1004 lds.l @r15+,fpscr 1005 1006 ENDFUNC(GLOBAL(sdivsi3_i4)) 1007#endif /* ! __SH5__ || __SH5__ == 32 */ 1008#endif /* ! __SH4__ */ 1009#endif 1010 1011#ifdef L_sdivsi3 1012/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with 1013 sh2e/sh3e code. */ 1014#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__) 1015!! 1016!! Steve Chamberlain 1017!! sac@cygnus.com 1018!! 1019!! 1020 1021!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit 1022 1023 .global GLOBAL(sdivsi3) 1024#if __SHMEDIA__ 1025#if __SH5__ == 32 1026 .section .text..SHmedia32,"ax" 1027#else 1028 .text 1029#endif 1030 .align 2 1031#if 0 1032/* The assembly code that follows is a hand-optimized version of the C 1033 code that follows. Note that the registers that are modified are 1034 exactly those listed as clobbered in the patterns divsi3_i1 and 1035 divsi3_i1_media. 
1036 1037int __sdivsi3 (i, j) 1038 int i, j; 1039{ 1040 register unsigned long long r18 asm ("r18"); 1041 register unsigned long long r19 asm ("r19"); 1042 register unsigned long long r0 asm ("r0") = 0; 1043 register unsigned long long r1 asm ("r1") = 1; 1044 register int r2 asm ("r2") = i >> 31; 1045 register int r3 asm ("r3") = j >> 31; 1046 1047 r2 = r2 ? r2 : r1; 1048 r3 = r3 ? r3 : r1; 1049 r18 = i * r2; 1050 r19 = j * r3; 1051 r2 *= r3; 1052 1053 r19 <<= 31; 1054 r1 <<= 31; 1055 do 1056 if (r18 >= r19) 1057 r0 |= r1, r18 -= r19; 1058 while (r19 >>= 1, r1 >>= 1); 1059 1060 return r2 * (int)r0; 1061} 1062*/ 1063GLOBAL(sdivsi3): 1064 pt/l LOCAL(sdivsi3_dontadd), tr2 1065 pt/l LOCAL(sdivsi3_loop), tr1 1066 ptabs/l r18, tr0 1067 movi 0, r0 1068 movi 1, r1 1069 shari.l r4, 31, r2 1070 shari.l r5, 31, r3 1071 cmveq r2, r1, r2 1072 cmveq r3, r1, r3 1073 muls.l r4, r2, r18 1074 muls.l r5, r3, r19 1075 muls.l r2, r3, r2 1076 shlli r19, 31, r19 1077 shlli r1, 31, r1 1078LOCAL(sdivsi3_loop): 1079 bgtu r19, r18, tr2 1080 or r0, r1, r0 1081 sub r18, r19, r18 1082LOCAL(sdivsi3_dontadd): 1083 shlri r1, 1, r1 1084 shlri r19, 1, r19 1085 bnei r1, 0, tr1 1086 muls.l r0, r2, r0 1087 add.l r0, r63, r0 1088 blink tr0, r63 1089#elif 0 /* ! 0 */ 1090 // inputs: r4,r5 1091 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 1092 // result in r0 1093GLOBAL(sdivsi3): 1094 // can create absolute value without extra latency, 1095 // but dependent on proper sign extension of inputs: 1096 // shari.l r5,31,r2 1097 // xor r5,r2,r20 1098 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. 1099 shari.l r5,31,r2 1100 ori r2,1,r2 1101 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. 
1102 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 1103 shari.l r4,31,r3 1104 nsb r20,r0 1105 shlld r20,r0,r25 1106 shlri r25,48,r25 1107 sub r19,r25,r1 1108 mmulfx.w r1,r1,r2 1109 mshflo.w r1,r63,r1 1110 // If r4 was to be used in-place instead of r21, could use this sequence 1111 // to compute absolute: 1112 // sub r63,r4,r19 // compute absolute value of r4 1113 // shlri r4,32,r3 // into lower 32 bit of r4, keeping 1114 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. 1115 ori r3,1,r3 1116 mmulfx.w r25,r2,r2 1117 sub r19,r0,r0 1118 muls.l r4,r3,r21 1119 msub.w r1,r2,r2 1120 addi r2,-2,r1 1121 mulu.l r21,r1,r19 1122 mmulfx.w r2,r2,r2 1123 shlli r1,15,r1 1124 shlrd r19,r0,r19 1125 mulu.l r19,r20,r3 1126 mmacnfx.wl r25,r2,r1 1127 ptabs r18,tr0 1128 sub r21,r3,r25 1129 1130 mulu.l r25,r1,r2 1131 addi r0,14,r0 1132 xor r4,r5,r18 1133 shlrd r2,r0,r2 1134 mulu.l r2,r20,r3 1135 add r19,r2,r19 1136 shari.l r18,31,r18 1137 sub r25,r3,r25 1138 1139 mulu.l r25,r1,r2 1140 sub r25,r20,r25 1141 add r19,r18,r19 1142 shlrd r2,r0,r2 1143 mulu.l r2,r20,r3 1144 addi r25,1,r25 1145 add r19,r2,r19 1146 1147 cmpgt r25,r3,r25 1148 add.l r19,r25,r0 1149 xor r0,r18,r0 1150 blink tr0,r63 1151#else /* ! 0 && ! 0 */ 1152 1153 // inputs: r4,r5 1154 // clobbered: r1,r18,r19,r20,r21,r25,tr0 1155 // result in r0 1156 HIDDEN_FUNC(GLOBAL(sdivsi3_2)) 1157#ifndef __pic__ 1158 FUNC(GLOBAL(sdivsi3)) 1159GLOBAL(sdivsi3): /* this is the shcompact entry point */ 1160 // The special SHmedia entry point sdivsi3_1 prevents accidental linking 1161 // with the SHcompact implementation, which clobbers tr1 / tr2. 
1162 .global GLOBAL(sdivsi3_1) 1163GLOBAL(sdivsi3_1): 1164 .global GLOBAL(div_table_internal) 1165 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 1166 shori GLOBAL(div_table_internal) & 65535, r20 1167#endif 1168 .global GLOBAL(sdivsi3_2) 1169 // div_table in r20 1170 // clobbered: r1,r18,r19,r21,r25,tr0 1171GLOBAL(sdivsi3_2): 1172 nsb r5, r1 1173 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 1174 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) 1175 ldx.ub r20, r21, r19 // u0.8 1176 shari r25, 32, r25 // normalize to s2.30 1177 shlli r21, 1, r21 1178 muls.l r25, r19, r19 // s2.38 1179 ldx.w r20, r21, r21 // s2.14 1180 ptabs r18, tr0 1181 shari r19, 24, r19 // truncate to s2.14 1182 sub r21, r19, r19 // some 11 bit inverse in s1.14 1183 muls.l r19, r19, r21 // u0.28 1184 sub r63, r1, r1 1185 addi r1, 92, r1 1186 muls.l r25, r21, r18 // s2.58 1187 shlli r19, 45, r19 // multiply by two and convert to s2.58 1188 /* bubble */ 1189 sub r19, r18, r18 1190 shari r18, 28, r18 // some 22 bit inverse in s1.30 1191 muls.l r18, r25, r0 // s2.60 1192 muls.l r18, r4, r25 // s32.30 1193 /* bubble */ 1194 shari r0, 16, r19 // s-16.44 1195 muls.l r19, r18, r19 // s-16.74 1196 shari r25, 63, r0 1197 shari r4, 14, r18 // s19.-14 1198 shari r19, 30, r19 // s-16.44 1199 muls.l r19, r18, r19 // s15.30 1200 xor r21, r0, r21 // You could also use the constant 1 << 27. 
1201 add r21, r25, r21 1202 sub r21, r19, r21 1203 shard r21, r1, r21 1204 sub r21, r0, r0 1205 blink tr0, r63 1206#ifndef __pic__ 1207 ENDFUNC(GLOBAL(sdivsi3)) 1208#endif 1209 ENDFUNC(GLOBAL(sdivsi3_2)) 1210#endif 1211#elif defined __SHMEDIA__ 1212/* m5compact-nofpu */ 1213 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 1214 .mode SHmedia 1215 .section .text..SHmedia32,"ax" 1216 .align 2 1217 FUNC(GLOBAL(sdivsi3)) 1218GLOBAL(sdivsi3): 1219 pt/l LOCAL(sdivsi3_dontsub), tr0 1220 pt/l LOCAL(sdivsi3_loop), tr1 1221 ptabs/l r18,tr2 1222 shari.l r4,31,r18 1223 shari.l r5,31,r19 1224 xor r4,r18,r20 1225 xor r5,r19,r21 1226 sub.l r20,r18,r20 1227 sub.l r21,r19,r21 1228 xor r18,r19,r19 1229 shlli r21,32,r25 1230 addi r25,-1,r21 1231 addz.l r20,r63,r20 1232LOCAL(sdivsi3_loop): 1233 shlli r20,1,r20 1234 bgeu/u r21,r20,tr0 1235 sub r20,r21,r20 1236LOCAL(sdivsi3_dontsub): 1237 addi.l r25,-1,r25 1238 bnei r25,-32,tr1 1239 xor r20,r19,r20 1240 sub.l r20,r19,r0 1241 blink tr2,r63 1242 ENDFUNC(GLOBAL(sdivsi3)) 1243#else /* ! 
__SHMEDIA__ */ 1244 FUNC(GLOBAL(sdivsi3)) 1245GLOBAL(sdivsi3): 1246 mov r4,r1 1247 mov r5,r0 1248 1249 tst r0,r0 1250 bt div0 1251 mov #0,r2 1252 div0s r2,r1 1253 subc r3,r3 1254 subc r2,r1 1255 div0s r0,r3 1256 rotcl r1 1257 div1 r0,r3 1258 rotcl r1 1259 div1 r0,r3 1260 rotcl r1 1261 div1 r0,r3 1262 rotcl r1 1263 div1 r0,r3 1264 rotcl r1 1265 div1 r0,r3 1266 rotcl r1 1267 div1 r0,r3 1268 rotcl r1 1269 div1 r0,r3 1270 rotcl r1 1271 div1 r0,r3 1272 rotcl r1 1273 div1 r0,r3 1274 rotcl r1 1275 div1 r0,r3 1276 rotcl r1 1277 div1 r0,r3 1278 rotcl r1 1279 div1 r0,r3 1280 rotcl r1 1281 div1 r0,r3 1282 rotcl r1 1283 div1 r0,r3 1284 rotcl r1 1285 div1 r0,r3 1286 rotcl r1 1287 div1 r0,r3 1288 rotcl r1 1289 div1 r0,r3 1290 rotcl r1 1291 div1 r0,r3 1292 rotcl r1 1293 div1 r0,r3 1294 rotcl r1 1295 div1 r0,r3 1296 rotcl r1 1297 div1 r0,r3 1298 rotcl r1 1299 div1 r0,r3 1300 rotcl r1 1301 div1 r0,r3 1302 rotcl r1 1303 div1 r0,r3 1304 rotcl r1 1305 div1 r0,r3 1306 rotcl r1 1307 div1 r0,r3 1308 rotcl r1 1309 div1 r0,r3 1310 rotcl r1 1311 div1 r0,r3 1312 rotcl r1 1313 div1 r0,r3 1314 rotcl r1 1315 div1 r0,r3 1316 rotcl r1 1317 div1 r0,r3 1318 rotcl r1 1319 div1 r0,r3 1320 rotcl r1 1321 addc r2,r1 1322 rts 1323 mov r1,r0 1324 1325 1326div0: rts 1327 mov #0,r0 1328 1329 ENDFUNC(GLOBAL(sdivsi3)) 1330#endif /* ! __SHMEDIA__ */ 1331#endif /* ! __SH4__ */ 1332#endif 1333#ifdef L_udivsi3_i4 1334 1335 .title "SH DIVIDE" 1336!! 4 byte integer Divide code for the Renesas SH 1337#ifdef __SH4__ 1338!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, 1339!! 
!! and t bit

/* udivsi3_i4 (SH4): unsigned 32-bit divide done in double precision.
   rotr turns the 1 in r1 into 0x80000000; both operands are xor-ed with
   it before the signed int-to-double conversion, and 2147483648.0 (the
   constant at L1) is added back afterwards.  A divisor of 0 or 1 takes
   the `trivial' exit, which returns the dividend unchanged.  */
	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	rotr	r1
	xor	r1,r4
	lds	r4,fpul
	mova	L1,r0
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
	fmov.s	@r0+,DR40
	fmov.s	@r0,DR41
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul	/* delay slot: truncated quotient to fpul */

trivial:
	rts
	lds	r4,fpul		/* delay slot: result = dividend */

	.align 2
#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
L1:
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
#if ! __SH5__ || __SH5__ == 32
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* SHmedia variant: both operands are zero-extended (addz.l with r63),
   converted to double, divided and truncated; fr33 is then copied to fr32.
   NOTE(review): the header above says "result in fpul" while the code
   leaves it in fr32 - presumably the same register as seen from SHcompact
   code; confirm against the SH-5 manual.  */
	.mode	SHmedia
	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	addz.l	r4,r63,r20
	addz.l	r5,r63,r21
	fmov.qd	r20,dr0
	fmov.qd	r21,dr32
	ptabs	r18,tr0
	float.qd	dr0,dr0
	float.qd	dr32,dr32
	fdiv.d	dr0,dr32,dr0
	ftrc.dq	dr0,dr32
	fmov.s	fr33,fr32
	blink	tr0,r63

	ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!!
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4

/* udivsi3_i4 (SH4 single-precision-default ABIs): same double-precision
   divide as the plain SH4 variant, but the caller's FPSCR is pushed on the
   stack, replaced by the word stored at L1 for the duration of the
   computation, and popped again in the delay slot of the final rts.
   A divisor of 0 or 1 takes the `trivial' exit (result = dividend)
   before FPSCR is touched.  */
	.global	GLOBAL(udivsi3_i4)
	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	sts.l	fpscr,@-r15	/* save caller FPSCR */
	mova	L1,r0
	lds.l	@r0+,fpscr	/* FPSCR = first word at L1; r0 now points at the double */
	rotr	r1
	xor	r1,r4
	lds	r4,fpul
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
	fmov.s	@r0+,DR40
	fmov.s	@r0,DR41
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr	/* delay slot: restore caller FPSCR */

#ifdef FMOVD_WORKS
	.align 3	! make double below 8 byte aligned.
#endif
trivial:
	rts
	lds	r4,fpul

	.align 2
L1:
#ifndef FMOVD_WORKS
	.long 0x80000
#else
	.long 0x180000
#endif
	.double 2147483648

	ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH4__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh2e/sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) || defined (__OpenBSD__)

!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
	.global	GLOBAL(udivsi3)
	HIDDEN_FUNC(GLOBAL(udivsi3))

#if __SHMEDIA__
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
   code that follows.  Note that the registers that are modified are
   exactly those listed as clobbered in the patterns udivsi3_i1 and
   udivsi3_i1_media.
unsigned
__udivsi3 (i, j)
    unsigned i, j;
{
  register unsigned long long r0 asm ("r0") = 0;
  register unsigned long long r18 asm ("r18") = 1;
  register unsigned long long r4 asm ("r4") = i;
  register unsigned long long r19 asm ("r19") = j;

  r19 <<= 31;
  r18 <<= 31;
  do
    if (r4 >= r19)
      r0 |= r18, r4 -= r19;
  while (r19 >>= 1, r18 >>= 1);

  return r0;
}
*/
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontadd), tr2
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18, tr0
	movi	0, r0
	movi	1, r18
	addz.l	r5, r63, r19
	addz.l	r4, r63, r4
	shlli	r19, 31, r19
	shlli	r18, 31, r18
LOCAL(udivsi3_loop):
	bgtu	r19, r4, tr2
	or	r0, r18, r0
	sub	r4, r19, r4
LOCAL(udivsi3_dontadd):
	shlri	r18, 1, r18
	shlri	r19, 1, r19
	bnei	r18, 0, tr1
	blink	tr0, r63
#else
/* udivsi3 (SHmedia): unsigned 32-bit divide.  The instruction order is
   scheduling-sensitive (see the "bubble" markers), so it is kept exactly
   as written.  */
GLOBAL(udivsi3):
	// inputs: r4,r5
	// clobbered: r18,r19,r20,r21,r22,r25,tr0
	// result in r0.
	addz.l	r5,r63,r22
	nsb	r22,r0
	shlld	r22,r0,r25
	shlri	r25,48,r25
	movi	0xffffffffffffbb0c,r20 // shift count equiv 76
	sub	r20,r25,r21
	mmulfx.w	r21,r21,r19
	mshflo.w	r21,r63,r21
	ptabs	r18,tr0
	mmulfx.w	r25,r19,r19
	sub	r20,r0,r0
	/* bubble */
	msub.w	r21,r19,r19
	addi	r19,-2,r21 /* It would be nice for scheduling to do this add to r21
			      before the msub.w, but we need a different value for
			      r19 to keep errors under control.  */
	mulu.l	r4,r21,r18
	mmulfx.w	r19,r19,r19
	shlli	r21,15,r21
	shlrd	r18,r0,r18
	mulu.l	r18,r22,r20
	mmacnfx.wl	r25,r19,r21
	/* bubble */
	sub	r4,r20,r25

	mulu.l	r25,r21,r19
	addi	r0,14,r0
	/* bubble */
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	add	r18,r19,r18
	/* bubble */
	sub.l	r25,r20,r25

	mulu.l	r25,r21,r19
	addz.l	r25,r63,r25
	sub	r25,r22,r25
	shlrd	r19,r0,r19
	mulu.l	r19,r22,r20
	addi	r25,1,r25
	add	r18,r19,r18

	cmpgt	r25,r20,r25
	add.l	r18,r25,r0
	blink	tr0,r63
#endif
	/* Pairs with the HIDDEN_FUNC above, as the SHcompact variant does.  */
	ENDFUNC(GLOBAL(udivsi3))
#elif defined (__SHMEDIA__)
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
   So use a short shmedia loop.  */
/* In: r4 = dividend, r5 = divisor, r18 = return address.  Out: r0.
   Same 32-iteration shift-and-subtract loop as the m5compact sdivsi3,
   without the sign handling.  */
	// clobbered: r20,r21,r25,tr0,tr1,tr2
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
GLOBAL(udivsi3):
	pt/l	LOCAL(udivsi3_dontsub), tr0
	pt/l	LOCAL(udivsi3_loop), tr1
	ptabs/l	r18,tr2
	shlli	r5,32,r25
	addi	r25,-1,r21	/* r21 = (divisor << 32) - 1 */
	addz.l	r4,r63,r20
LOCAL(udivsi3_loop):
	shlli	r20,1,r20
	bgeu/u	r21,r20,tr0
	sub	r20,r21,r20
LOCAL(udivsi3_dontsub):
	addi.l	r25,-1,r25	/* low word of r25 counts -1 .. -32 */
	bnei	r25,-32,tr1
	add.l	r20,r63,r0
	blink	tr2,r63
	/* Pairs with the HIDDEN_FUNC above, as the SHcompact variant does.  */
	ENDFUNC(GLOBAL(udivsi3))
#else /* !
defined (__SHMEDIA__) */
/* udivsi3 (SHcompact): unsigned 32-bit divide, r4 / r5 -> r0.
   A divisor that fits in 16 bits (extu.w r5 == r5) is handled with two
   16-step passes through the div8/div7 helpers; anything else branches to
   large_divisor, which runs four passes through divx4.  pr is saved on
   the stack because the helpers are reached with bsr.  */
LOCAL(div8):
	div1 r5,r4
LOCAL(div7):
	div1 r5,r4; div1 r5,r4; div1 r5,r4
	div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4

LOCAL(divx4):
	div1 r5,r4; rotcl r0
	div1 r5,r4; rotcl r0
	div1 r5,r4; rotcl r0
	rts; div1 r5,r4

GLOBAL(udivsi3):
	sts.l	pr,@-r15
	extu.w	r5,r0
	cmp/eq	r5,r0
#ifdef __sh1__
	bf	LOCAL(large_divisor)
#else
	bf/s	LOCAL(large_divisor)
#endif
	div0u
	swap.w	r4,r0
	shlr16	r4
	bsr	LOCAL(div8)
	shll16	r5
	bsr	LOCAL(div7)
	div1	r5,r4
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(div8)
	swap.w	r4,r4
	bsr	LOCAL(div7)
	div1	r5,r4
	lds.l	@r15+,pr
	xtrct	r4,r0
	swap.w	r0,r0
	rotcl	r0
	rts
	shlr16	r5		/* delay slot: undo the shll16 on r5 */

LOCAL(large_divisor):
#ifdef __sh1__
	div0u
#endif
	mov	#0,r0
	xtrct	r4,r0
	xtrct	r0,r4
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	bsr	LOCAL(divx4)
	rotcl	r0
	lds.l	@r15+,pr
	rts
	rotcl	r0

	ENDFUNC(GLOBAL(udivsi3))
#endif /* ! __SHMEDIA__ */
#endif /* __SH4__ */
#endif /* L_udivsi3 */

#ifdef L_udivdi3
#ifdef __SHMEDIA__
/* udivdi3 (SHmedia): unsigned 64-bit divide, r2 / r3 -> r2, returning
   through r18.  A reciprocal of the normalised divisor is refined in r1,
   then the quotient is built in divide steps; large_divisor is the path
   taken when the bgt/u below fires.  udivdi3_internal is a hidden alias
   for the same entry point.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(udivdi3)
	FUNC(GLOBAL(udivdi3))
GLOBAL(udivdi3):
	HIDDEN_ALIAS(udivdi3_internal,udivdi3)
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub	r21,r5,r1
	mmulfx.w	r1,r1,r4
	mshflo.w	r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w	r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w	r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w	r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
			   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r8
	mshalds.l	r1,r21,r1
	shari	r4,26,r4
	shlld	r8,r0,r8
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r8,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	shlld	r5,r0,r8
	addi	r20,30-22,r0
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	add	r8,r21,r8
	mcmpgt.l	r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l	r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	addi	r2,1,r2
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	add	r8,r7,r8
	sub	r2,r3,r2
	cmpgt	r2,r5,r5
	add	r8,r5,r2
	/* could test r3 here to check for divide by zero.  */
	blink	tr0,r63

LOCAL(large_divisor):
	mmulfx.w	r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
			   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l	r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	/* bubble */
	cmpgtu	r5,r2,r5
	sub	r8,r5,r2
	blink	tr0,r63
	ENDFUNC(GLOBAL(udivdi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.
*/
#endif /* __SHMEDIA__ */
#endif /* L_udivdi3 */

#ifdef L_divdi3
#ifdef __SHMEDIA__
/* divdi3 (SHmedia): signed 64-bit divide, r2 / r3 -> r2.
   Both operands are replaced by their absolute values.  When their sign
   masks (r22, r23) are equal the routine jumps straight to
   udivdi3_internal; otherwise it calls it (return address saved in tr1)
   and negates the quotient.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(divdi3)
	FUNC(GLOBAL(divdi3))
GLOBAL(divdi3):
	pta	GLOBAL(udivdi3_internal),tr0
	shari	r2,63,r22
	shari	r3,63,r23
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2
	sub	r3,r23,r3
	beq/u	r22,r23,tr0
	ptabs	r18,tr1
	blink	tr0,r18
	sub	r63,r2,r2
	blink	tr1,r63
	ENDFUNC(GLOBAL(divdi3))
#endif /* __SHMEDIA__ */
#endif /* L_divdi3 */

#ifdef L_umoddi3
#ifdef __SHMEDIA__
/* umoddi3 (SHmedia): unsigned 64-bit remainder, r2 % r3 -> r2, returning
   through r18.  Follows the same reciprocal scheme as udivdi3 but keeps
   the rest instead of the quotient.  umoddi3_internal is a hidden alias
   for the same entry point.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(umoddi3)
	FUNC(GLOBAL(umoddi3))
GLOBAL(umoddi3):
	HIDDEN_ALIAS(umoddi3_internal,umoddi3)
	shlri	r3,1,r4
	nsb	r4,r22
	shlld	r3,r22,r6
	shlri	r6,49,r5
	movi	0xffffffffffffbaf1,r21 /* .l shift count 17.  */
	sub	r21,r5,r1
	mmulfx.w	r1,r1,r4
	mshflo.w	r1,r63,r1
	sub	r63,r22,r20 // r63 == 64 % 64
	mmulfx.w	r5,r4,r4
	pta	LOCAL(large_divisor),tr0
	addi	r20,32,r9
	msub.w	r1,r4,r1
	madd.w	r1,r1,r1
	mmulfx.w	r1,r1,r4
	shlri	r6,32,r7
	bgt/u	r9,r63,tr0 // large_divisor
	mmulfx.w	r5,r4,r4
	shlri	r2,32+14,r19
	addi	r22,-31,r0
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r19,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
			   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	mulu.l	r5,r3,r5
	mshalds.l	r1,r21,r1
	shari	r4,26,r4
	shlld	r5,r0,r5
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r2,r5,r2
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */

	shlri	r2,22,r21
	mulu.l	r21,r1,r21
	addi	r20,30-22,r0
	/* bubble */ /* could test r3 here to check for divide by zero.  */
	shlrd	r21,r0,r21
	mulu.l	r21,r3,r5
	mcmpgt.l	r21,r63,r21 // See Note 1
	addi	r20,30,r0
	mshfhi.l	r63,r21,r21
	sub	r2,r5,r2
	andc	r2,r21,r2

	/* small divisor: need a third divide step */
	mulu.l	r2,r1,r7
	ptabs	r18,tr0
	sub	r2,r3,r8 /* re-use r8 here for rest - r3 */
	shlrd	r7,r0,r7
	mulu.l	r7,r3,r5
	/* bubble */
	addi	r8,1,r7
	cmpgt	r7,r5,r7
	cmvne	r7,r8,r2
	sub	r2,r5,r2
	blink	tr0,r63

LOCAL(large_divisor):
	mmulfx.w	r5,r4,r4
	shlrd	r2,r9,r25
	shlri	r25,32,r8
	msub.w	r1,r4,r1

	mulu.l	r1,r7,r4
	addi	r1,-3,r5
	mulu.l	r5,r8,r5
	sub	r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
	shlri	r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
			   the case may be, %0000000000000000 000.11111111111, still */
	muls.l	r1,r4,r4 /* leaving at least one sign bit.  */
	shlri	r5,14-1,r8
	mulu.l	r8,r7,r5
	mshalds.l	r1,r21,r1
	shari	r4,26,r4
	add	r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
	sub	r25,r5,r25
	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */

	shlri	r25,22,r21
	mulu.l	r21,r1,r21
	pta	LOCAL(no_lo_adj),tr0
	addi	r22,32,r0
	shlri	r21,40,r21
	mulu.l	r21,r7,r5
	add	r8,r21,r8
	shlld	r2,r0,r2
	sub	r25,r5,r25
	bgtu/u	r7,r25,tr0 // no_lo_adj
	addi	r8,1,r8
	sub	r25,r7,r25
LOCAL(no_lo_adj):
	mextr4	r2,r25,r2

	/* large_divisor: only needs a few adjustments.  */
	mulu.l	r8,r6,r5
	ptabs	r18,tr0
	add	r2,r6,r7
	cmpgtu	r5,r2,r8
	cmvne	r8,r7,r2
	sub	r2,r5,r2
	shlrd	r2,r22,r2
	blink	tr0,r63
	ENDFUNC(GLOBAL(umoddi3))
/* Note 1: To shift the result of the second divide stage so that the result
   always fits into 32 bits, yet we still reduce the rest sufficiently
   would require a lot of instructions to do the shifts just right.  Using
   the full 64 bit shift result to multiply with the divisor would require
   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
   Fortunately, if the upper 32 bits of the shift result are nonzero, we
   know that the rest after taking this partial result into account will
   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
   upper 32 bits of the partial result are nonzero.  */
#endif /* __SHMEDIA__ */
#endif /* L_umoddi3 */

#ifdef L_moddi3
#ifdef __SHMEDIA__
/* moddi3 (SHmedia): signed 64-bit remainder, r2 % r3 -> r2.
   Both operands are replaced by their absolute values.  When the
   dividend's sign mask (r22) is zero the routine jumps straight to
   umoddi3_internal; otherwise it calls it (return address saved in tr1)
   and negates the remainder.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(moddi3)
	FUNC(GLOBAL(moddi3))
GLOBAL(moddi3):
	pta	GLOBAL(umoddi3_internal),tr0
	shari	r2,63,r22
	shari	r3,63,r23
	xor	r2,r22,r2
	xor	r3,r23,r3
	sub	r2,r22,r2
	sub	r3,r23,r3
	beq/u	r22,r63,tr0
	ptabs	r18,tr1
	blink	tr0,r18
	sub	r63,r2,r2
	blink	tr1,r63
	ENDFUNC(GLOBAL(moddi3))
#endif /* __SHMEDIA__ */
#endif /* L_moddi3 */

#ifdef L_set_fpscr
#if !defined (__SH2A_NOFPU__)
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
	.mode	SHcompact
#endif
/* set_fpscr: load FPSCR from r4 and store two derived FPSCR images into
   the two-word fpscr_values array.  */
	.global	GLOBAL(set_fpscr)
	HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
	lds	r4,fpscr
#ifdef __PIC__
	mov.l	r12,@-r15	/* r12 is borrowed for the GOT base */
	mova	LOCAL(set_fpscr_L0),r0
	mov.l	LOCAL(set_fpscr_L0),r12
	add	r0,r12
	mov.l	LOCAL(set_fpscr_L1),r0
/* set_fpscr, PIC path continued: r1 is loaded from r12 + r0 (the literals
   further down suggest r12 = GOT base and r0 = GOT offset of
   fpscr_values), then r12 is restored from the stack.  */
	mov.l	@(r0,r12),r1
	mov.l	@r15+,r12
#else
	mov.l	LOCAL(set_fpscr_L1),r1
#endif
	/* Work on the half-swapped value so that bits 19 and 20 of the FPSCR
	   image are reachable with 8-bit immediates (as bits 3 and 4).  */
	swap.w	r4,r0
	or	#24,r0
#ifndef FMOVD_WORKS
	xor	#16,r0
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w	r0,r3
	mov.l	r3,@(4,r1)
#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r2
	mov.l	r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor	#8,r0
#else
	xor	#24,r0
#endif
#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
	swap.w	r0,r2
	rts
	mov.l	r2,@r1
#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r3
	rts
	mov.l	r3,@(4,r1)
#endif
	.align 2
#ifdef __PIC__
LOCAL(set_fpscr_L0):
	.long	_GLOBAL_OFFSET_TABLE_
LOCAL(set_fpscr_L1):
	.long	GLOBAL(fpscr_values@GOT)
#else
LOCAL(set_fpscr_L1):
	.long	GLOBAL(fpscr_values)
#endif

	ENDFUNC(GLOBAL(set_fpscr))
#ifndef NO_FPSCR_VALUES
#ifdef __ELF__
	.comm	GLOBAL(fpscr_values),8,4
#else
	.comm	GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* NO_FPSCR_VALUES */
#endif /* SH2E / SH3E / SH4 */
#endif /* __SH2A_NOFPU__ */
#endif /* L_set_fpscr */
#ifdef L_ic_invalidate
#if __SH5__ == 32
/* init_trampoline (SH5, 32-bit): stores r2 at r0+8, a 64-bit constant
   assembled in r20 at r0+0 and r3 at r0+12, then falls through into
   ic_invalidate, which operates on the same address in r0.  */
	.mode	SHmedia
	.section	.text..SHmedia32,"ax"
	.align	2
	.global	GLOBAL(init_trampoline)
	HIDDEN_FUNC(GLOBAL(init_trampoline))
GLOBAL(init_trampoline):
	st.l	r0,8,r2
#ifdef __LITTLE_ENDIAN__
	movi	9,r20
	shori	0x402b,r20
	shori	0xd101,r20
	shori	0xd002,r20
#else
	movi	0xffffffffffffd002,r20
	shori	0xd101,r20
	shori	0x402b,r20
	shori	9,r20
#endif
	st.q	r0,0,r20
	st.l	r0,12,r3
	ENDFUNC(GLOBAL(init_trampoline))
	.global	GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	r0,0
	synco
	icbi	r0, 0
	ptabs	r18, tr0
	synci
	blink	tr0, r63
	ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4A__)
/* ic_invalidate (SH4A): write back the data cache block at @r4, then
   invalidate the instruction cache block at the same address.  */
	.global	GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	ocbwb	@r4
	synco
	rts
	icbi	@r4
	ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
	/* For system code, we use ic_invalidate_line_i, but user code
	   needs a different mechanism.  A kernel call is generally not
	   available, and it would also be slow.  Different SH4 variants use
	   different sizes and associativities of the Icache.  We use a small
	   bit of dispatch code that can be put hidden in every shared object,
	   which calls the actual processor-specific invalidation code in a
	   separate module.
	   Or if you have operating system support, the OS could mmap the
	   processor-specific code from a single page, since it is highly
	   repetitive.  */
	/* Dispatch: r1 = address of ic_invalidate_array.  The word at r1+8 is
	   used as a mask on (r4 - r1); the masked offset plus r1 is the jump
	   target inside the array, and the word at r1+4 is loaded into r0 in
	   the delay slot for the array code to use.  */
	.global	GLOBAL(ic_invalidate)
	HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
	mov.l	0f,r1
#ifdef __pic__
	mova	0f,r0
	mov.l	1f,r2
	add	r1,r0
	mov.l	@(r0,r2),r1
#endif
	ocbwb	@r4
	mov.l	@(8,r1),r0
	sub	r1,r4
	and	r4,r0
	add	r1,r0
	jmp	@r0
	mov.l	@(4,r1),r0
#ifndef __pic__
0:	.long	GLOBAL(ic_invalidate_array)
#else /* __pic__ */
	.global	GLOBAL(ic_invalidate_array)
	/* ??? Why won't the assembler allow to add these two constants?
*/
0:	.long	_GLOBAL_OFFSET_TABLE_
1:	.long	GLOBAL(ic_invalidate_array)@GOT
#endif /* __pic__ */
	/* Placed after the __pic__ conditional so that PIC and non-PIC builds
	   both get the size marker that pairs with HIDDEN_FUNC.  */
	ENDFUNC(GLOBAL(ic_invalidate))
#endif /* SH4 */
#endif /* L_ic_invalidate */

#ifdef L_ic_invalidate_array
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
	/* Entered with r4 already reduced by r1 (see the add below, which
	   undoes that); the trailing .long 0 sits at offset 8 of the entry
	   point.  */
	.global	GLOBAL(ic_invalidate_array)
	FUNC(GLOBAL(ic_invalidate_array))
GLOBAL(ic_invalidate_array):
	add	r1,r4
	synco
	rts
	icbi	@r4
	.long	0
	ENDFUNC(GLOBAL(ic_invalidate_array))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
	.global	GLOBAL(ic_invalidate_array)
	.p2align 5
	FUNC(GLOBAL(ic_invalidate_array))
/* This must be aligned to the beginning of a cache line.  */
/* The array is a run of 32-byte records whose layout depends on WAYS
   (default 4) and WAY_SIZE (default 0x4000); both can be predefined by
   the build.  */
GLOBAL(ic_invalidate_array):
#ifndef WAYS
#define WAYS 4
#define WAY_SIZE 0x4000
#endif
#if WAYS == 1
	.rept	WAY_SIZE * WAYS / 32
	rts
	nop
	.rept	7
	.long	WAY_SIZE - 32
	.endr
	.endr
#elif WAYS <= 6
	.rept	WAY_SIZE * WAYS / 32
	braf	r0
	add	#-8,r0
	.long	WAY_SIZE + 8
	.long	WAY_SIZE - 32
	.rept	WAYS-2
	braf	r0
	nop
	.endr
	.rept	7 - WAYS
	rts
	nop
	.endr
	.endr
#else /* WAYS > 6 */
	/* This variant needs two different pages for mmap-ing.  */
	.rept	WAYS-1
	.rept	WAY_SIZE / 32
	braf	r0
	nop
	.long	WAY_SIZE
	.rept	6
	.long	WAY_SIZE - 32
	.endr
	.endr
	.endr
	.rept	WAY_SIZE / 32
	rts
	.rept	15
	nop
	.endr
	.endr
#endif /* WAYS */
	ENDFUNC(GLOBAL(ic_invalidate_array))
#endif /* SH4 */
#endif /* L_ic_invalidate_array */

#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
	.section	.rodata
	.align	1
/* Dispatch table for GCC_shcompact_call_trampoline: 16-bit offsets of the
   handlers relative to ct_main_label.  */
LOCAL(ct_main_table):
.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2208.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) 2209.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) 2210.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) 2211.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) 2212.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) 2213.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) 2214.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) 2215.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) 2216 .mode SHmedia 2217 .section .text..SHmedia32, "ax" 2218 .align 2 2219 2220 /* This function loads 64-bit general-purpose registers from the 2221 stack, from a memory address contained in them or from an FP 2222 register, according to a cookie passed in r1. Its execution 2223 time is linear on the number of registers that actually have 2224 to be copied. See sh.h for details on the actual bit pattern. 2225 2226 The function to be called is passed in r0. If a 32-bit return 2227 value is expected, the actual function will be tail-called, 2228 otherwise the return address will be stored in r10 (that the 2229 caller should expect to be clobbered) and the return value 2230 will be expanded into r2/r3 upon return. */ 2231 2232 .global GLOBAL(GCC_shcompact_call_trampoline) 2233 FUNC(GLOBAL(GCC_shcompact_call_trampoline)) 2234GLOBAL(GCC_shcompact_call_trampoline): 2235 ptabs/l r0, tr0 /* Prepare to call the actual function. */ 2236 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 2237 pt/l LOCAL(ct_loop), tr1 2238 addz.l r1, r63, r1 2239 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 2240LOCAL(ct_loop): 2241 nsb r1, r28 2242 shlli r28, 1, r29 2243 ldx.w r0, r29, r30 2244LOCAL(ct_main_label): 2245 ptrel/l r30, tr2 2246 blink tr2, r63 2247LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ 2248 /* It must be dr0, so just do it. 
*/ 2249 fmov.dq dr0, r2 2250 movi 7, r30 2251 shlli r30, 29, r31 2252 andc r1, r31, r1 2253 blink tr1, r63 2254LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ 2255 /* It is either dr0 or dr2. */ 2256 movi 7, r30 2257 shlri r1, 26, r32 2258 shlli r30, 26, r31 2259 andc r1, r31, r1 2260 fmov.dq dr0, r3 2261 beqi/l r32, 4, tr1 2262 fmov.dq dr2, r3 2263 blink tr1, r63 2264LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ 2265 shlri r1, 23 - 3, r34 2266 andi r34, 3 << 3, r33 2267 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 2268LOCAL(ct_r4_fp_base): 2269 ptrel/l r32, tr2 2270 movi 7, r30 2271 shlli r30, 23, r31 2272 andc r1, r31, r1 2273 blink tr2, r63 2274LOCAL(ct_r4_fp_copy): 2275 fmov.dq dr0, r4 2276 blink tr1, r63 2277 fmov.dq dr2, r4 2278 blink tr1, r63 2279 fmov.dq dr4, r4 2280 blink tr1, r63 2281LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ 2282 shlri r1, 20 - 3, r34 2283 andi r34, 3 << 3, r33 2284 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 2285LOCAL(ct_r5_fp_base): 2286 ptrel/l r32, tr2 2287 movi 7, r30 2288 shlli r30, 20, r31 2289 andc r1, r31, r1 2290 blink tr2, r63 2291LOCAL(ct_r5_fp_copy): 2292 fmov.dq dr0, r5 2293 blink tr1, r63 2294 fmov.dq dr2, r5 2295 blink tr1, r63 2296 fmov.dq dr4, r5 2297 blink tr1, r63 2298 fmov.dq dr6, r5 2299 blink tr1, r63 2300LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ 2301 /* It must be dr8. */ 2302 fmov.dq dr8, r6 2303 movi 15, r30 2304 shlli r30, 16, r31 2305 andc r1, r31, r1 2306 blink tr1, r63 2307LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. 
*/ 2308 shlri r1, 16 - 3, r34 2309 andi r34, 3 << 3, r33 2310 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 2311LOCAL(ct_r6_fp_base): 2312 ptrel/l r32, tr2 2313 movi 7, r30 2314 shlli r30, 16, r31 2315 andc r1, r31, r1 2316 blink tr2, r63 2317LOCAL(ct_r6_fp_copy): 2318 fmov.dq dr0, r6 2319 blink tr1, r63 2320 fmov.dq dr2, r6 2321 blink tr1, r63 2322 fmov.dq dr4, r6 2323 blink tr1, r63 2324 fmov.dq dr6, r6 2325 blink tr1, r63 2326LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ 2327 /* It is either dr8 or dr10. */ 2328 movi 15 << 12, r31 2329 shlri r1, 12, r32 2330 andc r1, r31, r1 2331 fmov.dq dr8, r7 2332 beqi/l r32, 8, tr1 2333 fmov.dq dr10, r7 2334 blink tr1, r63 2335LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ 2336 shlri r1, 12 - 3, r34 2337 andi r34, 3 << 3, r33 2338 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 2339LOCAL(ct_r7_fp_base): 2340 ptrel/l r32, tr2 2341 movi 7 << 12, r31 2342 andc r1, r31, r1 2343 blink tr2, r63 2344LOCAL(ct_r7_fp_copy): 2345 fmov.dq dr0, r7 2346 blink tr1, r63 2347 fmov.dq dr2, r7 2348 blink tr1, r63 2349 fmov.dq dr4, r7 2350 blink tr1, r63 2351 fmov.dq dr6, r7 2352 blink tr1, r63 2353LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ 2354 /* It is either dr8 or dr10. */ 2355 movi 15 << 8, r31 2356 andi r1, 1 << 8, r32 2357 andc r1, r31, r1 2358 fmov.dq dr8, r8 2359 beq/l r32, r63, tr1 2360 fmov.dq dr10, r8 2361 blink tr1, r63 2362LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. 
*/ 2363 shlri r1, 8 - 3, r34 2364 andi r34, 3 << 3, r33 2365 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 2366LOCAL(ct_r8_fp_base): 2367 ptrel/l r32, tr2 2368 movi 7 << 8, r31 2369 andc r1, r31, r1 2370 blink tr2, r63 2371LOCAL(ct_r8_fp_copy): 2372 fmov.dq dr0, r8 2373 blink tr1, r63 2374 fmov.dq dr2, r8 2375 blink tr1, r63 2376 fmov.dq dr4, r8 2377 blink tr1, r63 2378 fmov.dq dr6, r8 2379 blink tr1, r63 2380LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ 2381 /* It is either dr8 or dr10. */ 2382 movi 15 << 4, r31 2383 andi r1, 1 << 4, r32 2384 andc r1, r31, r1 2385 fmov.dq dr8, r9 2386 beq/l r32, r63, tr1 2387 fmov.dq dr10, r9 2388 blink tr1, r63 2389LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ 2390 shlri r1, 4 - 3, r34 2391 andi r34, 3 << 3, r33 2392 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 2393LOCAL(ct_r9_fp_base): 2394 ptrel/l r32, tr2 2395 movi 7 << 4, r31 2396 andc r1, r31, r1 2397 blink tr2, r63 2398LOCAL(ct_r9_fp_copy): 2399 fmov.dq dr0, r9 2400 blink tr1, r63 2401 fmov.dq dr2, r9 2402 blink tr1, r63 2403 fmov.dq dr4, r9 2404 blink tr1, r63 2405 fmov.dq dr6, r9 2406 blink tr1, r63 2407LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ 2408 pt/l LOCAL(ct_r2_load), tr2 2409 movi 3, r30 2410 shlli r30, 29, r31 2411 and r1, r31, r32 2412 andc r1, r31, r1 2413 beq/l r31, r32, tr2 2414 addi.l r2, 8, r3 2415 ldx.q r2, r63, r2 2416 /* Fall through. */ 2417LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ 2418 pt/l LOCAL(ct_r3_load), tr2 2419 movi 3, r30 2420 shlli r30, 26, r31 2421 and r1, r31, r32 2422 andc r1, r31, r1 2423 beq/l r31, r32, tr2 2424 addi.l r3, 8, r4 2425 ldx.q r3, r63, r3 2426LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ 2427 pt/l LOCAL(ct_r4_load), tr2 2428 movi 3, r30 2429 shlli r30, 23, r31 2430 and r1, r31, r32 2431 andc r1, r31, r1 2432 beq/l r31, r32, tr2 2433 addi.l r4, 8, r5 2434 ldx.q r4, r63, r4 2435LOCAL(ct_r5_ld): /* Copy r5 from a memory address. 
*/ 2436 pt/l LOCAL(ct_r5_load), tr2 2437 movi 3, r30 2438 shlli r30, 20, r31 2439 and r1, r31, r32 2440 andc r1, r31, r1 2441 beq/l r31, r32, tr2 2442 addi.l r5, 8, r6 2443 ldx.q r5, r63, r5 2444LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ 2445 pt/l LOCAL(ct_r6_load), tr2 2446 movi 3 << 16, r31 2447 and r1, r31, r32 2448 andc r1, r31, r1 2449 beq/l r31, r32, tr2 2450 addi.l r6, 8, r7 2451 ldx.q r6, r63, r6 2452LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ 2453 pt/l LOCAL(ct_r7_load), tr2 2454 movi 3 << 12, r31 2455 and r1, r31, r32 2456 andc r1, r31, r1 2457 beq/l r31, r32, tr2 2458 addi.l r7, 8, r8 2459 ldx.q r7, r63, r7 2460LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ 2461 pt/l LOCAL(ct_r8_load), tr2 2462 movi 3 << 8, r31 2463 and r1, r31, r32 2464 andc r1, r31, r1 2465 beq/l r31, r32, tr2 2466 addi.l r8, 8, r9 2467 ldx.q r8, r63, r8 2468LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ 2469 pt/l LOCAL(ct_check_tramp), tr2 2470 ldx.q r9, r63, r9 2471 blink tr2, r63 2472LOCAL(ct_r2_load): 2473 ldx.q r2, r63, r2 2474 blink tr1, r63 2475LOCAL(ct_r3_load): 2476 ldx.q r3, r63, r3 2477 blink tr1, r63 2478LOCAL(ct_r4_load): 2479 ldx.q r4, r63, r4 2480 blink tr1, r63 2481LOCAL(ct_r5_load): 2482 ldx.q r5, r63, r5 2483 blink tr1, r63 2484LOCAL(ct_r6_load): 2485 ldx.q r6, r63, r6 2486 blink tr1, r63 2487LOCAL(ct_r7_load): 2488 ldx.q r7, r63, r7 2489 blink tr1, r63 2490LOCAL(ct_r8_load): 2491 ldx.q r8, r63, r8 2492 blink tr1, r63 2493LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ 2494 movi 1, r30 2495 ldx.q r15, r63, r2 2496 shlli r30, 29, r31 2497 addi.l r15, 8, r15 2498 andc r1, r31, r1 2499 blink tr1, r63 2500LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ 2501 movi 1, r30 2502 ldx.q r15, r63, r3 2503 shlli r30, 26, r31 2504 addi.l r15, 8, r15 2505 andc r1, r31, r1 2506 blink tr1, r63 2507LOCAL(ct_r4_pop): /* Pop r4 from the stack. 
*/ 2508 movi 1, r30 2509 ldx.q r15, r63, r4 2510 shlli r30, 23, r31 2511 addi.l r15, 8, r15 2512 andc r1, r31, r1 2513 blink tr1, r63 2514LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ 2515 movi 1, r30 2516 ldx.q r15, r63, r5 2517 shlli r30, 20, r31 2518 addi.l r15, 8, r15 2519 andc r1, r31, r1 2520 blink tr1, r63 2521LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ 2522 movi 1, r30 2523 ldx.q r15, r63, r6 2524 shlli r30, 16, r31 2525 addi.l r15, 8, r15 2526 andc r1, r31, r1 2527 blink tr1, r63 2528LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ 2529 ldx.q r15, r63, r7 2530 movi 1 << 12, r31 2531 addi.l r15, 8, r15 2532 andc r1, r31, r1 2533 blink tr1, r63 2534LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ 2535 ldx.q r15, r63, r8 2536 movi 1 << 8, r31 2537 addi.l r15, 8, r15 2538 andc r1, r31, r1 2539 blink tr1, r63 2540LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ 2541 andi r1, 7 << 1, r30 2542 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 2543 shlli r30, 2, r31 2544 shori LOCAL(ct_end_of_pop_seq) & 65535, r32 2545 sub.l r32, r31, r33 2546 ptabs/l r33, tr2 2547 blink tr2, r63 2548LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ 2549 ldx.q r15, r63, r3 2550 addi.l r15, 8, r15 2551 ldx.q r15, r63, r4 2552 addi.l r15, 8, r15 2553 ldx.q r15, r63, r5 2554 addi.l r15, 8, r15 2555 ldx.q r15, r63, r6 2556 addi.l r15, 8, r15 2557 ldx.q r15, r63, r7 2558 addi.l r15, 8, r15 2559 ldx.q r15, r63, r8 2560 addi.l r15, 8, r15 2561LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ 2562 ldx.q r15, r63, r9 2563 addi.l r15, 8, r15 2564LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ 2565LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ 2566 pt/u LOCAL(ct_ret_wide), tr2 2567 andi r1, 1, r1 2568 bne/u r1, r63, tr2 2569LOCAL(ct_call_func): /* Just branch to the function. */ 2570 blink tr0, r63 2571LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its 2572 64-bit return value. 
*/
	/* Copy the caller's return address (r18) to r10: the blink below
	   uses r18 as its link register, so the original value would
	   otherwise be lost.  */
	add.l	r18, r63, r10
	blink	tr0, r18
	/* Back from the callee: point tr0 at the saved return address.  */
	ptabs	r10, tr0
	/* Unpack the 64-bit value in r2 into the r2/r3 pair.  The half
	   produced by the arithmetic shift by 32 is the upper word; which
	   register receives it depends on the endianness.  */
#if __LITTLE_ENDIAN__
	shari	r2, 32, r3
	add.l	r2, r63, r2
#else
	add.l	r2, r63, r3
	shari	r2, 32, r2
#endif
	blink	tr0, r63

	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
#endif /* L_shcompact_call_trampoline */

#ifdef L_shcompact_return_trampoline
     /* GCC_shcompact_return_trampoline is the converse of the code at
	`ret_wide' above.  SHcompact functions that return a 64-bit
	non-floating-point value tail-call it so that the two 32-bit
	halves held in r2 and r3 are packed into r2.

	In:   r2, r3 = the two 32-bit halves; r18 = return address.
	Out:  r2 = packed 64-bit value.
	Writes: r2, r3, tr0.  */

	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2
	.global	GLOBAL(GCC_shcompact_return_trampoline)
	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
GLOBAL(GCC_shcompact_return_trampoline):
	ptabs/l	r18, tr0		/* Return target.  */
	/* Zero-extend the register that supplies the low word and move the
	   other one into bits 63..32, so that the `or' can merge them.  */
#if __LITTLE_ENDIAN__
	addz.l	r2, r63, r2
	shlli	r3, 32, r3
#else
	addz.l	r3, r63, r3
	shlli	r2, 32, r2
#endif
	or	r3, r2, r2
	blink	tr0, r63

	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
#endif /* L_shcompact_return_trampoline */

#ifdef L_shcompact_incoming_args
	/* Dispatch table for GCC_shcompact_incoming_args.  Every entry is a
	   16-bit offset of a handler relative to ia_main_label, where it is
	   consumed by a ptrel.  Entries holding the literal 1 are the ones
	   the original author marked as invalid.  */
	.section	.rodata
	.align	1
LOCAL(ia_main_table):
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
.word	1 /* Invalid, just loop */
.word	1 /* Invalid, just loop */
.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
	.mode	SHmedia
	.section	.text..SHmedia32, "ax"
	.align	2

     /* GCC_shcompact_incoming_args writes 64-bit general-purpose
	registers back to the stack and, for the `ld' entries, replaces
	each such register with the address at which it was stored.

	In:   r17 = cookie in the upper 32 bits, first store address in
	      the lower 32 bits; r18 = return address; r2..r9 = the
	      registers that may have to be stored.
	Temporaries written: r0 (remaining cookie), r36-r41, r43,
	      tr0, tr1, tr2.

	The running time is linear in the number of registers that
	actually have to be copied, and the code is tuned for structures
	larger than 64 bits rather than for individual `long long'
	arguments.  See sh.h for the exact layout of the cookie.  */

	.global	GLOBAL(GCC_shcompact_incoming_args)
	FUNC(GLOBAL(GCC_shcompact_incoming_args))
GLOBAL(GCC_shcompact_incoming_args):
	ptabs/l	r18, tr0	/* Prepare to return.  */
	shlri	r17, 32, r0	/* Load the cookie.  */
	/* r43 = address of ia_main_table biased by -31 entries (built with
	   a movi/shori pair); tr1 = top of the dispatch loop.  */
	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
	pt/l	LOCAL(ia_loop), tr1
	add.l	r17, r63, r17	/* Keep only the 32-bit store address.  */
	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
LOCAL(ia_loop):
	/* Dispatch on what is left of the cookie: r36 = nsb (r0), doubled
	   to index the 16-bit table entries; the value loaded is an offset
	   from ia_main_label.  */
	nsb	r0, r36
	shlli	r36, 1, r37
	ldx.w	r43, r37, r38
LOCAL(ia_main_label):
	ptrel/l	r38, tr2
	blink	tr2, r63

	/* The `ld' handlers all follow one pattern: build the two-bit mask
	   of this register's cookie field, clear that field in r0, store
	   the register at r17, replace the register with that address and
	   advance r17 by 8.  If both bits of the field were set, go back
	   to ia_loop (tr1); otherwise fall through to the next register.  */
LOCAL(ia_r2_ld):	/* Spill r2; r2 becomes the address of its slot.  */
	movi	3, r38
	shlli	r38, 29, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r2
	add.l	r17, r63, r2
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r3_ld):	/* Spill r3; r3 becomes the address of its slot.  */
	movi	3, r38
	shlli	r38, 26, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r3
	add.l	r17, r63, r3
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r4_ld):	/* Spill r4; r4 becomes the address of its slot.  */
	movi	3, r38
	shlli	r38, 23, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r4
	add.l	r17, r63, r4
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r5_ld):	/* Spill r5; r5 becomes the address of its slot.  */
	movi	3, r38
	shlli	r38, 20, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r5
	add.l	r17, r63, r5
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r6_ld):	/* Spill r6; r6 becomes the address of its slot.  */
	movi	3, r38
	shlli	r38, 16, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r6
	add.l	r17, r63, r6
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r7_ld):	/* Spill r7; r7 becomes the address of its slot.  */
	movi	3 << 12, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r7
	add.l	r17, r63, r7
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r8_ld):	/* Spill r8; r8 becomes the address of its slot.  */
	movi	3 << 8, r39
	and	r0, r39, r40
	andc	r0, r39, r0
	stx.q	r17, r63, r8
	add.l	r17, r63, r8
	addi.l	r17, 8, r17
	beq/u	r39, r40, tr1
LOCAL(ia_r9_ld):	/* Spill r9, point it at its slot, and return.  */
	stx.q	r17, r63, r9
	add.l	r17, r63, r9
	blink	tr0, r63

	/* The `push' handlers clear one cookie bit, store the register at
	   r17, advance r17 by 8 and go back to ia_loop (tr1).  Unlike the
	   `ld' handlers they leave the register value itself alone.  */
LOCAL(ia_r2_push):	/* Store r2 at r17.  */
	movi	1, r38
	shlli	r38, 29, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r2
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r3_push):	/* Store r3 at r17.  */
	movi	1, r38
	shlli	r38, 26, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r3
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r4_push):	/* Store r4 at r17.  */
	movi	1, r38
	shlli	r38, 23, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r4
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r5_push):	/* Store r5 at r17.  */
	movi	1, r38
	shlli	r38, 20, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r5
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r6_push):	/* Store r6 at r17.  */
	movi	1, r38
	shlli	r38, 16, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r6
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r7_push):	/* Store r7 at r17.  */
	movi	1 << 12, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r7
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_r8_push):	/* Store r8 at r17.  */
	movi	1 << 8, r39
	andc	r0, r39, r0
	stx.q	r17, r63, r8
	addi.l	r17, 8, r17
	blink	tr1, r63
LOCAL(ia_push_seq):	/* Store a run of registers that ends with r9.  */
	/* r39 = 8 * (bits 3..1 of the cookie).  The entry point is that
	   many bytes before ia_end_of_push_seq; presumably 8 bytes is the
	   size of one store/advance pair -- confirm against the SHmedia
	   instruction encoding.  */
	andi	r0, 7 << 1, r38
	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
	shlli	r38, 2, r39
	shori	LOCAL(ia_end_of_push_seq) & 65535, r40
	sub.l	r40, r39, r41
	ptabs/l	r41, tr2
	blink	tr2, r63
LOCAL(ia_stack_of_push_seq):	 /* Beginning of push sequence.  */
	stx.q	r17, r63, r3
	addi.l	r17, 8, r17
	stx.q	r17, r63, r4
	addi.l	r17, 8, r17
	stx.q	r17, r63, r5
	addi.l	r17, 8, r17
	stx.q	r17, r63, r6
	addi.l	r17, 8, r17
	stx.q	r17, r63, r7
	addi.l	r17, 8, r17
	stx.q	r17, r63, r8
	addi.l	r17, 8, r17
LOCAL(ia_r9_push):	/* Store r9 at r17 (last slot, r17 not advanced).  */
	stx.q	r17, r63, r9
LOCAL(ia_return):	/* Return to the caller through tr0.  */
	blink	tr0, r63
LOCAL(ia_end_of_push_seq): /* Reference point for computing the first push instruction.  */
	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
#endif /* L_shcompact_incoming_args */
#endif
#if __SH5__
#ifdef L_nested_trampoline
     /* GCC_nested_trampoline: template code for nested-function
	trampolines.  It loads a branch target from offset 24 and a second
	word from offset 32 (64-bit ABI) or 28 (32-bit ABI), both relative
	to the address obtained by the ptrel below, and branches to the
	target with the second word left in r1.  */
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
	.global	GLOBAL(GCC_nested_trampoline)
	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
GLOBAL(GCC_nested_trampoline):
	.mode	SHmedia
	/* A ptrel with a zero offset (r63) puts a PC-relative address in
	   tr0; presumably that is the address of the running copy of this
	   trampoline -- confirm against the code that copies it.  */
	ptrel/u	r63, tr0
	gettr	tr0, r0
#if __SH5__ == 64
	ld.q	r0, 24, r1
#else
	ld.l	r0, 24, r1
#endif
	ptabs/l	r1, tr1			/* Branch target.  */
	/* Second word, left in r1 for the target; presumably the static
	   chain -- TODO confirm.  */
#if __SH5__ == 64
	ld.q	r0, 32, r1
#else
	ld.l	r0, 28, r1
#endif
	blink	tr1, r63

	ENDFUNC(GLOBAL(GCC_nested_trampoline))
#endif /* L_nested_trampoline */
#endif /* __SH5__ */
#if __SH5__ == 32
#ifdef L_push_pop_shmedia_regs
	.section	.text..SHmedia32,"ax"
	.mode	SHmedia
	.align	2
#ifndef __SH4_NOFPU__
     /* GCC_push_shmedia_regs: reserve 14 eight-byte slots below r15 and
	store dr36 .. dr62 there, then continue into the integer-register
	code that follows.  */
	.global	GLOBAL(GCC_push_shmedia_regs)
	FUNC(GLOBAL(GCC_push_shmedia_regs))
GLOBAL(GCC_push_shmedia_regs):
	addi.l	r15, -14*8, r15
	fst.d	r15, 13*8, dr62
	fst.d	r15, 12*8, dr60
	fst.d	r15, 11*8, dr58
	fst.d	r15, 10*8, dr56
	fst.d	r15, 9*8, dr54
	fst.d	r15, 8*8, dr52
	fst.d	r15, 7*8, dr50
	fst.d	r15, 6*8, dr48
	fst.d	r15, 5*8, dr46
	fst.d	r15, 4*8, dr44
	fst.d	r15, 3*8, dr42
	fst.d	r15, 2*8, dr40
	fst.d	r15, 1*8, dr38
	fst.d	r15, 0*8, dr36
#else /* ! __SH4_NOFPU__ */
	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
GLOBAL(GCC_push_shmedia_regs_nofpu):
#endif /* ! __SH4_NOFPU__ */
	/* Integer part of the push, shared by both entry points: reserve
	   27 eight-byte slots below r15 and store tr5..tr7 (read through
	   r60..r62), r44..r59 and r28..r35.  r18 supplies the return
	   address.  Note that r60..r62 are overwritten by the gettr
	   instructions before they are stored.  */
	ptabs/l	r18, tr0
	addi.l	r15, -27*8, r15
	gettr	tr7, r62
	gettr	tr6, r61
	gettr	tr5, r60
	st.q	r15, 26*8, r62
	st.q	r15, 25*8, r61
	st.q	r15, 24*8, r60
	st.q	r15, 23*8, r59
	st.q	r15, 22*8, r58
	st.q	r15, 21*8, r57
	st.q	r15, 20*8, r56
	st.q	r15, 19*8, r55
	st.q	r15, 18*8, r54
	st.q	r15, 17*8, r53
	st.q	r15, 16*8, r52
	st.q	r15, 15*8, r51
	st.q	r15, 14*8, r50
	st.q	r15, 13*8, r49
	st.q	r15, 12*8, r48
	st.q	r15, 11*8, r47
	st.q	r15, 10*8, r46
	st.q	r15, 9*8, r45
	st.q	r15, 8*8, r44
	st.q	r15, 7*8, r35
	st.q	r15, 6*8, r34
	st.q	r15, 5*8, r33
	st.q	r15, 4*8, r32
	st.q	r15, 3*8, r31
	st.q	r15, 2*8, r30
	st.q	r15, 1*8, r29
	st.q	r15, 0*8, r28
	blink	tr0, r63
#ifndef __SH4_NOFPU__
	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
#else
	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
#endif
#ifndef __SH4_NOFPU__
     /* GCC_pop_shmedia_regs: reload dr36 .. dr62 from slots 27..40 above
	r15, set r0 to the full frame size (41 slots) and join the integer
	restore code at .L0.  */
	.global	GLOBAL(GCC_pop_shmedia_regs)
	FUNC(GLOBAL(GCC_pop_shmedia_regs))
GLOBAL(GCC_pop_shmedia_regs):
	pt	.L0, tr1
	movi	41*8, r0
	fld.d	r15, 40*8, dr62
	fld.d	r15, 39*8, dr60
	fld.d	r15, 38*8, dr58
	fld.d	r15, 37*8, dr56
	fld.d	r15, 36*8, dr54
	fld.d	r15, 35*8, dr52
	fld.d	r15, 34*8, dr50
	fld.d	r15, 33*8, dr48
	fld.d	r15, 32*8, dr46
	fld.d	r15, 31*8, dr44
	fld.d	r15, 30*8, dr42
	fld.d	r15, 29*8, dr40
	fld.d	r15, 28*8, dr38
	fld.d	r15, 27*8, dr36
	blink	tr1, r63
#else /* ! __SH4_NOFPU__ */
	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
GLOBAL(GCC_pop_shmedia_regs_nofpu):
#endif /* ! __SH4_NOFPU__ */
	/* Frame size for the integer-only variant.  In the FPU build the
	   blink above branches straight to .L0, so this movi is skipped and
	   r0 keeps the value 41*8.  */
	movi	27*8, r0
.L0:
	/* Reload tr5..tr7 (through r60..r62), r44..r59 and r28..r35, then
	   release r0 bytes of stack and return through r18.  r60..r62 are
	   left holding the reloaded target-register values.  */
	ptabs	r18, tr0
	ld.q	r15, 26*8, r62
	ld.q	r15, 25*8, r61
	ld.q	r15, 24*8, r60
	ptabs	r62, tr7
	ptabs	r61, tr6
	ptabs	r60, tr5
	ld.q	r15, 23*8, r59
	ld.q	r15, 22*8, r58
	ld.q	r15, 21*8, r57
	ld.q	r15, 20*8, r56
	ld.q	r15, 19*8, r55
	ld.q	r15, 18*8, r54
	ld.q	r15, 17*8, r53
	ld.q	r15, 16*8, r52
	ld.q	r15, 15*8, r51
	ld.q	r15, 14*8, r50
	ld.q	r15, 13*8, r49
	ld.q	r15, 12*8, r48
	ld.q	r15, 11*8, r47
	ld.q	r15, 10*8, r46
	ld.q	r15, 9*8, r45
	ld.q	r15, 8*8, r44
	ld.q	r15, 7*8, r35
	ld.q	r15, 6*8, r34
	ld.q	r15, 5*8, r33
	ld.q	r15, 4*8, r32
	ld.q	r15, 3*8, r31
	ld.q	r15, 2*8, r30
	ld.q	r15, 1*8, r29
	ld.q	r15, 0*8, r28
	add.l	r15, r0, r15
	blink	tr0, r63

#ifndef __SH4_NOFPU__
	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
#else
	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
#endif
#endif /* L_push_pop_shmedia_regs */
#endif /* __SH5__ == 32 */

#if __SH5__
#ifdef L_div_table
#if defined(__pic__) && defined(__SHMEDIA__)
	.global	GLOBAL(sdivsi3)
	FUNC(GLOBAL(sdivsi3))
#if __SH5__ == 32
	.section	.text..SHmedia32,"ax"
#else
	.text
#endif
#if 0
/* ??? FIXME: Presumably because of a linker bug, exporting data symbols
   from a text section does not work (at least for shared libraries):
   the linker sets the LSB of the address as if this was SHmedia code.
*/
#define TEXT_DATA_BUG
#endif
	.align	2
	// sdivsi3 (PIC SHmedia variant): signed 32-bit division that uses
	// a table-driven reciprocal approximation of the divisor.
	//   inputs:    r4, r5
	//   result:    r0
	//   clobbered: r1,r18,r19,r20,r21,r25,tr0
	// The fixed-point format of each intermediate value is given in
	// the per-instruction annotations (sN.M / uN.M).
	.global	GLOBAL(sdivsi3)
GLOBAL(sdivsi3):
	// Table base goes through tr0 first; it is copied to r20 below.
#ifdef TEXT_DATA_BUG
	ptb	datalabel Local_div_table,tr0
#else
	ptb	GLOBAL(div_table_internal),tr0
#endif
	nsb	r5, r1
	shlld	r5, r1, r25	// normalize; [-2 ..1, 1..2) in s2.62
	shari	r25, 58, r21	// extract 5(6) bit index (s2.4 with hole -1..1)
	/* bubble */
	gettr	tr0,r20
	ldx.ub	r20, r21, r19	// u0.8
	shari	r25, 32, r25	// normalize to s2.30
	shlli	r21, 1, r21
	muls.l	r25, r19, r19	// s2.38
	ldx.w	r20, r21, r21	// s2.14
	ptabs	r18, tr0	// return address; r18 is scratch from here on
	shari	r19, 24, r19	// truncate to s2.14
	sub	r21, r19, r19	// some 11 bit inverse in s1.14
	muls.l	r19, r19, r21	// u0.28
	sub	r63, r1, r1
	addi	r1, 92, r1	// r1 = 92 - nsb (r5): shift count for the shard below
	muls.l	r25, r21, r18	// s2.58
	shlli	r19, 45, r19	// multiply by two and convert to s2.58
	/* bubble */
	sub	r19, r18, r18
	shari	r18, 28, r18	// some 22 bit inverse in s1.30
	muls.l	r18, r25, r0	// s2.60
	muls.l	r18, r4, r25	// s32.30
	/* bubble */
	shari	r0, 16, r19	// s-16.44
	muls.l	r19, r18, r19	// s-16.74
	shari	r25, 63, r0	// r0 = 0 or -1, from the sign of r25
	shari	r4, 14, r18	// s19.-14
	shari	r19, 30, r19	// s-16.44
	muls.l	r19, r18, r19	// s15.30
	xor	r21, r0, r21	// You could also use the constant 1 << 27.
	add	r21, r25, r21
	sub	r21, r19, r21
	shard	r21, r1, r21
	sub	r21, r0, r0
	blink	tr0, r63
	ENDFUNC(GLOBAL(sdivsi3))
/* This table has been generated by divtab.c .
Defects for bias -330:
   Max defect: 6.081536e-07 at -1.000000e+00
   Min defect: 2.849516e-08 at 1.030651e+00
   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
   Defect at 1: 1.238659e-07
   Defect at -2: 1.061708e-07 */
#else /* ! __pic__ || !
__SHMEDIA__ */
	.section	.rodata
#endif /* __pic__ && __SHMEDIA__ */

/* Division tables.  The table symbol sits in the MIDDLE of its 128 bytes
   of data.  Going down from the symbol: 16 unused bytes, 16 one-byte
   factors, 16 two-byte constants (negative side).  Going up: 16 unused
   bytes, 16 one-byte factors, 16 two-byte constants (positive side).
   This matches the signed index with which sdivsi3 addresses the table
   (byte loads at the index, halfword loads at twice the index).  */
#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
	/* Private copy of the table, emitted in the section that is current
	   at this point; .rodata is selected again afterwards.  */
	.balign	2
	.type	Local_div_table,@object
	.size	Local_div_table,128
/* negative division constants */
	.word	-16638, -17135, -17737, -18433
	.word	-19103, -19751, -20583, -21383
	.word	-22343, -23353, -24407, -25582
	.word	-26863, -28382, -29965, -31800
/* negative division factors */
	.byte	66, 70, 75, 81, 87, 93, 101, 109
	.byte	119, 130, 142, 156, 172, 192, 214, 241
	.skip	16
Local_div_table:
	.skip	16
/* positive division factors */
	.byte	241, 214, 192, 172, 156, 142, 130, 119
	.byte	109, 101, 93, 87, 81, 75, 70, 66
/* positive division constants */
	.word	31801, 29966, 28383, 26864
	.word	25583, 24408, 23354, 22344
	.word	21384, 20584, 19752, 19104
	.word	18434, 17738, 17136, 16639
	.section	.rodata
#endif /* TEXT_DATA_BUG */
	/* Exported table; div_table_internal is a hidden alias of it.  */
	.balign	2
	.type	GLOBAL(div_table),@object
	.size	GLOBAL(div_table),128
/* negative division constants */
	.word	-16638, -17135, -17737, -18433
	.word	-19103, -19751, -20583, -21383
	.word	-22343, -23353, -24407, -25582
	.word	-26863, -28382, -29965, -31800
/* negative division factors */
	.byte	66, 70, 75, 81, 87, 93, 101, 109
	.byte	119, 130, 142, 156, 172, 192, 214, 241
	.skip	16
	.global	GLOBAL(div_table)
GLOBAL(div_table):
	HIDDEN_ALIAS(div_table_internal,div_table)
	.skip	16
/* positive division factors */
	.byte	241, 214, 192, 172, 156, 142, 130, 119
	.byte	109, 101, 93, 87, 81, 75, 70, 66
/* positive division constants */
	.word	31801, 29966, 28383, 26864
	.word	25583, 24408, 23354, 22344
	.word	21384, 20584, 19752, 19104
	.word	18434, 17738, 17136, 16639
#endif /* L_div_table */
#endif /* __SH5__ */

#ifdef L_udiv_qrnnd_16
#if !__SHMEDIA__
	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
	/* udiv_qrnnd_16: one 16-bit quotient step built on div1.
	   Inputs:   r0 = n1, r4 = n0, r5 = d, r6 = d1.
	   Outputs:  r0 = rn, r1 = qn.
	   Scratch:  r2 = __m (also T and MACL).
	   n1 < d, but n1 might be larger than d1.  */
	.global	GLOBAL(udiv_qrnnd_16)
	.balign	8
GLOBAL(udiv_qrnnd_16):
	div0u
	cmp/hi	r6,r0
	bt	.Lots			/* n1 > d1: take the separate path below.  */
	.rept	16
	div1	r6,r0
	.endr
	extu.w	r0,r1
	bt	0f
	add	r6,r0
0:	rotcl	r1
	mulu.w	r1,r5
	xtrct	r4,r0
	swap.w	r0,r0
	sts	macl,r2
	cmp/hs	r2,r0
	sub	r2,r0
	bt	0f
	addc	r5,r0
	add	#-1,r1
	bt	0f
1:	add	#-1,r1
	rts
	add	r5,r0			/* Delay slot of the rts.  */
	.balign	8
.Lots:
	sub	r5,r0
	swap.w	r4,r1
	xtrct	r0,r1
	clrt
	mov	r1,r0
	addc	r5,r0
	mov	#-1,r1
	/* SL1 comes from lib1funcs.h; presumably it emits `bf 1b' with
	   `shlr16 r1' as its delay-slot instruction -- confirm there.  */
	SL1(bf, 1b,
	shlr16	r1)
0:	rts
	nop
	ENDFUNC(GLOBAL(udiv_qrnnd_16))
#endif /* !__SHMEDIA__ */
#endif /* L_udiv_qrnnd_16 */