dnl  HP-PA 2.0 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
dnl  the result in a second limb vector.

dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C 8000,8200:            6.5
C 8500,8600,8700:       5.625

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers, have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  Each 64x64 bit product is built from four 32x32 bit XMPYU products: one
C  low, one high and two mid products.  The FP results are stored to the tmp
C  slots below and reloaded into integer registers, where the two mid
C  products are summed, split at bit 32 and added into the low/high parts.
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limb
C    restore-all-regs

C  STACK LAYOUT:
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C
C                -00 <- r30
C                -08  FREE
C                -10  tmp
C                -18  tmp
C                -20  tmp
C                -28  tmp
C                -30  tmp
C                -38  tmp
C                -40  tmp
C                -48  tmp
C                -50  tmp
C                -58  tmp
C                -60  tmp
C                -68  tmp
C                -70  tmp
C                -78  tmp
C                -80  tmp
C                -88  tmp
C                -90  FREE
C                -98  FREE
C                -a0  FREE
C                -a8  FREE
C                -b0  r13
C                -b8  r12
C                -c0  r11
C                -c8  r10
C                -d0  r9
C                -d8  r8
C                -e0  r7
C                -e8  r6
C                -f0  r5
C                -f8  r4
C                -100 r3
C  Previous frame:
C                [unused area]
C                -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.


C  NOTE(review): config.m4 is already included near the top of this file, so
C  this second include looks redundant.  It is kept as found; confirm before
C  removing.
include(`../config.m4')

C INPUT PARAMETERS:
define(`rp',`%r26')     C result limbs are stored through this pointer
define(`up',`%r25')     C source limbs are loaded through this pointer
define(`n',`%r24')      C limb count
define(`vlimb',`%r23')  C multiplier limb

define(`climb',`%r23')  C carry limb, shares its register with vlimb

C RETURN VALUE:
C  The final carry limb (climb) is returned in %r28 for the 2.0w ABI, and
C  split into %r28 (upper 32 bits) and %r29 (lower 32 bits) otherwise.

ifdef(`HAVE_ABI_2_0w',
`       .level  2.0w
',`     .level  2.0
')
PROLOGUE(mpn_mul_1)

ifdef(`HAVE_ABI_2_0w',
`       std             vlimb, -0x38(%r30)      C store vlimb into "home" slot
')
C  Save r3 and bump r30 by 0x100 in one instruction; all frame offsets below
C  are relative to the bumped r30.
        std,ma          %r3, 0x100(%r30)
        std             %r4, -0xf8(%r30)
        std             %r5, -0xf0(%r30)
        ldo             0(%r0), climb           C clear climb
        fldd            -0x138(%r30), %fr8      C put vlimb in fp register

C  Register roles for the feed-in code handling the n mod 4 leading limbs.
define(`p032a1',`%r1')  C
define(`p032a2',`%r19') C

define(`m032',`%r20')   C
define(`m096',`%r21')   C

define(`p000a',`%r22')  C
define(`p064a',`%r29')  C

define(`s000',`%r31')   C

define(`ma000',`%r4')   C
define(`ma064',`%r20')  C

C define(`r000',`%r3')  C FIXME don't save r3 for n < 4.

        extrd,u         n, 63, 2, %r5           C r5 = n mod 4
        cmpb,=          %r5, %r0, L(BIG)        C no leading limbs, go unrolled
        nop

        fldd            0(up), %fr4
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        fstd            %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        addib,<>        -1, %r5, L(two_or_more)
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(one)
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldd             -0x80(%r30), p000a
        b               L(0_one_out)
        ldd             -0x68(%r30), p064a      C (delay slot)

LDEF(two_or_more)
        fldd            0(up), %fr4
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        ldd             -0x78(%r30), p032a1
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        ldd             -0x70(%r30), p032a2
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        ldd             -0x80(%r30), p000a
        fstd            %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        ldd             -0x68(%r30), p064a
        addib,<>        -1, %r5, L(three_or_more)
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(two)
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        b               L(0_two_out)
        depd            m096, 31, 32, ma064     C (delay slot)

LDEF(three_or_more)
        fldd            0(up), %fr4
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
C       addib,=         -1, %r5, L(0_out)
        depd            m096, 31, 32, ma064
C  The body of loop0 is disabled.  r5 is at most 3 on entry to this section,
C  so control falls straight through to 0_out.
LDEF(loop0)
C       xmpyu           %fr8R, %fr4L, %fr22
C       xmpyu           %fr8L, %fr4R, %fr23
C       ldd             -0x78(%r30), p032a1
C       fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
C
C       xmpyu           %fr8R, %fr4R, %fr24
C       xmpyu           %fr8L, %fr4L, %fr25
C       ldd             -0x70(%r30), p032a2
C       fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
C
C       ldo             8(rp), rp
C       add             climb, p000a, s000
C       ldd             -0x80(%r30), p000a
C       fstd            %fr24, -0x80(%r30)      C low product to -0x80..-0x79
C
C       add,dc          p064a, %r0, climb
C       ldo             8(up), up
C       ldd             -0x68(%r30), p064a
C       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
C
C       add             ma000, s000, s000
C       add,dc          ma064, climb, climb
C       fldd            0(up), %fr4
C
C       std             s000, -8(rp)
C
C       add             p032a1, p032a2, m032
C       add,dc          %r0, %r0, m096
C
C       depd,z          m032, 31, 32, ma000
C       extrd,u         m032, 31, 32, ma064
C       addib,<>        -1, %r5, L(loop0)
C       depd            m096, 31, 32, ma064
LDEF(0_out)
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        ldd             -0x78(%r30), p032a1
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        ldd             -0x70(%r30), p032a2
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        ldo             8(rp), rp
        add             climb, p000a, s000
        ldd             -0x80(%r30), p000a
        fstd            %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        add,dc          p064a, %r0, climb
        ldd             -0x68(%r30), p064a
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        std             s000, -8(rp)
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        depd            m096, 31, 32, ma064
LDEF(0_two_out)
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldo             8(rp), rp
        add             climb, p000a, s000
        ldd             -0x80(%r30), p000a
        add,dc          p064a, %r0, climb
        ldd             -0x68(%r30), p064a
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        std             s000, -8(rp)
LDEF(0_one_out)
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        depd            m096, 31, 32, ma064

        add             climb, p000a, s000
        add,dc          p064a, %r0, climb
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        std             s000, 0(rp)

C  Branch if 4 >= n.  n mod 4 is nonzero on this path, so that means n < 4
C  and every limb has been handled.
        cmpib,>=        4, n, L(done)
        ldo             8(rp), rp               C (delay slot)

C 4-way unrolled code.

LDEF(BIG)

C  Register roles for the unrolled code.  These redefine the names used by
C  the feed-in code above.
define(`p032a1',`%r1')  C
define(`p032a2',`%r19') C
define(`p096b1',`%r20') C
define(`p096b2',`%r21') C
define(`p160c1',`%r22') C
define(`p160c2',`%r29') C
define(`p224d1',`%r31') C
define(`p224d2',`%r3')  C
                        C
define(`m032',`%r4')    C
define(`m096',`%r5')    C
define(`m160',`%r6')    C
define(`m224',`%r7')    C
define(`m288',`%r8')    C
                        C
define(`p000a',`%r1')   C
define(`p064a',`%r19')  C
define(`p064b',`%r20')  C
define(`p128b',`%r21')  C
define(`p128c',`%r22')  C
define(`p192c',`%r29')  C
define(`p192d',`%r31')  C
define(`p256d',`%r3')   C
                        C
define(`s000',`%r10')   C
define(`s064',`%r11')   C
define(`s128',`%r12')   C
define(`s192',`%r13')   C
                        C
define(`ma000',`%r9')   C
define(`ma064',`%r4')   C
define(`ma128',`%r5')   C
define(`ma192',`%r6')   C
define(`ma256',`%r7')   C

C  Save r6..r13, which are only used by the unrolled code.
        std             %r6, -0xe8(%r30)
        std             %r7, -0xe0(%r30)
        std             %r8, -0xd8(%r30)
        std             %r9, -0xd0(%r30)
        std             %r10, -0xc8(%r30)
        std             %r11, -0xc0(%r30)
        std             %r12, -0xb8(%r30)
        std             %r13, -0xb0(%r30)

C  From here on n counts groups of 4 limbs.
ifdef(`HAVE_ABI_2_0w',
`       extrd,u         n, 61, 62, n            C right shift 2
',`     extrd,u         n, 61, 30, n            C right shift 2, zero extend
')

LDEF(4_or_more)
        fldd            0(up), %fr4
        fldd            8(up), %fr5
        fldd            16(up), %fr6
        fldd            24(up), %fr7
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        xmpyu           %fr8R, %fr5L, %fr24
        xmpyu           %fr8L, %fr5R, %fr25
        xmpyu           %fr8R, %fr6L, %fr26
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu           %fr8R, %fr7L, %fr28
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        xmpyu           %fr8R, %fr4R, %fr30
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to -0x38..-0x31
        xmpyu           %fr8R, %fr5R, %fr22
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to -0x30..-0x29
        xmpyu           %fr8R, %fr6R, %fr24
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to -0x58..-0x51
        xmpyu           %fr8R, %fr7R, %fr26
        fstd            %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        addib,<>        -1, n, L(8_or_more)
        xmpyu           %fr8L, %fr7L, %fr27     C (delay slot)
        fstd            %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        fstd            %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        fstd            %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd            %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd            %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd            %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldd             -0x38(%r30), p096b1
        ldd             -0x30(%r30), p096b2
        ldd             -0x58(%r30), p160c1
        ldd             -0x50(%r30), p160c2
        ldd             -0x18(%r30), p224d1
        ldd             -0x10(%r30), p224d2
        b               L(end1)
        nop

LDEF(8_or_more)
        fstd            %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        fstd            %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        ldo             32(up), up
        fstd            %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd            %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd            %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd            %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        fldd            0(up), %fr4
        fldd            8(up), %fr5
        fldd            16(up), %fr6
        fldd            24(up), %fr7
        xmpyu           %fr8R, %fr4L, %fr22
        ldd             -0x78(%r30), p032a1
        xmpyu           %fr8L, %fr4R, %fr23
        xmpyu           %fr8R, %fr5L, %fr24
        ldd             -0x70(%r30), p032a2
        xmpyu           %fr8L, %fr5R, %fr25
        xmpyu           %fr8R, %fr6L, %fr26
        ldd             -0x38(%r30), p096b1
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu           %fr8R, %fr7L, %fr28
        ldd             -0x30(%r30), p096b2
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        xmpyu           %fr8R, %fr4R, %fr30
        ldd             -0x58(%r30), p160c1
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to -0x38..-0x31
        xmpyu           %fr8R, %fr5R, %fr22
        ldd             -0x50(%r30), p160c2
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to -0x30..-0x29
        xmpyu           %fr8R, %fr6R, %fr24
        ldd             -0x18(%r30), p224d1
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to -0x58..-0x51
        xmpyu           %fr8R, %fr7R, %fr26
        ldd             -0x10(%r30), p224d2
        fstd            %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        addib,=         -1, n, L(end2)
        xmpyu           %fr8L, %fr7L, %fr27     C (delay slot)
LDEF(loop)
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        fstd            %fr28, -0x18(%r30)      C mid product to -0x18..-0x11

        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        fstd            %fr29, -0x10(%r30)      C mid product to -0x10..-0x09

        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        ldo             32(up), up
        fstd            %fr30, -0x80(%r30)      C low product to -0x80..-0x79

        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61

        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        fstd            %fr22, -0x40(%r30)      C low product to -0x40..-0x39

        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21

        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        fstd            %fr24, -0x60(%r30)      C low product to -0x60..-0x59

        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41

        add,dc          p064a, p064b, s064
        add,dc          p128b, p128c, s128
        fstd            %fr26, -0x20(%r30)      C low product to -0x20..-0x19

        add,dc          p192c, p192d, s192
        add,dc          p256d, %r0, climb
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81

        add             ma000, s000, s000       C accum mid 0
        fldd            0(up), %fr4
        add,dc          ma064, s064, s064       C accum mid 1
        std             s000, 0(rp)

        add,dc          ma128, s128, s128       C accum mid 2
        fldd            8(up), %fr5
        add,dc          ma192, s192, s192       C accum mid 3
        std             s064, 8(rp)

        add,dc          ma256, climb, climb
        fldd            16(up), %fr6
        std             s128, 16(rp)

        xmpyu           %fr8R, %fr4L, %fr22
        ldd             -0x78(%r30), p032a1
        xmpyu           %fr8L, %fr4R, %fr23
        fldd            24(up), %fr7

        xmpyu           %fr8R, %fr5L, %fr24
        ldd             -0x70(%r30), p032a2
        xmpyu           %fr8L, %fr5R, %fr25
        std             s192, 24(rp)

        xmpyu           %fr8R, %fr6L, %fr26
        ldd             -0x38(%r30), p096b1
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to -0x78..-0x71

        xmpyu           %fr8R, %fr7L, %fr28
        ldd             -0x30(%r30), p096b2
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to -0x70..-0x69

        xmpyu           %fr8R, %fr4R, %fr30
        ldd             -0x58(%r30), p160c1
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to -0x38..-0x31

        xmpyu           %fr8R, %fr5R, %fr22
        ldd             -0x50(%r30), p160c2
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to -0x30..-0x29

        xmpyu           %fr8R, %fr6R, %fr24
        ldd             -0x18(%r30), p224d1
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to -0x58..-0x51

        xmpyu           %fr8R, %fr7R, %fr26
        ldd             -0x10(%r30), p224d2
        fstd            %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        xmpyu           %fr8L, %fr7L, %fr27

        addib,<>        -1, n, L(loop)
        ldo             32(rp), rp              C (delay slot)

LDEF(end2)
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        fstd            %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        fstd            %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        fstd            %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        fstd            %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        fstd            %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        add,dc          p064a, p064b, s064
        add,dc          p128b, p128c, s128
        fstd            %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        add,dc          p192c, p192d, s192
        add,dc          p256d, %r0, climb
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        add             ma000, s000, s000       C accum mid 0
        add,dc          ma064, s064, s064       C accum mid 1
        add,dc          ma128, s128, s128       C accum mid 2
        add,dc          ma192, s192, s192       C accum mid 3
        add,dc          ma256, climb, climb
        std             s000, 0(rp)
        std             s064, 8(rp)
        ldd             -0x78(%r30), p032a1
        std             s128, 16(rp)
        ldd             -0x70(%r30), p032a2
        std             s192, 24(rp)
        ldd             -0x38(%r30), p096b1
        ldd             -0x30(%r30), p096b2
        ldd             -0x58(%r30), p160c1
        ldd             -0x50(%r30), p160c2
        ldd             -0x18(%r30), p224d1
        ldd             -0x10(%r30), p224d2
        ldo             32(rp), rp

LDEF(end1)
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        add,dc          p064a, p064b, s064
        add,dc          p128b, p128c, s128
        add,dc          p192c, p192d, s192
        add,dc          p256d, %r0, climb
        add             ma000, s000, s000       C accum mid 0
        add,dc          ma064, s064, s064       C accum mid 1
        add,dc          ma128, s128, s128       C accum mid 2
        add,dc          ma192, s192, s192       C accum mid 3
        add,dc          ma256, climb, climb
        std             s000, 0(rp)
        std             s064, 8(rp)
        std             s128, 16(rp)
        std             s192, 24(rp)

C  Restore the registers saved at BIG.
        ldd             -0xb0(%r30), %r13
        ldd             -0xb8(%r30), %r12
        ldd             -0xc0(%r30), %r11
        ldd             -0xc8(%r30), %r10
        ldd             -0xd0(%r30), %r9
        ldd             -0xd8(%r30), %r8
        ldd             -0xe0(%r30), %r7
        ldd             -0xe8(%r30), %r6
C  Common exit: hand back climb, then restore r5, r4 and r3.  The final
C  ldd,mb in the delay slot of the return also moves r30 back down by 0x100.
LDEF(done)
ifdef(`HAVE_ABI_2_0w',
`       copy            climb, %r28
',`     extrd,u         climb, 63, 32, %r29
        extrd,u         climb, 31, 32, %r28
')
        ldd             -0xf0(%r30), %r5
        ldd             -0xf8(%r30), %r4
        bve             (%r2)
        ldd,mb          -0x100(%r30), %r3
EPILOGUE(mpn_mul_1)