1dnl HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and 2dnl add the result to a second limb vector. 3 4dnl Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. 5 6dnl This file is part of the GNU MP Library. 7 8dnl The GNU MP Library is free software; you can redistribute it and/or modify 9dnl it under the terms of the GNU Lesser General Public License as published 10dnl by the Free Software Foundation; either version 3 of the License, or (at 11dnl your option) any later version. 12 13dnl The GNU MP Library is distributed in the hope that it will be useful, but 14dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16dnl License for more details. 17 18dnl You should have received a copy of the GNU Lesser General Public License 19dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 20 21include(`../config.m4') 22 23C cycles/limb 24C 8000,8200: 7 25C 8500,8600,8700: 6.375 26 27C The feed-in and wind-down code has not yet been scheduled. Many cycles 28C could be saved there per call. 29 30C DESCRIPTION: 31C The main loop "BIG" is 4-way unrolled, mainly to allow 32C effective use of ADD,DC. Delays in moving data via the cache from the FP 33C registers to the IU registers, have demanded a deep software pipeline, and 34C a lot of stack slots for partial products in flight. 35C 36C CODE STRUCTURE: 37C save-some-registers 38C do 0, 1, 2, or 3 limbs 39C if done, restore-some-regs and return 40C save-many-regs 41C do 4, 8, ... limb 42C restore-all-regs 43 44C STACK LAYOUT: 45C HP-PA stack grows upwards. We could allocate 8 fewer slots by using the 46C slots marked FREE, as well as some slots in the caller's "frame marker". 
C
C All offsets below are relative to %r30 after the prologue has advanced it
C by 0x100.
C
C -00 <- r30
C -08  FREE
C -10  tmp
C -18  tmp
C -20  tmp
C -28  tmp
C -30  tmp
C -38  tmp
C -40  tmp
C -48  tmp
C -50  tmp
C -58  tmp
C -60  tmp
C -68  tmp
C -70  tmp
C -78  tmp
C -80  tmp
C -88  tmp
C -90  FREE
C -98  FREE
C -a0  FREE
C -a8  FREE
C -b0  r13
C -b8  r12
C -c0  r11
C -c8  r10
C -d0  r9
C -d8  r8
C -e0  r7
C -e8  r6
C -f0  r5
C -f8  r4
C -100 r3
C  Previous frame:
C  [unused area]
C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.


C NOTE(review): config.m4 is already included near the top of this file, so
C this second include looks redundant -- confirm before removing it.
include(`../config.m4')

C INPUT PARAMETERS:
define(`rp',`%r26')	C limb vector that is read, added to and written back
define(`up',`%r25')	C limb vector that is multiplied by vlimb
define(`n',`%r24')	C number of limbs to process
define(`vlimb',`%r23')	C the single multiplier limb

define(`climb',`%r23')	C carry limb; reuses the vlimb register once vlimb
			C has been moved to %fr8

ifdef(`HAVE_ABI_2_0w',
`	.level	2.0w
',`	.level	2.0
')

C mpn_addmul_1 (rp, up, n, vlimb)
C
C Multiplies the n limbs at up by vlimb, adds the products to the n limbs at
C rp, stores the sums back at rp, and leaves the final carry limb in %r28
C (2.0w ABI) or split across %r28 (high 32 bits) and %r29 (low 32 bits)
C otherwise.
C
C Each 64x64 limb product is built from four 32x32 xmpyu products (low, two
C mid, high).  The products are written to stack slots with fstd and read
C back into integer registers with ldd, which is why so many tmp slots are
C needed.
C
C Callee-saved handling as visible in this file: %r3-%r5 are always saved and
C restored; %r6-%r13 are saved and restored only on the 4-way unrolled path.
C
C NOTE(review): no check for n = 0 is made here; callers presumably guarantee
C n >= 1 -- confirm.
PROLOGUE(mpn_addmul_1)

ifdef(`HAVE_ABI_2_0w',
`	std		vlimb, -0x38(%r30)	C store vlimb into "home" slot
')
	std,ma		%r3, 0x100(%r30)	C save r3, then advance r30 by 0x100
	std		%r4, -0xf8(%r30)
	std		%r5, -0xf0(%r30)
	ldo		0(%r0), climb		C clear climb
	fldd		-0x138(%r30), %fr8	C put vlimb in fp register

C Register names for the 1..3 limb feed-in code.  Several names share a
C register (m032 and ma064 are both %r20); the instruction order below
C depends on that.
define(`p032a1',`%r1')	C first mid product
define(`p032a2',`%r19')	C second mid product

define(`m032',`%r20')	C sum of the two mid products
define(`m096',`%r21')	C carry out of that sum

define(`p000a',`%r22')	C low product
define(`p064a',`%r29')	C high product

define(`s000',`%r31')	C result limb being accumulated

define(`ma000',`%r4')	C low half of m032, moved up to the high 32 bits
define(`ma064',`%r20')	C high half of m032, with m096 deposited above it

define(`r000',`%r3')	C limb loaded from rp

C r5 = n mod 4.  If that is zero, go straight to the unrolled code.
	extrd,u		n, 63, 2, %r5
	cmpb,=		%r5, %r0, L(BIG)
	nop

C First of the 1..3 leftover limbs: form its four partial products.
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	addib,<>	-1, %r5, L(two_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C Exactly one leftover limb: fetch its products and finish in 0_one_out.
LDEF(one)
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x80(%r30), p000a
	b		L(0_one_out)
	ldd		-0x68(%r30), p064a	C (delay slot)

C Second leftover limb: start its products while reading back the products of
C the first limb.  Each ldd precedes the fstd that reuses the same slot.
LDEF(two_or_more)
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	ldd		-0x68(%r30), p064a
	addib,<>	-1, %r5, L(three_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C Exactly two leftover limbs: combine the mid products of the first limb and
C continue in 0_two_out.
LDEF(two)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	b		L(0_two_out)
	depd		m096, 31, 32, ma064	C (delay slot)

C Three leftover limbs: load the third limb and combine the mid products of
C the first.  The generic loop0 below is commented out, so control falls
C through to 0_out.
LDEF(three_or_more)
	fldd		0(up), %fr4
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
C	addib,=		-1, %r5, L(0_out)
	depd		m096, 31, 32, ma064
LDEF(loop0)
C	xmpyu		%fr8R, %fr4L, %fr22
C	xmpyu		%fr8L, %fr4R, %fr23
C	ldd		-0x78(%r30), p032a1
C	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
C
C	xmpyu		%fr8R, %fr4R, %fr24
C	xmpyu		%fr8L, %fr4L, %fr25
C	ldd		-0x70(%r30), p032a2
C	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
C
C	ldo		8(rp), rp
C	add		climb, p000a, s000
C	ldd		-0x80(%r30), p000a
C	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
C
C	add,dc		p064a, %r0, climb
C	ldo		8(up), up
C	ldd		-0x68(%r30), p064a
C	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
C
C	add		ma000, s000, s000
C	add,dc		ma064, climb, climb
C	fldd		0(up), %fr4
C
C	add		r000, s000, s000
C	add,dc		%r0, climb, climb
C	std		s000, -8(rp)
C
C	add		p032a1, p032a2, m032
C	add,dc		%r0, %r0, m096
C
C	depd,z		m032, 31, 32, ma000
C	extrd,u		m032, 31, 32, ma064
C	ldd		0(rp), r000
C	addib,<>	-1, %r5, L(loop0)
C	depd		m096, 31, 32, ma064
C Wind-down, three limbs pending: multiply the last loaded limb, finish and
C store the oldest pending limb, and prepare the mid sum of the next one.
LDEF(0_out)
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, -8(rp)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064
C Wind-down, two limbs pending: finish and store the older one while
C reading back the products of the last one.
LDEF(0_two_out)
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, -8(rp)
C Wind-down, one limb pending: combine its four products with climb and the
C rp limb, store the result, and leave the new carry in climb.
LDEF(0_one_out)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064

	add		climb, p000a, s000
	add,dc		p064a, %r0, climb
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, 0(rp)

C The n mod 4 leftover limbs are done.  If n was below 4 there is nothing
C left; otherwise fall into the unrolled code.
C NOTE(review): cmpib without the * completer presumably tests only the low
C 32 bits of n; harmless for realistic sizes, but confirm for the 2.0w ABI.
	cmpib,>=	4, n, L(done)
	ldo		8(rp), rp		C (delay slot) step past the limb just stored

C 4-way unrolled code.

LDEF(BIG)

C Register names for the unrolled code.  The letters a..d denote the four
C limbs of a group.  Registers are reused under several names (for example
C %r1 is p032a1, p000a and r000), so the instruction order matters.

C mid products, two per limb
define(`p032a1',`%r1')	C
define(`p032a2',`%r19')	C
define(`p096b1',`%r20')	C
define(`p096b2',`%r21')	C
define(`p160c1',`%r22')	C
define(`p160c2',`%r29')	C
define(`p224d1',`%r31')	C
define(`p224d2',`%r3')	C
			C
C sums of the mid product pairs; m288 receives the final carry of the chain
define(`m032',`%r4')	C
define(`m096',`%r5')	C
define(`m160',`%r6')	C
define(`m224',`%r7')	C
define(`m288',`%r8')	C
			C
C low and high products per limb
define(`p000a',`%r1')	C
define(`p064a',`%r19')	C
define(`p064b',`%r20')	C
define(`p128b',`%r21')	C
define(`p128c',`%r22')	C
define(`p192c',`%r29')	C
define(`p192d',`%r31')	C
define(`p256d',`%r3')	C
			C
C the four result limbs being accumulated
define(`s000',`%r10')	C
define(`s064',`%r11')	C
define(`s128',`%r12')	C
define(`s192',`%r13')	C
			C
C mid sums realigned to 64-bit limb boundaries
define(`ma000',`%r9')	C
define(`ma064',`%r4')	C
define(`ma128',`%r5')	C
define(`ma192',`%r6')	C
define(`ma256',`%r7')	C
			C
C limbs loaded from rp
define(`r000',`%r1')	C
define(`r064',`%r19')	C
define(`r128',`%r20')	C
define(`r192',`%r21')	C

C Save the additional registers used by the unrolled code.
	std		%r6, -0xe8(%r30)
	std		%r7, -0xe0(%r30)
	std		%r8, -0xd8(%r30)
	std		%r9, -0xd0(%r30)
	std		%r10, -0xc8(%r30)
	std		%r11, -0xc0(%r30)
	std		%r12, -0xb8(%r30)
	std		%r13, -0xb0(%r30)

C From here on n counts groups of four limbs.
ifdef(`HAVE_ABI_2_0w',
`	extrd,u		n, 61, 62, n		C right shift 2
',`	extrd,u		n, 61, 30, n		C right shift 2, zero extend
')

C Pipeline feed-in: load the first group of four limbs and form its sixteen
C partial products.
LDEF(4_or_more)
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,<>	-1, n, L(8_or_more)
	xmpyu		%fr8L, %fr7L, %fr27	C (delay slot)
C Only one group: spill the remaining products, read back the mid products
C and finish in end1.
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	b		L(end1)
	nop

C At least two groups: spill the rest of the first group, then load and
C multiply the second group while reading back the mid products of the
C first.
LDEF(8_or_more)
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,=		-1, n, L(end2)
	xmpyu		%fr8L, %fr7L, %fr27	C (delay slot)
C Main loop, one group of four limbs per pass.  Each pass accumulates and
C stores the group whose products are already in the stack slots, spills the
C products of the following group, and loads and multiplies the group after
C that.  Every ldd of a slot comes before the fstd that overwrites it.
LDEF(loop)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11

	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09

	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79

	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61

	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39

	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21

	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59

	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41

	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19

	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81

	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1

	add,dc		ma128, s128, s128	C accum mid 2
	fldd		0(up), %fr4
	add,dc		ma192, s192, s192	C accum mid 3
	fldd		8(up), %fr5

	add,dc		ma256, climb, climb
	fldd		16(up), %fr6
	add		r000, s000, s000	C accum rlimb 0
	fldd		24(up), %fr7

	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)

	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)

	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	std		s128, 16(rp)

	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	std		s192, 24(rp)

	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71

	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69

	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31

	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29

	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51

	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	xmpyu		%fr8L, %fr7L, %fr27

	addib,<>	-1, n, L(loop)
	ldo		32(rp), rp		C (delay slot)

C Wind-down, two groups in flight: same as a loop pass but without loading
C or multiplying any further limbs.  Stores one group, then reads back the
C mid products of the last group for end1.
LDEF(end2)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	add		r000, s000, s000	C accum rlimb 0
	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)
	ldd		-0x78(%r30), p032a1
	std		s128, 16(rp)
	ldd		-0x70(%r30), p032a2
	std		s192, 24(rp)
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	ldo		32(rp), rp

C Wind-down, last group: all of its products are already in the stack slots,
C so only the accumulation and the four stores remain.
LDEF(end1)
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	add		r000, s000, s000	C accum rlimb 0
	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)
	std		s128, 16(rp)
	std		s192, 24(rp)

C Restore the registers saved at BIG.
	ldd		-0xb0(%r30), %r13
	ldd		-0xb8(%r30), %r12
	ldd		-0xc0(%r30), %r11
	ldd		-0xc8(%r30), %r10
	ldd		-0xd0(%r30), %r9
	ldd		-0xd8(%r30), %r8
	ldd		-0xe0(%r30), %r7
	ldd		-0xe8(%r30), %r6
C Common exit.  Move climb to the result register(s): %r28 for 2.0w, else
C the low 32 bits go to %r29 and the high 32 bits to %r28.  Then restore
C %r3-%r5 and release the 0x100 byte frame.
LDEF(done)
ifdef(`HAVE_ABI_2_0w',
`	copy		climb, %r28
',`	extrd,u		climb, 63, 32, %r29
	extrd,u		climb, 31, 32, %r28
')
	ldd		-0xf0(%r30), %r5
	ldd		-0xf8(%r30), %r4
	bve		(%r2)
	ldd,mb		-0x100(%r30), %r3	C (delay slot) pop frame, restore r3
EPILOGUE(mpn_addmul_1)