dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.

dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C 8000,8200:		7
C 8500,8600,8700:	6.375

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers, have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limbs
C    restore-all-regs

C  STACK LAYOUT:
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C
C                -00 <- r30
C  -08  FREE
C  -10  tmp
C  -18  tmp
C  -20  tmp
C  -28  tmp
C  -30  tmp
C  -38  tmp
C  -40  tmp
C  -48  tmp
C  -50  tmp
C  -58  tmp
C  -60  tmp
C  -68  tmp
C  -70  tmp
C  -78  tmp
C  -80  tmp
C  -88  tmp
C  -90  FREE
C  -98  FREE
C  -a0  FREE
C  -a8  FREE
C  -b0  r13
C  -b8  r12
C  -c0  r11
C  -c8  r10
C  -d0  r9
C  -d8  r8
C  -e0  r7
C  -e8  r6
C  -f0  r5
C  -f8  r4
C  -100 r3
C  Previous frame:
C  [unused area]
C  -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.

C  INTERFACE SUMMARY (as used by the code below):
C    rp     %r26  limb vector that is read, added to, and written back
C    up     %r25  limb vector that is multiplied by vlimb (only read)
C    n      %r24  limb count; low two bits select the feed-in path, n/4 is
C                 the trip count of the 4-way unrolled code
C    vlimb  %r23  multiplier limb for 2.0w; for 2.0n it is read from the home
C                 slot at -0x138(%r30) after the frame has been allocated
C    return       final carry limb: %r28 for 2.0w; for 2.0n the high 32 bits
C                 go to %r28 and the low 32 bits to %r29
C  Registers %r3-%r5 are always saved/restored; %r6-%r13 only on the BIG path.
C  Each 64x64 product is built from four 32x32 xmpyu products (low, high and
C  two mid products) which travel from the FP registers to the integer
C  registers through the tmp stack slots listed above.


C NOTE(review): config.m4 is included a second time here; this looks
C redundant with the include above -- confirm before removing.
include(`../config.m4')

C INPUT PARAMETERS:
define(`rp',`%r26')	C limb vector updated in place
define(`up',`%r25')	C limb vector multiplied by vlimb
define(`n',`%r24')	C limb count
define(`vlimb',`%r23')	C multiplier limb (register form of the argument)

define(`climb',`%r23')	C running carry limb; reuses the vlimb register

ifdef(`HAVE_ABI_2_0w',
`	.level	2.0w
',`	.level	2.0
')
PROLOGUE(mpn_addmul_1)

ifdef(`HAVE_ABI_2_0w',
`	std		vlimb, -0x38(%r30)	C store vlimb into "home" slot
')
C Allocate a 0x100 byte frame (post-modify store of r3) and save r4, r5.
	std,ma		%r3, 0x100(%r30)
	std		%r4, -0xf8(%r30)
	std		%r5, -0xf0(%r30)
	ldo		0(%r0), climb		C clear climb
	fldd		-0x138(%r30), %fr8	C put vlimb in fp register

C Register names for the feed-in code (1..3 limbs).  Note that m032 and
C ma064 are both %r20.
define(`p032a1',`%r1')	C first mid product
define(`p032a2',`%r19')	C second mid product

define(`m032',`%r20')	C sum of the two mid products
define(`m096',`%r21')	C carry out of the mid product sum

define(`p000a',`%r22')	C low product
define(`p064a',`%r29')	C high product

define(`s000',`%r31')	C result limb being accumulated

define(`ma000',`%r4')	C mid sum, low 32 bits moved to the upper half
define(`ma064',`%r20')	C mid sum shifted right 32, carry in the upper half

define(`r000',`%r3')	C limb loaded from rp

C Feed-in: handle the n mod 4 leading limbs one at a time.
	extrd,u		n, 63, 2, %r5		C r5 = low two bits of n
	cmpb,=		%r5, %r0, L(BIG)	C none to do here, go to 4-way code
	nop

C Form the four partial products for the first limb and park them on the
C stack.  The last fstd sits in the delay slot of the addib.
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	addib,<>	-1, %r5, L(two_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to  -0x68..-0x61
LDEF(one)
C Exactly one feed-in limb: fetch its products and go to the final stage.
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x80(%r30), p000a
	b		L(0_one_out)
	ldd		-0x68(%r30), p064a

LDEF(two_or_more)
C Start the second limb while fetching the products of the first one; each
C ldd reads a stack slot just before the fstd that overwrites it.
	fldd		0(up), %fr4
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	ldd		-0x68(%r30), p064a
	addib,<>	-1, %r5, L(three_or_more)
	fstd		%fr25, -0x68(%r30)	C high product to  -0x68..-0x61
LDEF(two)
C Exactly two feed-in limbs: combine the mid products of the first limb
C (sum, then split at bit 32 into ma000/ma064) and join the two-limb tail.
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	b		L(0_two_out)
	depd		m096, 31, 32, ma064

LDEF(three_or_more)
C Three feed-in limbs: load the third limb and combine the mid products of
C the first.  The loop0 code below is disabled (commented out), so control
C falls straight through to 0_out.
	fldd		0(up), %fr4
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
C	addib,=		-1, %r5, L(0_out)
	depd		m096, 31, 32, ma064
LDEF(loop0)
C	xmpyu		%fr8R, %fr4L, %fr22
C	xmpyu		%fr8L, %fr4R, %fr23
C	ldd		-0x78(%r30), p032a1
C	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
C
C	xmpyu		%fr8R, %fr4R, %fr24
C	xmpyu		%fr8L, %fr4L, %fr25
C	ldd		-0x70(%r30), p032a2
C	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
C
C	ldo		8(rp), rp
C	add		climb, p000a, s000
C	ldd		-0x80(%r30), p000a
C	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
C
C	add,dc		p064a, %r0, climb
C	ldo		8(up), up
C	ldd		-0x68(%r30), p064a
C	fstd		%fr25, -0x68(%r30)	C high product to  -0x68..-0x61
C
C	add		ma000, s000, s000
C	add,dc		ma064, climb, climb
C	fldd		0(up), %fr4
C
C	add		r000, s000, s000
C	add,dc		%r0, climb, climb
C	std		s000, -8(rp)
C
C	add		p032a1, p032a2, m032
C	add,dc		%r0, %r0, m096
C
C	depd,z		m032, 31, 32, ma000
C	extrd,u		m032, 31, 32, ma064
C	ldd		0(rp), r000
C	addib,<>	-1, %r5, L(loop0)
C	depd		m096, 31, 32, ma064
LDEF(0_out)
C Multiply the last feed-in limb, finish and store the oldest limb in
C flight (at -8(rp) after rp has been advanced), and combine the mid
C products of the next one.
	ldo		8(up), up
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	ldd		-0x78(%r30), p032a1
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr4R, %fr24
	xmpyu		%fr8L, %fr4L, %fr25
	ldd		-0x70(%r30), p032a2
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	fstd		%fr25, -0x68(%r30)	C high product to  -0x68..-0x61
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, -8(rp)
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064
LDEF(0_two_out)
C Two limbs left in flight: fetch the products of the last limb, then
C finish and store the one before it.
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldo		8(rp), rp
	add		climb, p000a, s000
	ldd		-0x80(%r30), p000a
	add,dc		p064a, %r0, climb
	ldd		-0x68(%r30), p064a
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, -8(rp)
LDEF(0_one_out)
C One limb left in flight: combine its mid products ...
	add		p032a1, p032a2, m032
	add,dc		%r0, %r0, m096
	depd,z		m032, 31, 32, ma000
	extrd,u		m032, 31, 32, ma064
	ldd		0(rp), r000
	depd		m096, 31, 32, ma064

C ... then add incoming carry, low/high product, shifted mid sum and the rp
C limb; the new carry limb ends up in climb.
	add		climb, p000a, s000
	add,dc		p064a, %r0, climb
	add		ma000, s000, s000
	add,dc		ma064, climb, climb
	add		r000, s000, s000
	add,dc		%r0, climb, climb
	std		s000, 0(rp)

C This point is only reached when n mod 4 is nonzero, so 4 >= n means the
C feed-in limbs were all there was.  rp is advanced in the delay slot.
	cmpib,>=	4, n, L(done)
	ldo		8(rp), rp

C 4-way unrolled code.

LDEF(BIG)

C Register names for the 4-way code.  Several names below map to the same
C register; they are used at different stages of the pipeline.
define(`p032a1',`%r1')	C mid products, limb a
define(`p032a2',`%r19')	C
define(`p096b1',`%r20')	C mid products, limb b
define(`p096b2',`%r21')	C
define(`p160c1',`%r22')	C mid products, limb c
define(`p160c2',`%r29')	C
define(`p224d1',`%r31')	C mid products, limb d
define(`p224d2',`%r3')	C
			C
define(`m032',`%r4')	C mid product sums, chained with add,dc
define(`m096',`%r5')	C
define(`m160',`%r6')	C
define(`m224',`%r7')	C
define(`m288',`%r8')	C carry out of the mid product sum chain
			C
define(`p000a',`%r1')	C low/high products of limbs a..d
define(`p064a',`%r19')	C
define(`p064b',`%r20')	C
define(`p128b',`%r21')	C
define(`p128c',`%r22')	C
define(`p192c',`%r29')	C
define(`p192d',`%r31')	C
define(`p256d',`%r3')	C
			C
define(`s000',`%r10')	C the four result limbs being accumulated
define(`s064',`%r11')	C
define(`s128',`%r12')	C
define(`s192',`%r13')	C
			C
define(`ma000',`%r9')	C mid sums realigned by 32 bits
define(`ma064',`%r4')	C
define(`ma128',`%r5')	C
define(`ma192',`%r6')	C
define(`ma256',`%r7')	C
			C
define(`r000',`%r1')	C limbs loaded from rp
define(`r064',`%r19')	C
define(`r128',`%r20')	C
define(`r192',`%r21')	C

C Save r6..r13 in the frame allocated at entry.
	std		%r6, -0xe8(%r30)
	std		%r7, -0xe0(%r30)
	std		%r8, -0xd8(%r30)
	std		%r9, -0xd0(%r30)
	std		%r10, -0xc8(%r30)
	std		%r11, -0xc0(%r30)
	std		%r12, -0xb8(%r30)
	std		%r13, -0xb0(%r30)

ifdef(`HAVE_ABI_2_0w',
`	extrd,u		n, 61, 62, n		C right shift 2
',`	extrd,u		n, 61, 30, n		C right shift 2, zero extend
')

LDEF(4_or_more)
C First group of four limbs: compute all 16 partial products.  Registers
C fr22..fr27 are reused for the low/high products once their mid products
C have been stored.  The last xmpyu sits in the delay slot of the addib.
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,<>	-1, n, L(8_or_more)
	xmpyu		%fr8L, %fr7L, %fr27
C Only one group of four: store the remaining products, fetch the mid
C products and go directly to the final wind-down stage.
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to  -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to  -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to  -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to  -0x88..-0x81
	ldd		-0x78(%r30), p032a1
	ldd		-0x70(%r30), p032a2
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	b		L(end1)
	nop

LDEF(8_or_more)
C At least two groups: finish storing the products of the first group,
C then start multiplying the second group while the mid products of the
C first are fetched back into integer registers.
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	fstd		%fr31, -0x68(%r30)	C high product to  -0x68..-0x61
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	fstd		%fr23, -0x28(%r30)	C high product to  -0x28..-0x21
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	fstd		%fr25, -0x48(%r30)	C high product to  -0x48..-0x41
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	fstd		%fr27, -0x88(%r30)	C high product to  -0x88..-0x81
	fldd		0(up), %fr4
	fldd		8(up), %fr5
	fldd		16(up), %fr6
	fldd		24(up), %fr7
	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	addib,=		-1, n, L(end2)
	xmpyu		%fr8L, %fr7L, %fr27
LDEF(loop)
C Main loop, four limbs per pass (up and rp each advance by 32 bytes).
C Integer side: sum the mid products of the previous group, realign the
C sums by 32 bits, add low/high products, carry and the rp limbs, store.
C FP side: store the pending products and multiply the next four limbs.
C Each ldd of a stack slot precedes the fstd that refills the same slot.
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11

	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09

	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	ldo		32(up), up
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79

	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to  -0x68..-0x61

	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39

	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to  -0x28..-0x21

	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59

	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to  -0x48..-0x41

	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19

	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to  -0x88..-0x81

	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1

	add,dc		ma128, s128, s128	C accum mid 2
	fldd		0(up), %fr4
	add,dc		ma192, s192, s192	C accum mid 3
	fldd		8(up), %fr5

	add,dc		ma256, climb, climb
	fldd		16(up), %fr6
	add		r000, s000, s000	C accum rlimb 0
	fldd		24(up), %fr7

	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)

	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)

	xmpyu		%fr8R, %fr4L, %fr22
	ldd		-0x78(%r30), p032a1
	xmpyu		%fr8L, %fr4R, %fr23
	std		s128, 16(rp)

	xmpyu		%fr8R, %fr5L, %fr24
	ldd		-0x70(%r30), p032a2
	xmpyu		%fr8L, %fr5R, %fr25
	std		s192, 24(rp)

	xmpyu		%fr8R, %fr6L, %fr26
	ldd		-0x38(%r30), p096b1
	xmpyu		%fr8L, %fr6R, %fr27
	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71

	xmpyu		%fr8R, %fr7L, %fr28
	ldd		-0x30(%r30), p096b2
	xmpyu		%fr8L, %fr7R, %fr29
	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69

	xmpyu		%fr8R, %fr4R, %fr30
	ldd		-0x58(%r30), p160c1
	xmpyu		%fr8L, %fr4L, %fr31
	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31

	xmpyu		%fr8R, %fr5R, %fr22
	ldd		-0x50(%r30), p160c2
	xmpyu		%fr8L, %fr5L, %fr23
	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29

	xmpyu		%fr8R, %fr6R, %fr24
	ldd		-0x18(%r30), p224d1
	xmpyu		%fr8L, %fr6L, %fr25
	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51

	xmpyu		%fr8R, %fr7R, %fr26
	ldd		-0x10(%r30), p224d2
	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
	xmpyu		%fr8L, %fr7L, %fr27

	addib,<>	-1, n, L(loop)
	ldo		32(rp), rp

LDEF(end2)
C Wind-down, two groups still in flight: same integer work as one loop
C pass for the older group while the products of the last group are
C stored; no further limbs are loaded from up.
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	fstd		%fr31, -0x68(%r30)	C high product to  -0x68..-0x61
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	fstd		%fr23, -0x28(%r30)	C high product to  -0x28..-0x21
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	fstd		%fr25, -0x48(%r30)	C high product to  -0x48..-0x41
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	fstd		%fr27, -0x88(%r30)	C high product to  -0x88..-0x81
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	add		r000, s000, s000	C accum rlimb 0
	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)
	ldd		-0x78(%r30), p032a1
	std		s128, 16(rp)
	ldd		-0x70(%r30), p032a2
	std		s192, 24(rp)
	ldd		-0x38(%r30), p096b1
	ldd		-0x30(%r30), p096b2
	ldd		-0x58(%r30), p160c1
	ldd		-0x50(%r30), p160c2
	ldd		-0x18(%r30), p224d1
	ldd		-0x10(%r30), p224d2
	ldo		32(rp), rp

LDEF(end1)
C Wind-down, last group: all products are already on the stack, so only the
C integer summation and the four stores to rp remain.
	add		p032a1, p032a2, m032
	ldd		-0x80(%r30), p000a
	add,dc		p096b1, p096b2, m096
	add,dc		p160c1, p160c2, m160
	ldd		-0x68(%r30), p064a
	add,dc		p224d1, p224d2, m224
	add,dc		%r0, %r0, m288
	ldd		-0x40(%r30), p064b
	depd,z		m032, 31, 32, ma000
	ldd		-0x28(%r30), p128b
	extrd,u		m032, 31, 32, ma064
	depd		m096, 31, 32, ma064
	ldd		-0x60(%r30), p128c
	extrd,u		m096, 31, 32, ma128
	depd		m160, 31, 32, ma128
	ldd		-0x48(%r30), p192c
	extrd,u		m160, 31, 32, ma192
	depd		m224, 31, 32, ma192
	ldd		-0x20(%r30), p192d
	extrd,u		m224, 31, 32, ma256
	depd		m288, 31, 32, ma256
	ldd		-0x88(%r30), p256d
	add		climb, p000a, s000
	add,dc		p064a, p064b, s064
	ldd		0(rp), r000
	add,dc		p128b, p128c, s128
	add,dc		p192c, p192d, s192
	ldd		8(rp), r064
	add,dc		p256d, %r0, climb
	ldd		16(rp), r128
	add		ma000, s000, s000	C accum mid 0
	ldd		24(rp), r192
	add,dc		ma064, s064, s064	C accum mid 1
	add,dc		ma128, s128, s128	C accum mid 2
	add,dc		ma192, s192, s192	C accum mid 3
	add,dc		ma256, climb, climb
	add		r000, s000, s000	C accum rlimb 0
	add,dc		r064, s064, s064	C accum rlimb 1
	add,dc		r128, s128, s128	C accum rlimb 2
	std		s000, 0(rp)
	add,dc		r192, s192, s192	C accum rlimb 3
	add,dc		%r0, climb, climb
	std		s064, 8(rp)
	std		s128, 16(rp)
	std		s192, 24(rp)

C Restore the registers saved on entry to BIG.
	ldd		-0xb0(%r30), %r13
	ldd		-0xb8(%r30), %r12
	ldd		-0xc0(%r30), %r11
	ldd		-0xc8(%r30), %r10
	ldd		-0xd0(%r30), %r9
	ldd		-0xd8(%r30), %r8
	ldd		-0xe0(%r30), %r7
	ldd		-0xe8(%r30), %r6
LDEF(done)
C Common exit: deliver the carry limb, restore r5, r4 and r3.  The ldd,mb
C after bve restores r3 and moves r30 back by 0x100, undoing the std,ma
C done at entry.
ifdef(`HAVE_ABI_2_0w',
`	copy		climb, %r28
',`	extrd,u		climb, 63, 32, %r29
	extrd,u		climb, 31, 32, %r28
')
	ldd		-0xf0(%r30), %r5
	ldd		-0xf8(%r30), %r4
	bve		(%r2)
	ldd,mb		-0x100(%r30), %r3
EPILOGUE(mpn_addmul_1)