#include "mips_arch.h"

/*
 * Divide helper macros, chosen by ISA revision.
 *
 * With _MIPS_ARCH_MIPS64R6 or _MIPS_ARCH_MIPS32R6 defined, the divide macro
 * expands to nothing and mfqt()/mfrm() expand to the three-operand
 * divide/modulo instructions that write the quotient/remainder to rd.
 * Otherwise the divide macro emits the divide with $0 as destination and
 * mfqt()/mfrm() read the quotient/remainder with mflo/mfhi.
 *
 * NOTE(review): dmultu(), mflo() and mfhi() are also used below in
 * parenthesised macro form; they are presumably supplied by mips_arch.h,
 * which is not visible from here.
 */
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt)	divu	rd,rs,rt
# define mfrm(rd,rs,rt)	modu	rd,rs,rt
#else
# define ddivu(rs,rt)	ddivu	$0,rs,rt
# define mfqt(rd,rs,rt)	mflo	rd
# define mfrm(rd,rs,rt)	mfhi	rd
#endif

.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
/* $1 is used explicitly as a scratch register throughout this file. */
.set	noat

/*
 * bn_mul_add_words -- exported entry stub.
 *
 * Registers as used by the code below:
 *   $4  address of the words that are read, updated and written back
 *   $5  address of the words that are multiplied
 *   $6  word count (tested as a signed value)
 *   $7  multiplier word
 *   $2  running carry, and the value left in $2 on return
 *
 * If $6 > 0 control transfers to bn_mul_add_words_internal; the delay-slot
 * instruction clears the carry in $2 either way.  Otherwise the stub
 * returns with $2 = 0.  The delay slot of each jr also copies $2 into $4
 * (the reason is not visible in this file).
 */
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0		# delay slot: carry = 0
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_add_words

/*
 * bn_mul_add_words_internal -- worker.
 *
 * For every word i: the 128-bit product of the word at $5 and $7 is added,
 * together with the carry in $2, to the word at $4; the low 64 bits are
 * stored back and the high 64 bits plus the carry-outs become the new $2.
 *
 * $3 holds -4 so that ($6 & $3) is non-zero exactly while at least four
 * words remain (the count is positive here).  The main loop is unrolled
 * four times; each dmultu is issued ahead of the carry arithmetic of the
 * previous word.  The tail handles the remaining one to three words.
 */
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	ld	$14,8($5)
	ld	$15,8($4)
	ld	$8,2*8($5)
	ld	$9,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	dmultu	($14,$7)
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1

	ld	$10,3*8($5)
	ld	$11,3*8($4)
	daddu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$15,$1
	daddu	$2,$14
	dmultu	($8,$7)
	sltu	$1,$15,$1
	sd	$15,8($4)
	daddu	$2,$1

	subu	$6,4		# four words consumed
	daddu	$4,4*8		# pointers advance early; the stores
	daddu	$5,4*8		# below use negative offsets
	daddu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$9,$1
	daddu	$2,$8
	dmultu	($10,$7)
	sltu	$1,$9,$1
	sd	$9,-2*8($4)
	daddu	$2,$1


	and	$8,$6,$3	# non-zero while >= 4 words remain
	daddu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$11,$1
	daddu	$2,$10
	sltu	$1,$11,$1
	sd	$11,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	daddu	$2,$1		# delay slot: fold in last carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	ld	$13,8($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,8($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	ld	$13,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,2*8($4)
	daddu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_add_words_internal

/*
 * bn_mul_words -- exported entry stub.
 *
 * Same shape as bn_mul_add_words: if the count in $6 is positive, branch
 * to bn_mul_words_internal with the carry in $2 cleared by the delay slot;
 * otherwise return with $2 = 0 (also copied to $4).
 */
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0		# delay slot: carry = 0
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_words

/*
 * bn_mul_words_internal -- worker reached from the bn_mul_words stub.
 *
 * For every word i: the 128-bit product of the word at $5 and $7 is formed,
 * the carry in $2 is added to its low half and stored at $4, and the high
 * half plus the carry-out becomes the new $2.  $6 is the remaining word
 * count; $3 = -4 is the mask used to test for "at least four words left".
 * The main loop is unrolled four times; the tail handles one to three words.
 */
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	dmultu	($14,$7)
	sd	$2,0($4)
	daddu	$2,$13,$12

	subu	$6,4
	daddu	$4,4*8
	daddu	$5,4*8
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$2,$1
	sltu	$15,$2,$1
	dmultu	($8,$7)
	sd	$2,-3*8($4)
	daddu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$2,$1
	sltu	$9,$2,$1
	dmultu	($10,$7)
	sd	$2,-2*8($4)
	daddu	$2,$9,$8

	and	$8,$6,$3	# non-zero while >= 4 words remain
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$2,$1
	sltu	$11,$2,$1
	sd	$2,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	daddu	$2,$11,$10	# delay slot: next carry

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,0($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,8($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,2*8($4)
	daddu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_mul_words_internal

/*
 * bn_sqr_words -- exported entry stub.
 *
 * If the count in $6 is positive, branch to bn_sqr_words_internal (the
 * delay slot clears $2); otherwise return with $2 = 0, also copied to $4.
 */
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0		# delay slot
	jr	$31
	move	$4,$2		# delay slot
.end	bn_sqr_words

/*
 * bn_sqr_words_internal -- worker.
 *
 * For every input word at $5 the 128-bit square is stored at $4 as two
 * words, low half first, so $4 advances by eight words for every four
 * input words.  $6 is the remaining input word count and $3 = -4 is the
 * "at least four left" mask.  No carry is propagated between words; $2 is
 * not written by this worker.
 */
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	ld	$12,0($5)
	dmultu	($12,$12)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)

	dmultu	($14,$14)
	subu	$6,4
	daddu	$4,8*8		# two output words per input word
	daddu	$5,4*8
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sd	$15,-6*8($4)
	sd	$14,-5*8($4)

	dmultu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sd	$9,-4*8($4)
	sd	$8,-3*8($4)


	dmultu	($10,$10)
	and	$8,$6,$3	# $8 is free again; reuse as loop test
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sd	$11,-2*8($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sd	$10,-8($4)	# delay slot: store last high half

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,8($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,2*8($4)
	sd	$12,3*8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,2*8($5)
	dmultu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,4*8($4)
	sd	$12,5*8($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot

.end	bn_sqr_words_internal

/*
 * bn_add_words -- exported entry stub.
 *
 * Here the word count is in $7.  If it is positive, branch to
 * bn_add_words_internal with the carry in $2 cleared by the delay slot;
 * otherwise return with $2 = 0, also copied to $4.
 */
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0		# delay slot: carry = 0
	jr	$31
	move	$4,$2		# delay slot
.end	bn_add_words

/*
 * bn_add_words_internal -- worker.
 *
 * For every word i: word at $5 + word at $6 + carry ($2) is stored at $4.
 * Each step produces two partial carries (one per addition, detected with
 * sltu) which are summed into the new $2.  $7 is the remaining count,
 * $3 = -4 the "at least four left" mask, $1 the loop test value.
 * $24/$25 alternate as holders of the first partial carry.
 */
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3	# non-zero while >= 4 words remain
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	daddu	$8,$12
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	daddu	$9,$13
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25

	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	daddu	$11,$15
	sltu	$25,$11,$15
	daddu	$15,$11,$2
	sltu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	daddu	$2,$25		# delay slot: combine carries

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	daddu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_add_words_return

	ld	$13,8($5)
	ld	$9,8($6)
	daddu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_add_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot

.end	bn_add_words_internal

/*
 * bn_sub_words -- exported entry stub.
 *
 * If the count in $7 is positive, branch to bn_sub_words_internal with the
 * borrow in $2 cleared by the delay slot; otherwise return with $2 = 0.
 * Unlike the other stubs the final delay slot writes $0 (not $2) to $4;
 * both are zero at that point.
 */
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0		# delay slot: borrow = 0
	jr	$31
	move	$4,$0		# delay slot
.end	bn_sub_words

/*
 * bn_sub_words_internal -- worker.
 *
 * For every word i: word at $5 - word at $6 - borrow ($2) is stored at $4.
 * The first partial borrow is computed with sltu before the subtraction,
 * the second with sgtu after subtracting the incoming borrow; their sum is
 * the new $2.  $7 is the remaining count, $3 = -4 the "at least four left"
 * mask, $1 the loop test value.
 */
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3	# non-zero while >= 4 words remain
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25


	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	sltu	$25,$15,$11
	dsubu	$11,$15,$11
	dsubu	$15,$11,$2
	sgtu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	daddu	$2,$25		# delay slot: combine borrows

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	ld	$13,8($5)
	subu	$7,1
	ld	$9,8($6)
	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2		# delay slot
.end	bn_sub_words_internal

#if 0
/*
 * The bn_div_3_words entry point is re-used for constant-time interface.
 * Implementation is retained as historical reference.
 */
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	ld	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	ld	$5,-8($7)
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31
	bal	bn_div_words_internal
	move	$31,$11
	dmultu	($10,$2)
	ld	$14,-2*8($7)
	move	$8,$0
	mfhi	($13,$10,$2)
	mflo	($12,$10,$2)
	sltu	$24,$13,$5
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25
	sltu	$15,$12,$10
	daddu	$5,$6
	dsubu	$13,$15
	dsubu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	dsubu	$2,1
	daddu	$2,1
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal
#endif

/*
 * bn_div_words -- exported entry stub.
 *
 * $4:$5 is the two-word dividend (high word in $4, low word in $5) and $6
 * the divisor.  If $6 is non-zero control transfers to
 * bn_div_words_internal; the delay slot sets $2 = -1 first, so a zero
 * divisor returns all ones in $2 (and $4).
 */
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words

/*
 * bn_div_words_internal -- worker; entered with .set noreorder still in
 * effect from the stub above.
 *
 * Steps, as performed by the code:
 *  1. Normalise: shift $6 left until its top bit is set, counting the
 *     shifts in $25.  Skipped when the top bit is already set.
 *  2. If any of the top $25 bits of $4 are set, execute break 6
 *     (overflow).  Otherwise shift $4:$5 left by $25 bits.
 *  3. If $4 >= $6, subtract $6 from $4 once.
 *  4. Two rounds, each producing a 32-bit quotient half in $8: estimate it
 *     by dividing $4 by the top 32 bits of the divisor ($3), or use
 *     0xffffffff when the top halves are equal, then decrement the
 *     estimate while estimate * $6 exceeds the current partial remainder.
 *  5. $2 = (first half << 32) | second half; the remainder is shifted back
 *     right by $25 into $3 and copied to $5; $6 is restored.
 *
 * Note that $3 is overwritten as a scratch register inside the second
 * correction loop, after its last use as the divisor top half.
 */
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body
	move	$25,$3		# delay slot: shift count = 0
	dsll	$6,1
	bgtz	$6,.-4		# back to the dsll above
	addu	$25,1		# delay slot: count every shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	dsll	$14,$13
	and	$14,$4
	dsrl	$1,$5,$13
	.set	noreorder
	beqz	$14,.+12
	nop
	break	6		# signal overflow
	.set	reorder
	dsll	$4,$25
	dsll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	dsrl	$3,$6,4*8	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12
	nop
	dsubu	$4,$6
	.set	reorder

	li	$8,-1
	dsrl	$9,$4,4*8	# bits
	dsrl	$8,4*8	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div1
	ddivu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div1:
	dmultu	($6,$8)
	dsll	$15,$4,4*8	# bits
	dsrl	$1,$5,4*8	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	dsubu	$13,$2
	dsubu	$12,$6
	b	.L_bn_div_words_inner_loop1
	dsubu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	dsll	$5,4*8	# bits
	dsubu	$4,$15,$12
	dsll	$2,$8,4*8	# bits

	li	$8,-1
	dsrl	$9,$4,4*8	# bits
	dsrl	$8,4*8	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div2
	ddivu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div2:
	dmultu	($6,$8)
	dsll	$15,$4,4*8	# bits
	dsrl	$1,$5,4*8	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	dsubu	$13,$3
	dsubu	$12,$6
	b	.L_bn_div_words_inner_loop2
	dsubu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	dsubu	$4,$15,$12
	or	$2,$8
	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
	dsrl	$6,$25		# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal

/*
 * bn_mul_comba8 -- 8 x 8 word product, column by column.
 *
 * $4 points at the 16 result words r[0..15], $5 at a[0..7] and $6 at
 * b[0..7].  All sixteen operand words are loaded into registers:
 *   a[0..7] -> $12, $13, $14, $15, $16, $18, $20, $5
 *   b[0..7] -> $8,  $9,  $10, $11, $17, $19, $21, $6
 * so $5 and $6 stop being pointers once a[7] and b[7] are loaded.
 * $2, $3 and $7 form the rotating three-word accumulator (c1, c2, c3 in
 * the mul_add_c comments), $24/$25 receive the low/high product halves and
 * $1 holds the carry bit.  Each dmultu is issued before the accumulation of
 * the previous product is finished.
 *
 * $16..$21 are saved on a 6*8 byte stack frame on entry and restored
 * before returning.
 */
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*8,$31
	.mask	0x003f0000,-8
	dsubu	$29,6*8
	sd	$21,5*8($29)
	sd	$20,4*8($29)
	sd	$19,3*8($29)
	sd	$18,2*8($29)
	sd	$17,1*8($29)
	sd	$16,0*8($29)

	.set	reorder
	ld	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# line with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning. If anybody out there
				# has a clue about how to circumvent
				# this do send me a note.
				#	<appro@fy.chalmers.se>

	ld	$8,0($6)
	ld	$13,8($5)
	ld	$14,2*8($5)
	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$15,3*8($5)
	ld	$9,8($6)
	ld	$10,2*8($6)
	ld	$11,3*8($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)

	ld	$16,4*8($5)
	ld	$18,5*8($5)
	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	ld	$20,6*8($5)
	ld	$5,7*8($5)
	ld	$17,4*8($6)
	ld	$19,5*8($6)
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	daddu	$7,$25,$1
	ld	$21,6*8($6)
	ld	$6,7*8($6)
	sd	$2,0($4)	# r[0]=c1;
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	sd	$3,8($4)	# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)	# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,3*8($4)	# r[3]=c1;

	mflo	($24,$16,$8)
	mfhi	($25,$16,$8)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$12,$17)
	mfhi	($25,$12,$17)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)	# r[4]=c2;

	mflo	($24,$12,$19)
	mfhi	($25,$12,$19)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$13,$17)
	mfhi	($25,$13,$17)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$16,$9)
	mfhi	($25,$16,$9)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$18,$8)
	mfhi	($25,$18,$8)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,5*8($4)	# r[5]=c3;

	mflo	($24,$20,$8)
	mfhi	($25,$20,$8)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$18,$9)
	mfhi	($25,$18,$9)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$16,$10)
	mfhi	($25,$16,$10)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$14,$17)
	mfhi	($25,$14,$17)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$13,$19)
	mfhi	($25,$13,$19)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$12,$21)
	mfhi	($25,$12,$21)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,6*8($4)	# r[6]=c1;

	mflo	($24,$12,$6)
	mfhi	($25,$12,$6)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$13,$21)
	mfhi	($25,$13,$21)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$14,$19)
	mfhi	($25,$14,$19)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$15,$17)
	mfhi	($25,$15,$17)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$16,$11)
	mfhi	($25,$16,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$18,$10)
	mfhi	($25,$18,$10)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$20,$9)
	mfhi	($25,$20,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$5,$8)
	mfhi	($25,$5,$8)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,7*8($4)	# r[7]=c2;

	mflo	($24,$5,$9)
	mfhi	($25,$5,$9)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$20,$10)
	mfhi	($25,$20,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$18,$11)
	mfhi	($25,$18,$11)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$16,$17)
	mfhi	($25,$16,$17)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$15,$19)
	mfhi	($25,$15,$19)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$14,$21)
	mfhi	($25,$14,$21)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$13,$6)
	mfhi	($25,$13,$6)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,8*8($4)	# r[8]=c3;

	mflo	($24,$14,$6)
	mfhi	($25,$14,$6)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$15,$21)
	mfhi	($25,$15,$21)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$16,$19)
	mfhi	($25,$16,$19)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$18,$17)
	mfhi	($25,$18,$17)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$20,$11)
	mfhi	($25,$20,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$5,$10)
	mfhi	($25,$5,$10)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,9*8($4)	# r[9]=c1;

	mflo	($24,$5,$11)
	mfhi	($25,$5,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$20,$17)
	mfhi	($25,$20,$17)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$18,$19)
	mfhi	($25,$18,$19)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$16,$21)
	mfhi	($25,$16,$21)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$15,$6)
	mfhi	($25,$15,$6)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,10*8($4)	# r[10]=c2;

	mflo	($24,$16,$6)
	mfhi	($25,$16,$6)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$18,$21)
	mfhi	($25,$18,$21)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$20,$19)
	mfhi	($25,$20,$19)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	mflo	($24,$5,$17)
	mfhi	($25,$5,$17)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,11*8($4)	# r[11]=c3;

	mflo	($24,$5,$19)
	mfhi	($25,$5,$19)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$20,$21)
	mfhi	($25,$20,$21)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$18,$6)
	mfhi	($25,$18,$6)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,12*8($4)	# r[12]=c1;

	mflo	($24,$20,$6)
	mfhi	($25,$20,$6)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$5,$21)
	mfhi	($25,$5,$21)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,13*8($4)	# r[13]=c2;

	mflo	($24,$5,$6)
	mfhi	($25,$5,$6)
	daddu	$7,$24
	sltu	$1,$7,$24
	daddu	$25,$1
	daddu	$2,$25
	sd	$7,14*8($4)	# r[14]=c3;
	sd	$2,15*8($4)	# r[15]=c1;

	.set	noreorder
	ld	$21,5*8($29)
	ld	$20,4*8($29)
	ld	$19,3*8($29)
	ld	$18,2*8($29)
	ld	$17,1*8($29)
	ld	$16,0*8($29)
	jr	$31
	daddu	$29,6*8		# delay slot: release the frame
.end	bn_mul_comba8

/*
 * bn_mul_comba4 -- 4 x 4 word product, column by column.
 *
 * $4 points at the 8 result words, $5 at a[0..3] and $6 at b[0..3].
 * a[0..3] are held in $12..$15 and b[0..3] in $8..$11; $2, $3 and $7 are
 * the rotating three-word accumulator (c1, c2, c3 in the mul_add_c
 * comments), $24/$25 the low/high product halves and $1 the carry bit.
 * No stack frame is used.
 */
.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	ld	$13,8($5)
	ld	$14,2*8($5)
	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$15,3*8($5)
	ld	$9,8($6)
	ld	$10,2*8($6)
	ld	$11,3*8($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)
	sd	$2,0($4)

	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	daddu	$7,$25,$1
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	sd	$3,8($4)

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,3*8($4)

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,5*8($4)

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)
	sd	$3,7*8($4)

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	ld	$12,0($5)
	ld	$13,8($5)
1548 ld $14,2*8($5) 1549 ld $15,3*8($5) 1550 1551 dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 1552 ld $8,4*8($5) 1553 ld $9,5*8($5) 1554 ld $10,6*8($5) 1555 ld $11,7*8($5) 1556 mflo ($2,$12,$12) 1557 mfhi ($3,$12,$12) 1558 sd $2,0($4) 1559 1560 dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 1561 mflo ($24,$12,$13) 1562 mfhi ($25,$12,$13) 1563 slt $2,$25,$0 1564 dsll $25,1 1565 dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 1566 slt $6,$24,$0 1567 daddu $25,$6 1568 dsll $24,1 1569 daddu $3,$24 1570 sltu $1,$3,$24 1571 daddu $7,$25,$1 1572 sd $3,8($4) 1573 sltu $1,$7,$25 1574 daddu $2,$1 1575 mflo ($24,$14,$12) 1576 mfhi ($25,$14,$12) 1577 daddu $7,$24 1578 sltu $1,$7,$24 1579 dmultu ($13,$13) # forward multiplication 1580 daddu $7,$24 1581 daddu $1,$25 1582 sltu $24,$7,$24 1583 daddu $2,$1 1584 daddu $25,$24 1585 sltu $3,$2,$1 1586 daddu $2,$25 1587 sltu $25,$2,$25 1588 daddu $3,$25 1589 mflo ($24,$13,$13) 1590 mfhi ($25,$13,$13) 1591 daddu $7,$24 1592 sltu $1,$7,$24 1593 dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 1594 daddu $25,$1 1595 daddu $2,$25 1596 sltu $1,$2,$25 1597 daddu $3,$1 1598 sd $7,2*8($4) 1599 mflo ($24,$12,$15) 1600 mfhi ($25,$12,$15) 1601 daddu $2,$24 1602 sltu $1,$2,$24 1603 dmultu ($13,$14) # forward multiplication 1604 daddu $2,$24 1605 daddu $1,$25 1606 sltu $24,$2,$24 1607 daddu $3,$1 1608 daddu $25,$24 1609 sltu $7,$3,$1 1610 daddu $3,$25 1611 sltu $25,$3,$25 1612 daddu $7,$25 1613 mflo ($24,$13,$14) 1614 mfhi ($25,$13,$14) 1615 daddu $2,$24 1616 sltu $1,$2,$24 1617 dmultu ($8,$12) # forward multiplication 1618 daddu $2,$24 1619 daddu $1,$25 1620 sltu $24,$2,$24 1621 daddu $3,$1 1622 daddu $25,$24 1623 sltu $1,$3,$1 1624 daddu $3,$25 1625 daddu $7,$1 1626 sltu $25,$3,$25 1627 daddu $7,$25 1628 mflo ($24,$8,$12) 1629 mfhi ($25,$8,$12) 1630 sd $2,3*8($4) 1631 daddu $3,$24 1632 sltu $1,$3,$24 1633 dmultu ($15,$13) # forward multiplication 1634 daddu $3,$24 1635 daddu $1,$25 1636 sltu $24,$3,$24 1637 daddu $7,$1 1638 daddu 
$25,$24 1639 sltu $2,$7,$1 1640 daddu $7,$25 1641 sltu $25,$7,$25 1642 daddu $2,$25 1643 mflo ($24,$15,$13) 1644 mfhi ($25,$15,$13) 1645 daddu $3,$24 1646 sltu $1,$3,$24 1647 dmultu ($14,$14) # forward multiplication 1648 daddu $3,$24 1649 daddu $1,$25 1650 sltu $24,$3,$24 1651 daddu $7,$1 1652 daddu $25,$24 1653 sltu $1,$7,$1 1654 daddu $7,$25 1655 daddu $2,$1 1656 sltu $25,$7,$25 1657 daddu $2,$25 1658 mflo ($24,$14,$14) 1659 mfhi ($25,$14,$14) 1660 daddu $3,$24 1661 sltu $1,$3,$24 1662 dmultu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); 1663 daddu $25,$1 1664 daddu $7,$25 1665 sltu $1,$7,$25 1666 daddu $2,$1 1667 sd $3,4*8($4) 1668 mflo ($24,$12,$9) 1669 mfhi ($25,$12,$9) 1670 daddu $7,$24 1671 sltu $1,$7,$24 1672 dmultu ($13,$8) # forward multiplication 1673 daddu $7,$24 1674 daddu $1,$25 1675 sltu $24,$7,$24 1676 daddu $2,$1 1677 daddu $25,$24 1678 sltu $3,$2,$1 1679 daddu $2,$25 1680 sltu $25,$2,$25 1681 daddu $3,$25 1682 mflo ($24,$13,$8) 1683 mfhi ($25,$13,$8) 1684 daddu $7,$24 1685 sltu $1,$7,$24 1686 dmultu ($14,$15) # forward multiplication 1687 daddu $7,$24 1688 daddu $1,$25 1689 sltu $24,$7,$24 1690 daddu $2,$1 1691 daddu $25,$24 1692 sltu $1,$2,$1 1693 daddu $2,$25 1694 daddu $3,$1 1695 sltu $25,$2,$25 1696 daddu $3,$25 1697 mflo ($24,$14,$15) 1698 mfhi ($25,$14,$15) 1699 daddu $7,$24 1700 sltu $1,$7,$24 1701 dmultu ($10,$12) # forward multiplication 1702 daddu $7,$24 1703 daddu $1,$25 1704 sltu $24,$7,$24 1705 daddu $2,$1 1706 daddu $25,$24 1707 sltu $1,$2,$1 1708 daddu $2,$25 1709 daddu $3,$1 1710 sltu $25,$2,$25 1711 daddu $3,$25 1712 mflo ($24,$10,$12) 1713 mfhi ($25,$10,$12) 1714 sd $7,5*8($4) 1715 daddu $2,$24 1716 sltu $1,$2,$24 1717 dmultu ($9,$13) # forward multiplication 1718 daddu $2,$24 1719 daddu $1,$25 1720 sltu $24,$2,$24 1721 daddu $3,$1 1722 daddu $25,$24 1723 sltu $7,$3,$1 1724 daddu $3,$25 1725 sltu $25,$3,$25 1726 daddu $7,$25 1727 mflo ($24,$9,$13) 1728 mfhi ($25,$9,$13) 1729 daddu $2,$24 1730 sltu $1,$2,$24 1731 dmultu ($8,$14) # 
forward multiplication 1732 daddu $2,$24 1733 daddu $1,$25 1734 sltu $24,$2,$24 1735 daddu $3,$1 1736 daddu $25,$24 1737 sltu $1,$3,$1 1738 daddu $3,$25 1739 daddu $7,$1 1740 sltu $25,$3,$25 1741 daddu $7,$25 1742 mflo ($24,$8,$14) 1743 mfhi ($25,$8,$14) 1744 daddu $2,$24 1745 sltu $1,$2,$24 1746 dmultu ($15,$15) # forward multiplication 1747 daddu $2,$24 1748 daddu $1,$25 1749 sltu $24,$2,$24 1750 daddu $3,$1 1751 daddu $25,$24 1752 sltu $1,$3,$1 1753 daddu $3,$25 1754 daddu $7,$1 1755 sltu $25,$3,$25 1756 daddu $7,$25 1757 mflo ($24,$15,$15) 1758 mfhi ($25,$15,$15) 1759 daddu $2,$24 1760 sltu $1,$2,$24 1761 dmultu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); 1762 daddu $25,$1 1763 daddu $3,$25 1764 sltu $1,$3,$25 1765 daddu $7,$1 1766 sd $2,6*8($4) 1767 mflo ($24,$12,$11) 1768 mfhi ($25,$12,$11) 1769 daddu $3,$24 1770 sltu $1,$3,$24 1771 dmultu ($13,$10) # forward multiplication 1772 daddu $3,$24 1773 daddu $1,$25 1774 sltu $24,$3,$24 1775 daddu $7,$1 1776 daddu $25,$24 1777 sltu $2,$7,$1 1778 daddu $7,$25 1779 sltu $25,$7,$25 1780 daddu $2,$25 1781 mflo ($24,$13,$10) 1782 mfhi ($25,$13,$10) 1783 daddu $3,$24 1784 sltu $1,$3,$24 1785 dmultu ($14,$9) # forward multiplication 1786 daddu $3,$24 1787 daddu $1,$25 1788 sltu $24,$3,$24 1789 daddu $7,$1 1790 daddu $25,$24 1791 sltu $1,$7,$1 1792 daddu $7,$25 1793 daddu $2,$1 1794 sltu $25,$7,$25 1795 daddu $2,$25 1796 mflo ($24,$14,$9) 1797 mfhi ($25,$14,$9) 1798 daddu $3,$24 1799 sltu $1,$3,$24 1800 dmultu ($15,$8) # forward multiplication 1801 daddu $3,$24 1802 daddu $1,$25 1803 sltu $24,$3,$24 1804 daddu $7,$1 1805 daddu $25,$24 1806 sltu $1,$7,$1 1807 daddu $7,$25 1808 daddu $2,$1 1809 sltu $25,$7,$25 1810 daddu $2,$25 1811 mflo ($24,$15,$8) 1812 mfhi ($25,$15,$8) 1813 daddu $3,$24 1814 sltu $1,$3,$24 1815 dmultu ($11,$13) # forward multiplication 1816 daddu $3,$24 1817 daddu $1,$25 1818 sltu $24,$3,$24 1819 daddu $7,$1 1820 daddu $25,$24 1821 sltu $1,$7,$1 1822 daddu $7,$25 1823 daddu $2,$1 1824 sltu $25,$7,$25 1825 
daddu $2,$25 1826 mflo ($24,$11,$13) 1827 mfhi ($25,$11,$13) 1828 sd $3,7*8($4) 1829 daddu $7,$24 1830 sltu $1,$7,$24 1831 dmultu ($10,$14) # forward multiplication 1832 daddu $7,$24 1833 daddu $1,$25 1834 sltu $24,$7,$24 1835 daddu $2,$1 1836 daddu $25,$24 1837 sltu $3,$2,$1 1838 daddu $2,$25 1839 sltu $25,$2,$25 1840 daddu $3,$25 1841 mflo ($24,$10,$14) 1842 mfhi ($25,$10,$14) 1843 daddu $7,$24 1844 sltu $1,$7,$24 1845 dmultu ($9,$15) # forward multiplication 1846 daddu $7,$24 1847 daddu $1,$25 1848 sltu $24,$7,$24 1849 daddu $2,$1 1850 daddu $25,$24 1851 sltu $1,$2,$1 1852 daddu $2,$25 1853 daddu $3,$1 1854 sltu $25,$2,$25 1855 daddu $3,$25 1856 mflo ($24,$9,$15) 1857 mfhi ($25,$9,$15) 1858 daddu $7,$24 1859 sltu $1,$7,$24 1860 dmultu ($8,$8) # forward multiplication 1861 daddu $7,$24 1862 daddu $1,$25 1863 sltu $24,$7,$24 1864 daddu $2,$1 1865 daddu $25,$24 1866 sltu $1,$2,$1 1867 daddu $2,$25 1868 daddu $3,$1 1869 sltu $25,$2,$25 1870 daddu $3,$25 1871 mflo ($24,$8,$8) 1872 mfhi ($25,$8,$8) 1873 daddu $7,$24 1874 sltu $1,$7,$24 1875 dmultu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); 1876 daddu $25,$1 1877 daddu $2,$25 1878 sltu $1,$2,$25 1879 daddu $3,$1 1880 sd $7,8*8($4) 1881 mflo ($24,$14,$11) 1882 mfhi ($25,$14,$11) 1883 daddu $2,$24 1884 sltu $1,$2,$24 1885 dmultu ($15,$10) # forward multiplication 1886 daddu $2,$24 1887 daddu $1,$25 1888 sltu $24,$2,$24 1889 daddu $3,$1 1890 daddu $25,$24 1891 sltu $7,$3,$1 1892 daddu $3,$25 1893 sltu $25,$3,$25 1894 daddu $7,$25 1895 mflo ($24,$15,$10) 1896 mfhi ($25,$15,$10) 1897 daddu $2,$24 1898 sltu $1,$2,$24 1899 dmultu ($8,$9) # forward multiplication 1900 daddu $2,$24 1901 daddu $1,$25 1902 sltu $24,$2,$24 1903 daddu $3,$1 1904 daddu $25,$24 1905 sltu $1,$3,$1 1906 daddu $3,$25 1907 daddu $7,$1 1908 sltu $25,$3,$25 1909 daddu $7,$25 1910 mflo ($24,$8,$9) 1911 mfhi ($25,$8,$9) 1912 daddu $2,$24 1913 sltu $1,$2,$24 1914 dmultu ($11,$15) # forward multiplication 1915 daddu $2,$24 1916 daddu $1,$25 1917 sltu 
$24,$2,$24 1918 daddu $3,$1 1919 daddu $25,$24 1920 sltu $1,$3,$1 1921 daddu $3,$25 1922 daddu $7,$1 1923 sltu $25,$3,$25 1924 daddu $7,$25 1925 mflo ($24,$11,$15) 1926 mfhi ($25,$11,$15) 1927 sd $2,9*8($4) 1928 daddu $3,$24 1929 sltu $1,$3,$24 1930 dmultu ($10,$8) # forward multiplication 1931 daddu $3,$24 1932 daddu $1,$25 1933 sltu $24,$3,$24 1934 daddu $7,$1 1935 daddu $25,$24 1936 sltu $2,$7,$1 1937 daddu $7,$25 1938 sltu $25,$7,$25 1939 daddu $2,$25 1940 mflo ($24,$10,$8) 1941 mfhi ($25,$10,$8) 1942 daddu $3,$24 1943 sltu $1,$3,$24 1944 dmultu ($9,$9) # forward multiplication 1945 daddu $3,$24 1946 daddu $1,$25 1947 sltu $24,$3,$24 1948 daddu $7,$1 1949 daddu $25,$24 1950 sltu $1,$7,$1 1951 daddu $7,$25 1952 daddu $2,$1 1953 sltu $25,$7,$25 1954 daddu $2,$25 1955 mflo ($24,$9,$9) 1956 mfhi ($25,$9,$9) 1957 daddu $3,$24 1958 sltu $1,$3,$24 1959 dmultu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); 1960 daddu $25,$1 1961 daddu $7,$25 1962 sltu $1,$7,$25 1963 daddu $2,$1 1964 sd $3,10*8($4) 1965 mflo ($24,$8,$11) 1966 mfhi ($25,$8,$11) 1967 daddu $7,$24 1968 sltu $1,$7,$24 1969 dmultu ($9,$10) # forward multiplication 1970 daddu $7,$24 1971 daddu $1,$25 1972 sltu $24,$7,$24 1973 daddu $2,$1 1974 daddu $25,$24 1975 sltu $3,$2,$1 1976 daddu $2,$25 1977 sltu $25,$2,$25 1978 daddu $3,$25 1979 mflo ($24,$9,$10) 1980 mfhi ($25,$9,$10) 1981 daddu $7,$24 1982 sltu $1,$7,$24 1983 dmultu ($11,$9) # forward multiplication 1984 daddu $7,$24 1985 daddu $1,$25 1986 sltu $24,$7,$24 1987 daddu $2,$1 1988 daddu $25,$24 1989 sltu $1,$2,$1 1990 daddu $2,$25 1991 daddu $3,$1 1992 sltu $25,$2,$25 1993 daddu $3,$25 1994 mflo ($24,$11,$9) 1995 mfhi ($25,$11,$9) 1996 sd $7,11*8($4) 1997 daddu $2,$24 1998 sltu $1,$2,$24 1999 dmultu ($10,$10) # forward multiplication 2000 daddu $2,$24 2001 daddu $1,$25 2002 sltu $24,$2,$24 2003 daddu $3,$1 2004 daddu $25,$24 2005 sltu $7,$3,$1 2006 daddu $3,$25 2007 sltu $25,$3,$25 2008 daddu $7,$25 2009 mflo ($24,$10,$10) 2010 mfhi ($25,$10,$10) 2011 daddu 
$2,$24 2012 sltu $1,$2,$24 2013 dmultu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); 2014 daddu $25,$1 2015 daddu $3,$25 2016 sltu $1,$3,$25 2017 daddu $7,$1 2018 sd $2,12*8($4) 2019 mflo ($24,$10,$11) 2020 mfhi ($25,$10,$11) 2021 daddu $3,$24 2022 sltu $1,$3,$24 2023 dmultu ($11,$11) # forward multiplication 2024 daddu $3,$24 2025 daddu $1,$25 2026 sltu $24,$3,$24 2027 daddu $7,$1 2028 daddu $25,$24 2029 sltu $2,$7,$1 2030 daddu $7,$25 2031 sltu $25,$7,$25 2032 daddu $2,$25 2033 mflo ($24,$11,$11) 2034 mfhi ($25,$11,$11) 2035 sd $3,13*8($4) 2036 2037 daddu $7,$24 2038 sltu $1,$7,$24 2039 daddu $25,$1 2040 daddu $2,$25 2041 sd $7,14*8($4) 2042 sd $2,15*8($4) 2043 2044 .set noreorder 2045 jr $31 2046 nop 2047.end bn_sqr_comba8 2048 2049.align 5 2050.globl bn_sqr_comba4 2051.ent bn_sqr_comba4 2052bn_sqr_comba4: 2053 .set reorder 2054 ld $12,0($5) 2055 ld $13,8($5) 2056 dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 2057 ld $14,2*8($5) 2058 ld $15,3*8($5) 2059 mflo ($2,$12,$12) 2060 mfhi ($3,$12,$12) 2061 sd $2,0($4) 2062 2063 dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 2064 mflo ($24,$12,$13) 2065 mfhi ($25,$12,$13) 2066 slt $2,$25,$0 2067 dsll $25,1 2068 dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 2069 slt $6,$24,$0 2070 daddu $25,$6 2071 dsll $24,1 2072 daddu $3,$24 2073 sltu $1,$3,$24 2074 daddu $7,$25,$1 2075 sd $3,8($4) 2076 sltu $1,$7,$25 2077 daddu $2,$1 2078 mflo ($24,$14,$12) 2079 mfhi ($25,$14,$12) 2080 daddu $7,$24 2081 sltu $1,$7,$24 2082 dmultu ($13,$13) # forward multiplication 2083 daddu $7,$24 2084 daddu $1,$25 2085 sltu $24,$7,$24 2086 daddu $2,$1 2087 daddu $25,$24 2088 sltu $3,$2,$1 2089 daddu $2,$25 2090 sltu $25,$2,$25 2091 daddu $3,$25 2092 mflo ($24,$13,$13) 2093 mfhi ($25,$13,$13) 2094 daddu $7,$24 2095 sltu $1,$7,$24 2096 dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 2097 daddu $25,$1 2098 daddu $2,$25 2099 sltu $1,$2,$25 2100 daddu $3,$1 2101 sd $7,2*8($4) 2102 mflo ($24,$12,$15) 2103 mfhi ($25,$12,$15) 2104 daddu $2,$24 
2105 sltu $1,$2,$24 2106 dmultu ($13,$14) # forward multiplication 2107 daddu $2,$24 2108 daddu $1,$25 2109 sltu $24,$2,$24 2110 daddu $3,$1 2111 daddu $25,$24 2112 sltu $7,$3,$1 2113 daddu $3,$25 2114 sltu $25,$3,$25 2115 daddu $7,$25 2116 mflo ($24,$13,$14) 2117 mfhi ($25,$13,$14) 2118 daddu $2,$24 2119 sltu $1,$2,$24 2120 dmultu ($15,$13) # forward multiplication 2121 daddu $2,$24 2122 daddu $1,$25 2123 sltu $24,$2,$24 2124 daddu $3,$1 2125 daddu $25,$24 2126 sltu $1,$3,$1 2127 daddu $3,$25 2128 daddu $7,$1 2129 sltu $25,$3,$25 2130 daddu $7,$25 2131 mflo ($24,$15,$13) 2132 mfhi ($25,$15,$13) 2133 sd $2,3*8($4) 2134 daddu $3,$24 2135 sltu $1,$3,$24 2136 dmultu ($14,$14) # forward multiplication 2137 daddu $3,$24 2138 daddu $1,$25 2139 sltu $24,$3,$24 2140 daddu $7,$1 2141 daddu $25,$24 2142 sltu $2,$7,$1 2143 daddu $7,$25 2144 sltu $25,$7,$25 2145 daddu $2,$25 2146 mflo ($24,$14,$14) 2147 mfhi ($25,$14,$14) 2148 daddu $3,$24 2149 sltu $1,$3,$24 2150 dmultu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2); 2151 daddu $25,$1 2152 daddu $7,$25 2153 sltu $1,$7,$25 2154 daddu $2,$1 2155 sd $3,4*8($4) 2156 mflo ($24,$14,$15) 2157 mfhi ($25,$14,$15) 2158 daddu $7,$24 2159 sltu $1,$7,$24 2160 dmultu ($15,$15) # forward multiplication 2161 daddu $7,$24 2162 daddu $1,$25 2163 sltu $24,$7,$24 2164 daddu $2,$1 2165 daddu $25,$24 2166 sltu $3,$2,$1 2167 daddu $2,$25 2168 sltu $25,$2,$25 2169 daddu $3,$25 2170 mflo ($24,$15,$15) 2171 mfhi ($25,$15,$15) 2172 sd $7,5*8($4) 2173 2174 daddu $2,$24 2175 sltu $1,$2,$24 2176 daddu $25,$1 2177 daddu $3,$25 2178 sd $2,6*8($4) 2179 sd $3,7*8($4) 2180 2181 .set noreorder 2182 jr $31 2183 nop 2184.end bn_sqr_comba4 2185