/*
 * Word-array multiply primitives, 64-bit MIPS flavour (8-byte words,
 * accessed with ld/sd).
 *
 * The multiply helpers used below with parentheses (dmultu, mflo, mfhi)
 * are preprocessor macros; they are not defined in this file and are
 * expected to come from mips_arch.h.  Because of that, the order of the
 * low-half / high-half extraction after every multiply must be kept
 * exactly as written: the high-half step overwrites one of the multiply
 * operands.
 */
#include "mips_arch.h"

/*
 * Divide helpers, selected by architecture level:
 *   - MIPS64R6 / MIPS32R6: the divide macro itself expands to nothing and
 *     mfqt / mfrm become the three-operand divide / modulo instructions.
 *   - otherwise: the divide macro issues the classic divide with $0 as the
 *     nominal destination, and mfqt / mfrm read the result back with
 *     mflo / mfhi respectively.
 */
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt) ddivu rd,rs,rt
# define mfrm(rd,rs,rt) dmodu rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt) divu rd,rs,rt
# define mfrm(rd,rs,rt) modu rd,rs,rt
#else
# define ddivu(rs,rt) ddivu $0,rs,rt
# define mfqt(rd,rs,rt) mflo rd
# define mfrm(rd,rs,rt) mfhi rd
#endif

/* Identification strings emitted into read-only data. */
.rdata
.asciiz "mips3.s, Version 1.2"
.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
/* $1 (the assembler temporary) is used as an ordinary scratch register. */
.set    noat

/*
 * bn_mul_add_words
 *
 * In:   $4 = word array that is read, accumulated into and written back
 *       $5 = word array that is read and multiplied
 *       $6 = number of words
 *       $7 = multiplier word
 * Out:  $2 = final carry word (0 when the count is not positive)
 *       $4 = copy of $2 (the reason for the copy is not visible in this
 *            part of the file)
 *
 * This entry point only screens the count; the work is done by
 * bn_mul_add_words_internal.
 */
.align  5
.globl  bn_mul_add_words
.ent    bn_mul_add_words
bn_mul_add_words:
        .set    noreorder
        bgtz    $6,bn_mul_add_words_internal    # count > 0: go do the work
        move    $2,$0                           # delay slot: carry = 0 either way
        jr      $31
        move    $4,$2                           # delay slot: mirror result into $4
.end    bn_mul_add_words

/*
 * bn_mul_add_words_internal
 *
 * Entered with $2 = 0 and $6 > 0 from the stub above.
 *
 * Register use:
 *   $2        running carry between words
 *   $3        constant -4, masks the count down to a multiple of four
 *   $1        scratch: low product half, then wrap-around flag
 *   $12,$14,$8,$10   words loaded via $5 (later their high product halves)
 *   $13,$15,$9,$11   words loaded via $4 (accumulated and stored back)
 *
 * Per word:  sum = word_at_4 + carry + low(word_at_5 * $7)
 *            carry = high(word_at_5 * $7) + wraps of the two additions
 *
 * The sltu instructions compare full 64-bit values here; the original
 * author noted that the manuals describe sltu as a 32-bit compare but
 * that it works on 64-bit registers as well.
 */
.align  5
.ent    bn_mul_add_words_internal
bn_mul_add_words_internal:
        .set    reorder
        li      $3,-4
        and     $8,$6,$3                        # count with low two bits cleared
        beqz    $8,.L_bn_mul_add_words_tail     # fewer than four words: tail only

        /* Main loop: four words per iteration. */
.L_bn_mul_add_words_loop:
        /* word 0 (also preloads words 1 and 2; $8 is reused as data) */
        ld      $12,0*8($5)
        dmultu  ($12,$7)
        ld      $13,0*8($4)
        ld      $14,1*8($5)
        ld      $15,1*8($4)
        ld      $8,2*8($5)
        ld      $9,2*8($4)
        daddu   $13,$2                          # add incoming carry
        sltu    $2,$13,$2                       # 1 if that addition wrapped
        mflo    ($1,$12,$7)
        mfhi    ($12,$12,$7)
        daddu   $13,$1                          # add low product half
        daddu   $2,$12                          # carry = wrap + high product half
        dmultu  ($14,$7)                        # start product for word 1
        sltu    $1,$13,$1                       # 1 if the second addition wrapped
        sd      $13,0*8($4)
        daddu   $2,$1

        /* word 1 (also preloads word 3) */
        ld      $10,3*8($5)
        ld      $11,3*8($4)
        daddu   $15,$2
        sltu    $2,$15,$2
        mflo    ($1,$14,$7)
        mfhi    ($14,$14,$7)
        daddu   $15,$1
        daddu   $2,$14
        dmultu  ($8,$7)                         # start product for word 2
        sltu    $1,$15,$1
        sd      $15,1*8($4)
        daddu   $2,$1

        /* word 2; count and both pointers are advanced here, so the
           remaining stores use negative offsets */
        subu    $6,4
        daddu   $4,4*8
        daddu   $5,4*8
        daddu   $9,$2
        sltu    $2,$9,$2
        mflo    ($1,$8,$7)
        mfhi    ($8,$8,$7)
        daddu   $9,$1
        daddu   $2,$8
        dmultu  ($10,$7)                        # start product for word 3
        sltu    $1,$9,$1
        sd      $9,-2*8($4)
        daddu   $2,$1

        /* word 3; $8 is free again and receives the loop condition */
        and     $8,$6,$3                        # four or more words still left?
        daddu   $11,$2
        sltu    $2,$11,$2
        mflo    ($1,$10,$7)
        mfhi    ($10,$10,$7)
        daddu   $11,$1
        daddu   $2,$10
        sltu    $1,$11,$1
        sd      $11,-1*8($4)
        .set    noreorder
        bgtz    $8,.L_bn_mul_add_words_loop
        daddu   $2,$1                           # delay slot: finish the carry

        beqz    $6,.L_bn_mul_add_words_return   # nothing left over
        nop

        /* Tail: one to three remaining words, handled one at a time. */
.L_bn_mul_add_words_tail:
        .set    reorder
        /* first leftover word */
        ld      $12,0*8($5)
        dmultu  ($12,$7)
        ld      $13,0*8($4)
        subu    $6,1
        daddu   $13,$2
        sltu    $2,$13,$2
        mflo    ($1,$12,$7)
        mfhi    ($12,$12,$7)
        daddu   $13,$1
        daddu   $2,$12
        sltu    $1,$13,$1
        sd      $13,0*8($4)
        daddu   $2,$1
        beqz    $6,.L_bn_mul_add_words_return

        /* second leftover word */
        ld      $12,1*8($5)
        dmultu  ($12,$7)
        ld      $13,1*8($4)
        subu    $6,1
        daddu   $13,$2
        sltu    $2,$13,$2
        mflo    ($1,$12,$7)
        mfhi    ($12,$12,$7)
        daddu   $13,$1
        daddu   $2,$12
        sltu    $1,$13,$1
        sd      $13,1*8($4)
        daddu   $2,$1
        beqz    $6,.L_bn_mul_add_words_return

        /* third leftover word; no further count check is made */
        ld      $12,2*8($5)
        dmultu  ($12,$7)
        ld      $13,2*8($4)
        daddu   $13,$2
        sltu    $2,$13,$2
        mflo    ($1,$12,$7)
        mfhi    ($12,$12,$7)
        daddu   $13,$1
        daddu   $2,$12
        sltu    $1,$13,$1
        sd      $13,2*8($4)
        daddu   $2,$1

.L_bn_mul_add_words_return:
        .set    noreorder
        jr      $31
        move    $4,$2                           # delay slot: mirror carry into $4
.end    bn_mul_add_words_internal

/*
 * bn_mul_words
 *
 * Entry stub: with a positive count in $6 it continues in
 * bn_mul_words_internal (defined further down in this file); otherwise it
 * returns 0 in $2 and copies that value into $4.
 */
.align  5
.globl  bn_mul_words
.ent    bn_mul_words
bn_mul_words:
        .set    noreorder
        bgtz    $6,bn_mul_words_internal        # count > 0: go do the work
        move    $2,$0                           # delay slot: result = 0 either way
        jr      $31
        move    $4,$2                           # delay slot: mirror result into $4
.end    bn_mul_words

170.align 5 171.ent bn_mul_words_internal 172bn_mul_words_internal: 173 .set reorder 174 li $3,-4 175 and $8,$6,$3 176 beqz $8,.L_bn_mul_words_tail 177 178.L_bn_mul_words_loop: 179 ld $12,0($5) 180 dmultu ($12,$7) 181 ld $14,8($5) 182 ld $8,2*8($5) 183 ld $10,3*8($5) 184 mflo ($1,$12,$7) 185 mfhi ($12,$12,$7) 186 daddu $2,$1 187 sltu $13,$2,$1 188 dmultu ($14,$7) 189 sd $2,0($4) 190 daddu $2,$13,$12 191 192 subu $6,4 193 daddu $4,4*8 194 daddu $5,4*8 195 mflo ($1,$14,$7) 196 mfhi ($14,$14,$7) 197 daddu $2,$1 198 sltu $15,$2,$1 199 dmultu ($8,$7) 200 sd $2,-3*8($4) 201 daddu $2,$15,$14 202 203 mflo ($1,$8,$7) 204 mfhi ($8,$8,$7) 205 daddu $2,$1 206 sltu $9,$2,$1 207 dmultu ($10,$7) 208 sd $2,-2*8($4) 209 daddu $2,$9,$8 210 211 and $8,$6,$3 212 mflo ($1,$10,$7) 213 mfhi ($10,$10,$7) 214 daddu $2,$1 215 sltu $11,$2,$1 216 sd $2,-8($4) 217 .set noreorder 218 bgtz $8,.L_bn_mul_words_loop 219 daddu $2,$11,$10 220 221 beqz $6,.L_bn_mul_words_return 222 nop 223 224.L_bn_mul_words_tail: 225 .set reorder 226 ld $12,0($5) 227 dmultu ($12,$7) 228 subu $6,1 229 mflo ($1,$12,$7) 230 mfhi ($12,$12,$7) 231 daddu $2,$1 232 sltu $13,$2,$1 233 sd $2,0($4) 234 daddu $2,$13,$12 235 beqz $6,.L_bn_mul_words_return 236 237 ld $12,8($5) 238 dmultu ($12,$7) 239 subu $6,1 240 mflo ($1,$12,$7) 241 mfhi ($12,$12,$7) 242 daddu $2,$1 243 sltu $13,$2,$1 244 sd $2,8($4) 245 daddu $2,$13,$12 246 beqz $6,.L_bn_mul_words_return 247 248 ld $12,2*8($5) 249 dmultu ($12,$7) 250 mflo ($1,$12,$7) 251 mfhi ($12,$12,$7) 252 daddu $2,$1 253 sltu $13,$2,$1 254 sd $2,2*8($4) 255 daddu $2,$13,$12 256 257.L_bn_mul_words_return: 258 .set noreorder 259 jr $31 260 move $4,$2 261.end bn_mul_words_internal 262 263.align 5 264.globl bn_sqr_words 265.ent bn_sqr_words 266bn_sqr_words: 267 .set noreorder 268 bgtz $6,bn_sqr_words_internal 269 move $2,$0 270 jr $31 271 move $4,$2 272.end bn_sqr_words 273 274.align 5 275.ent bn_sqr_words_internal 276bn_sqr_words_internal: 277 .set reorder 278 li $3,-4 279 and $8,$6,$3 280 
beqz $8,.L_bn_sqr_words_tail 281 282.L_bn_sqr_words_loop: 283 ld $12,0($5) 284 dmultu ($12,$12) 285 ld $14,8($5) 286 ld $8,2*8($5) 287 ld $10,3*8($5) 288 mflo ($13,$12,$12) 289 mfhi ($12,$12,$12) 290 sd $13,0($4) 291 sd $12,8($4) 292 293 dmultu ($14,$14) 294 subu $6,4 295 daddu $4,8*8 296 daddu $5,4*8 297 mflo ($15,$14,$14) 298 mfhi ($14,$14,$14) 299 sd $15,-6*8($4) 300 sd $14,-5*8($4) 301 302 dmultu ($8,$8) 303 mflo ($9,$8,$8) 304 mfhi ($8,$8,$8) 305 sd $9,-4*8($4) 306 sd $8,-3*8($4) 307 308 309 dmultu ($10,$10) 310 and $8,$6,$3 311 mflo ($11,$10,$10) 312 mfhi ($10,$10,$10) 313 sd $11,-2*8($4) 314 315 .set noreorder 316 bgtz $8,.L_bn_sqr_words_loop 317 sd $10,-8($4) 318 319 beqz $6,.L_bn_sqr_words_return 320 nop 321 322.L_bn_sqr_words_tail: 323 .set reorder 324 ld $12,0($5) 325 dmultu ($12,$12) 326 subu $6,1 327 mflo ($13,$12,$12) 328 mfhi ($12,$12,$12) 329 sd $13,0($4) 330 sd $12,8($4) 331 beqz $6,.L_bn_sqr_words_return 332 333 ld $12,8($5) 334 dmultu ($12,$12) 335 subu $6,1 336 mflo ($13,$12,$12) 337 mfhi ($12,$12,$12) 338 sd $13,2*8($4) 339 sd $12,3*8($4) 340 beqz $6,.L_bn_sqr_words_return 341 342 ld $12,2*8($5) 343 dmultu ($12,$12) 344 mflo ($13,$12,$12) 345 mfhi ($12,$12,$12) 346 sd $13,4*8($4) 347 sd $12,5*8($4) 348 349.L_bn_sqr_words_return: 350 .set noreorder 351 jr $31 352 move $4,$2 353 354.end bn_sqr_words_internal 355 356.align 5 357.globl bn_add_words 358.ent bn_add_words 359bn_add_words: 360 .set noreorder 361 bgtz $7,bn_add_words_internal 362 move $2,$0 363 jr $31 364 move $4,$2 365.end bn_add_words 366 367.align 5 368.ent bn_add_words_internal 369bn_add_words_internal: 370 .set reorder 371 li $3,-4 372 and $1,$7,$3 373 beqz $1,.L_bn_add_words_tail 374 375.L_bn_add_words_loop: 376 ld $12,0($5) 377 ld $8,0($6) 378 subu $7,4 379 ld $13,8($5) 380 and $1,$7,$3 381 ld $14,2*8($5) 382 daddu $6,4*8 383 ld $15,3*8($5) 384 daddu $4,4*8 385 ld $9,-3*8($6) 386 daddu $5,4*8 387 ld $10,-2*8($6) 388 ld $11,-8($6) 389 daddu $8,$12 390 sltu $24,$8,$12 391 daddu 
$12,$8,$2 392 sltu $2,$12,$8 393 sd $12,-4*8($4) 394 daddu $2,$24 395 396 daddu $9,$13 397 sltu $25,$9,$13 398 daddu $13,$9,$2 399 sltu $2,$13,$9 400 sd $13,-3*8($4) 401 daddu $2,$25 402 403 daddu $10,$14 404 sltu $24,$10,$14 405 daddu $14,$10,$2 406 sltu $2,$14,$10 407 sd $14,-2*8($4) 408 daddu $2,$24 409 410 daddu $11,$15 411 sltu $25,$11,$15 412 daddu $15,$11,$2 413 sltu $2,$15,$11 414 sd $15,-8($4) 415 416 .set noreorder 417 bgtz $1,.L_bn_add_words_loop 418 daddu $2,$25 419 420 beqz $7,.L_bn_add_words_return 421 nop 422 423.L_bn_add_words_tail: 424 .set reorder 425 ld $12,0($5) 426 ld $8,0($6) 427 daddu $8,$12 428 subu $7,1 429 sltu $24,$8,$12 430 daddu $12,$8,$2 431 sltu $2,$12,$8 432 sd $12,0($4) 433 daddu $2,$24 434 beqz $7,.L_bn_add_words_return 435 436 ld $13,8($5) 437 ld $9,8($6) 438 daddu $9,$13 439 subu $7,1 440 sltu $25,$9,$13 441 daddu $13,$9,$2 442 sltu $2,$13,$9 443 sd $13,8($4) 444 daddu $2,$25 445 beqz $7,.L_bn_add_words_return 446 447 ld $14,2*8($5) 448 ld $10,2*8($6) 449 daddu $10,$14 450 sltu $24,$10,$14 451 daddu $14,$10,$2 452 sltu $2,$14,$10 453 sd $14,2*8($4) 454 daddu $2,$24 455 456.L_bn_add_words_return: 457 .set noreorder 458 jr $31 459 move $4,$2 460 461.end bn_add_words_internal 462 463.align 5 464.globl bn_sub_words 465.ent bn_sub_words 466bn_sub_words: 467 .set noreorder 468 bgtz $7,bn_sub_words_internal 469 move $2,$0 470 jr $31 471 move $4,$0 472.end bn_sub_words 473 474.align 5 475.ent bn_sub_words_internal 476bn_sub_words_internal: 477 .set reorder 478 li $3,-4 479 and $1,$7,$3 480 beqz $1,.L_bn_sub_words_tail 481 482.L_bn_sub_words_loop: 483 ld $12,0($5) 484 ld $8,0($6) 485 subu $7,4 486 ld $13,8($5) 487 and $1,$7,$3 488 ld $14,2*8($5) 489 daddu $6,4*8 490 ld $15,3*8($5) 491 daddu $4,4*8 492 ld $9,-3*8($6) 493 daddu $5,4*8 494 ld $10,-2*8($6) 495 ld $11,-8($6) 496 sltu $24,$12,$8 497 dsubu $8,$12,$8 498 dsubu $12,$8,$2 499 sgtu $2,$12,$8 500 sd $12,-4*8($4) 501 daddu $2,$24 502 503 sltu $25,$13,$9 504 dsubu $9,$13,$9 505 dsubu 
$13,$9,$2 506 sgtu $2,$13,$9 507 sd $13,-3*8($4) 508 daddu $2,$25 509 510 511 sltu $24,$14,$10 512 dsubu $10,$14,$10 513 dsubu $14,$10,$2 514 sgtu $2,$14,$10 515 sd $14,-2*8($4) 516 daddu $2,$24 517 518 sltu $25,$15,$11 519 dsubu $11,$15,$11 520 dsubu $15,$11,$2 521 sgtu $2,$15,$11 522 sd $15,-8($4) 523 524 .set noreorder 525 bgtz $1,.L_bn_sub_words_loop 526 daddu $2,$25 527 528 beqz $7,.L_bn_sub_words_return 529 nop 530 531.L_bn_sub_words_tail: 532 .set reorder 533 ld $12,0($5) 534 ld $8,0($6) 535 subu $7,1 536 sltu $24,$12,$8 537 dsubu $8,$12,$8 538 dsubu $12,$8,$2 539 sgtu $2,$12,$8 540 sd $12,0($4) 541 daddu $2,$24 542 beqz $7,.L_bn_sub_words_return 543 544 ld $13,8($5) 545 subu $7,1 546 ld $9,8($6) 547 sltu $25,$13,$9 548 dsubu $9,$13,$9 549 dsubu $13,$9,$2 550 sgtu $2,$13,$9 551 sd $13,8($4) 552 daddu $2,$25 553 beqz $7,.L_bn_sub_words_return 554 555 ld $14,2*8($5) 556 ld $10,2*8($6) 557 sltu $24,$14,$10 558 dsubu $10,$14,$10 559 dsubu $14,$10,$2 560 sgtu $2,$14,$10 561 sd $14,2*8($4) 562 daddu $2,$24 563 564.L_bn_sub_words_return: 565 .set noreorder 566 jr $31 567 move $4,$2 568.end bn_sub_words_internal 569 570#if 0 571/* 572 * The bn_div_3_words entry point is re-used for constant-time interface. 573 * Implementation is retained as historical reference. 
574 */ 575.align 5 576.globl bn_div_3_words 577.ent bn_div_3_words 578bn_div_3_words: 579 .set noreorder 580 move $7,$4 # we know that bn_div_words does not 581 # touch $7, $10, $11 and preserves $6 582 # so that we can save two arguments 583 # and return address in registers 584 # instead of stack:-) 585 586 ld $4,($7) 587 move $10,$5 588 bne $4,$6,bn_div_3_words_internal 589 ld $5,-8($7) 590 li $2,-1 591 jr $31 592 move $4,$2 593.end bn_div_3_words 594 595.align 5 596.ent bn_div_3_words_internal 597bn_div_3_words_internal: 598 .set reorder 599 move $11,$31 600 bal bn_div_words_internal 601 move $31,$11 602 dmultu ($10,$2) 603 ld $14,-2*8($7) 604 move $8,$0 605 mfhi ($13,$10,$2) 606 mflo ($12,$10,$2) 607 sltu $24,$13,$5 608.L_bn_div_3_words_inner_loop: 609 bnez $24,.L_bn_div_3_words_inner_loop_done 610 sgeu $1,$14,$12 611 seq $25,$13,$5 612 and $1,$25 613 sltu $15,$12,$10 614 daddu $5,$6 615 dsubu $13,$15 616 dsubu $12,$10 617 sltu $24,$13,$5 618 sltu $8,$5,$6 619 or $24,$8 620 .set noreorder 621 beqz $1,.L_bn_div_3_words_inner_loop 622 dsubu $2,1 623 daddu $2,1 624 .set reorder 625.L_bn_div_3_words_inner_loop_done: 626 .set noreorder 627 jr $31 628 move $4,$2 629.end bn_div_3_words_internal 630#endif 631 632.align 5 633.globl bn_div_words 634.ent bn_div_words 635bn_div_words: 636 .set noreorder 637 bnez $6,bn_div_words_internal 638 li $2,-1 # I would rather signal div-by-zero 639 # which can be done with 'break 7' 640 jr $31 641 move $4,$2 642.end bn_div_words 643 644.align 5 645.ent bn_div_words_internal 646bn_div_words_internal: 647 move $3,$0 648 bltz $6,.L_bn_div_words_body 649 move $25,$3 650 dsll $6,1 651 bgtz $6,.-4 652 addu $25,1 653 654 .set reorder 655 negu $13,$25 656 li $14,-1 657 dsll $14,$13 658 and $14,$4 659 dsrl $1,$5,$13 660 .set noreorder 661 beqz $14,.+12 662 nop 663 break 6 # signal overflow 664 .set reorder 665 dsll $4,$25 666 dsll $5,$25 667 or $4,$1 668.L_bn_div_words_body: 669 dsrl $3,$6,4*8 # bits 670 sgeu $1,$4,$6 671 .set noreorder 672 
beqz $1,.+12 673 nop 674 dsubu $4,$6 675 .set reorder 676 677 li $8,-1 678 dsrl $9,$4,4*8 # bits 679 dsrl $8,4*8 # q=0xffffffff 680 beq $3,$9,.L_bn_div_words_skip_div1 681 ddivu ($4,$3) 682 mfqt ($8,$4,$3) 683.L_bn_div_words_skip_div1: 684 dmultu ($6,$8) 685 dsll $15,$4,4*8 # bits 686 dsrl $1,$5,4*8 # bits 687 or $15,$1 688 mflo ($12,$6,$8) 689 mfhi ($13,$6,$8) 690.L_bn_div_words_inner_loop1: 691 sltu $14,$15,$12 692 seq $24,$9,$13 693 sltu $1,$9,$13 694 and $14,$24 695 sltu $2,$12,$6 696 or $1,$14 697 .set noreorder 698 beqz $1,.L_bn_div_words_inner_loop1_done 699 dsubu $13,$2 700 dsubu $12,$6 701 b .L_bn_div_words_inner_loop1 702 dsubu $8,1 703 .set reorder 704.L_bn_div_words_inner_loop1_done: 705 706 dsll $5,4*8 # bits 707 dsubu $4,$15,$12 708 dsll $2,$8,4*8 # bits 709 710 li $8,-1 711 dsrl $9,$4,4*8 # bits 712 dsrl $8,4*8 # q=0xffffffff 713 beq $3,$9,.L_bn_div_words_skip_div2 714 ddivu ($4,$3) 715 mfqt ($8,$4,$3) 716.L_bn_div_words_skip_div2: 717 dmultu ($6,$8) 718 dsll $15,$4,4*8 # bits 719 dsrl $1,$5,4*8 # bits 720 or $15,$1 721 mflo ($12,$6,$8) 722 mfhi ($13,$6,$8) 723.L_bn_div_words_inner_loop2: 724 sltu $14,$15,$12 725 seq $24,$9,$13 726 sltu $1,$9,$13 727 and $14,$24 728 sltu $3,$12,$6 729 or $1,$14 730 .set noreorder 731 beqz $1,.L_bn_div_words_inner_loop2_done 732 dsubu $13,$3 733 dsubu $12,$6 734 b .L_bn_div_words_inner_loop2 735 dsubu $8,1 736 .set reorder 737.L_bn_div_words_inner_loop2_done: 738 739 dsubu $4,$15,$12 740 or $2,$8 741 dsrl $3,$4,$25 # $3 contains remainder if anybody wants it 742 dsrl $6,$25 # restore $6 743 744 .set noreorder 745 move $5,$3 746 jr $31 747 move $4,$2 748.end bn_div_words_internal 749 750.align 5 751.globl bn_mul_comba8 752.ent bn_mul_comba8 753bn_mul_comba8: 754 .set noreorder 755 .frame $29,6*8,$31 756 .mask 0x003f0000,-8 757 dsubu $29,6*8 758 sd $21,5*8($29) 759 sd $20,4*8($29) 760 sd $19,3*8($29) 761 sd $18,2*8($29) 762 sd $17,1*8($29) 763 sd $16,0*8($29) 764 765 .set reorder 766 ld $12,0($5) # If compiled with 
-mips3 option on 767 # R5000 box assembler barks on this 768 # 1ine with "should not have mult/div 769 # as last instruction in bb (R10K 770 # bug)" warning. If anybody out there 771 # has a clue about how to circumvent 772 # this do send me a note. 773 # <appro@fy.chalmers.se> 774 775 ld $8,0($6) 776 ld $13,8($5) 777 ld $14,2*8($5) 778 dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 779 ld $15,3*8($5) 780 ld $9,8($6) 781 ld $10,2*8($6) 782 ld $11,3*8($6) 783 mflo ($2,$12,$8) 784 mfhi ($3,$12,$8) 785 786 ld $16,4*8($5) 787 ld $18,5*8($5) 788 dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 789 ld $20,6*8($5) 790 ld $5,7*8($5) 791 ld $17,4*8($6) 792 ld $19,5*8($6) 793 mflo ($24,$12,$9) 794 mfhi ($25,$12,$9) 795 daddu $3,$24 796 sltu $1,$3,$24 797 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 798 daddu $7,$25,$1 799 ld $21,6*8($6) 800 ld $6,7*8($6) 801 sd $2,0($4) # r[0]=c1; 802 mflo ($24,$13,$8) 803 mfhi ($25,$13,$8) 804 daddu $3,$24 805 sltu $1,$3,$24 806 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 807 daddu $25,$1 808 daddu $7,$25 809 sltu $2,$7,$25 810 sd $3,8($4) # r[1]=c2; 811 812 mflo ($24,$14,$8) 813 mfhi ($25,$14,$8) 814 daddu $7,$24 815 sltu $1,$7,$24 816 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 817 daddu $25,$1 818 daddu $2,$25 819 mflo ($24,$13,$9) 820 mfhi ($25,$13,$9) 821 daddu $7,$24 822 sltu $1,$7,$24 823 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 824 daddu $25,$1 825 daddu $2,$25 826 sltu $3,$2,$25 827 mflo ($24,$12,$10) 828 mfhi ($25,$12,$10) 829 daddu $7,$24 830 sltu $1,$7,$24 831 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 832 daddu $25,$1 833 daddu $2,$25 834 sltu $1,$2,$25 835 daddu $3,$1 836 sd $7,2*8($4) # r[2]=c3; 837 838 mflo ($24,$12,$11) 839 mfhi ($25,$12,$11) 840 daddu $2,$24 841 sltu $1,$2,$24 842 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 843 daddu $25,$1 844 daddu $3,$25 845 sltu $7,$3,$25 846 mflo ($24,$13,$10) 847 mfhi ($25,$13,$10) 848 daddu $2,$24 849 sltu $1,$2,$24 850 dmultu ($14,$9) # 
mul_add_c(a[2],b[1],c1,c2,c3); 851 daddu $25,$1 852 daddu $3,$25 853 sltu $1,$3,$25 854 daddu $7,$1 855 mflo ($24,$14,$9) 856 mfhi ($25,$14,$9) 857 daddu $2,$24 858 sltu $1,$2,$24 859 dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 860 daddu $25,$1 861 daddu $3,$25 862 sltu $1,$3,$25 863 daddu $7,$1 864 mflo ($24,$15,$8) 865 mfhi ($25,$15,$8) 866 daddu $2,$24 867 sltu $1,$2,$24 868 dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); 869 daddu $25,$1 870 daddu $3,$25 871 sltu $1,$3,$25 872 daddu $7,$1 873 sd $2,3*8($4) # r[3]=c1; 874 875 mflo ($24,$16,$8) 876 mfhi ($25,$16,$8) 877 daddu $3,$24 878 sltu $1,$3,$24 879 dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 880 daddu $25,$1 881 daddu $7,$25 882 sltu $2,$7,$25 883 mflo ($24,$15,$9) 884 mfhi ($25,$15,$9) 885 daddu $3,$24 886 sltu $1,$3,$24 887 dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 888 daddu $25,$1 889 daddu $7,$25 890 sltu $1,$7,$25 891 daddu $2,$1 892 mflo ($24,$14,$10) 893 mfhi ($25,$14,$10) 894 daddu $3,$24 895 sltu $1,$3,$24 896 dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 897 daddu $25,$1 898 daddu $7,$25 899 sltu $1,$7,$25 900 daddu $2,$1 901 mflo ($24,$13,$11) 902 mfhi ($25,$13,$11) 903 daddu $3,$24 904 sltu $1,$3,$24 905 dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); 906 daddu $25,$1 907 daddu $7,$25 908 sltu $1,$7,$25 909 daddu $2,$1 910 mflo ($24,$12,$17) 911 mfhi ($25,$12,$17) 912 daddu $3,$24 913 sltu $1,$3,$24 914 dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); 915 daddu $25,$1 916 daddu $7,$25 917 sltu $1,$7,$25 918 daddu $2,$1 919 sd $3,4*8($4) # r[4]=c2; 920 921 mflo ($24,$12,$19) 922 mfhi ($25,$12,$19) 923 daddu $7,$24 924 sltu $1,$7,$24 925 dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); 926 daddu $25,$1 927 daddu $2,$25 928 sltu $3,$2,$25 929 mflo ($24,$13,$17) 930 mfhi ($25,$13,$17) 931 daddu $7,$24 932 sltu $1,$7,$24 933 dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 934 daddu $25,$1 935 daddu $2,$25 936 sltu $1,$2,$25 937 daddu $3,$1 938 mflo ($24,$14,$11) 939 mfhi 
($25,$14,$11) 940 daddu $7,$24 941 sltu $1,$7,$24 942 dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 943 daddu $25,$1 944 daddu $2,$25 945 sltu $1,$2,$25 946 daddu $3,$1 947 mflo ($24,$15,$10) 948 mfhi ($25,$15,$10) 949 daddu $7,$24 950 sltu $1,$7,$24 951 dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); 952 daddu $25,$1 953 daddu $2,$25 954 sltu $1,$2,$25 955 daddu $3,$1 956 mflo ($24,$16,$9) 957 mfhi ($25,$16,$9) 958 daddu $7,$24 959 sltu $1,$7,$24 960 dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); 961 daddu $25,$1 962 daddu $2,$25 963 sltu $1,$2,$25 964 daddu $3,$1 965 mflo ($24,$18,$8) 966 mfhi ($25,$18,$8) 967 daddu $7,$24 968 sltu $1,$7,$24 969 dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); 970 daddu $25,$1 971 daddu $2,$25 972 sltu $1,$2,$25 973 daddu $3,$1 974 sd $7,5*8($4) # r[5]=c3; 975 976 mflo ($24,$20,$8) 977 mfhi ($25,$20,$8) 978 daddu $2,$24 979 sltu $1,$2,$24 980 dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); 981 daddu $25,$1 982 daddu $3,$25 983 sltu $7,$3,$25 984 mflo ($24,$18,$9) 985 mfhi ($25,$18,$9) 986 daddu $2,$24 987 sltu $1,$2,$24 988 dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); 989 daddu $25,$1 990 daddu $3,$25 991 sltu $1,$3,$25 992 daddu $7,$1 993 mflo ($24,$16,$10) 994 mfhi ($25,$16,$10) 995 daddu $2,$24 996 sltu $1,$2,$24 997 dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 998 daddu $25,$1 999 daddu $3,$25 1000 sltu $1,$3,$25 1001 daddu $7,$1 1002 mflo ($24,$15,$11) 1003 mfhi ($25,$15,$11) 1004 daddu $2,$24 1005 sltu $1,$2,$24 1006 dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); 1007 daddu $25,$1 1008 daddu $3,$25 1009 sltu $1,$3,$25 1010 daddu $7,$1 1011 mflo ($24,$14,$17) 1012 mfhi ($25,$14,$17) 1013 daddu $2,$24 1014 sltu $1,$2,$24 1015 dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); 1016 daddu $25,$1 1017 daddu $3,$25 1018 sltu $1,$3,$25 1019 daddu $7,$1 1020 mflo ($24,$13,$19) 1021 mfhi ($25,$13,$19) 1022 daddu $2,$24 1023 sltu $1,$2,$24 1024 dmultu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3); 1025 daddu $25,$1 
1026 daddu $3,$25 1027 sltu $1,$3,$25 1028 daddu $7,$1 1029 mflo ($24,$12,$21) 1030 mfhi ($25,$12,$21) 1031 daddu $2,$24 1032 sltu $1,$2,$24 1033 dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); 1034 daddu $25,$1 1035 daddu $3,$25 1036 sltu $1,$3,$25 1037 daddu $7,$1 1038 sd $2,6*8($4) # r[6]=c1; 1039 1040 mflo ($24,$12,$6) 1041 mfhi ($25,$12,$6) 1042 daddu $3,$24 1043 sltu $1,$3,$24 1044 dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); 1045 daddu $25,$1 1046 daddu $7,$25 1047 sltu $2,$7,$25 1048 mflo ($24,$13,$21) 1049 mfhi ($25,$13,$21) 1050 daddu $3,$24 1051 sltu $1,$3,$24 1052 dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); 1053 daddu $25,$1 1054 daddu $7,$25 1055 sltu $1,$7,$25 1056 daddu $2,$1 1057 mflo ($24,$14,$19) 1058 mfhi ($25,$14,$19) 1059 daddu $3,$24 1060 sltu $1,$3,$24 1061 dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); 1062 daddu $25,$1 1063 daddu $7,$25 1064 sltu $1,$7,$25 1065 daddu $2,$1 1066 mflo ($24,$15,$17) 1067 mfhi ($25,$15,$17) 1068 daddu $3,$24 1069 sltu $1,$3,$24 1070 dmultu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1); 1071 daddu $25,$1 1072 daddu $7,$25 1073 sltu $1,$7,$25 1074 daddu $2,$1 1075 mflo ($24,$16,$11) 1076 mfhi ($25,$16,$11) 1077 daddu $3,$24 1078 sltu $1,$3,$24 1079 dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); 1080 daddu $25,$1 1081 daddu $7,$25 1082 sltu $1,$7,$25 1083 daddu $2,$1 1084 mflo ($24,$18,$10) 1085 mfhi ($25,$18,$10) 1086 daddu $3,$24 1087 sltu $1,$3,$24 1088 dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); 1089 daddu $25,$1 1090 daddu $7,$25 1091 sltu $1,$7,$25 1092 daddu $2,$1 1093 mflo ($24,$20,$9) 1094 mfhi ($25,$20,$9) 1095 daddu $3,$24 1096 sltu $1,$3,$24 1097 dmultu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1); 1098 daddu $25,$1 1099 daddu $7,$25 1100 sltu $1,$7,$25 1101 daddu $2,$1 1102 mflo ($24,$5,$8) 1103 mfhi ($25,$5,$8) 1104 daddu $3,$24 1105 sltu $1,$3,$24 1106 dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); 1107 daddu $25,$1 1108 daddu $7,$25 1109 sltu $1,$7,$25 1110 daddu $2,$1 1111 sd 
$3,7*8($4) # r[7]=c2; 1112 1113 mflo ($24,$5,$9) 1114 mfhi ($25,$5,$9) 1115 daddu $7,$24 1116 sltu $1,$7,$24 1117 dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); 1118 daddu $25,$1 1119 daddu $2,$25 1120 sltu $3,$2,$25 1121 mflo ($24,$20,$10) 1122 mfhi ($25,$20,$10) 1123 daddu $7,$24 1124 sltu $1,$7,$24 1125 dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); 1126 daddu $25,$1 1127 daddu $2,$25 1128 sltu $1,$2,$25 1129 daddu $3,$1 1130 mflo ($24,$18,$11) 1131 mfhi ($25,$18,$11) 1132 daddu $7,$24 1133 sltu $1,$7,$24 1134 dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); 1135 daddu $25,$1 1136 daddu $2,$25 1137 sltu $1,$2,$25 1138 daddu $3,$1 1139 mflo ($24,$16,$17) 1140 mfhi ($25,$16,$17) 1141 daddu $7,$24 1142 sltu $1,$7,$24 1143 dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); 1144 daddu $25,$1 1145 daddu $2,$25 1146 sltu $1,$2,$25 1147 daddu $3,$1 1148 mflo ($24,$15,$19) 1149 mfhi ($25,$15,$19) 1150 daddu $7,$24 1151 sltu $1,$7,$24 1152 dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); 1153 daddu $25,$1 1154 daddu $2,$25 1155 sltu $1,$2,$25 1156 daddu $3,$1 1157 mflo ($24,$14,$21) 1158 mfhi ($25,$14,$21) 1159 daddu $7,$24 1160 sltu $1,$7,$24 1161 dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); 1162 daddu $25,$1 1163 daddu $2,$25 1164 sltu $1,$2,$25 1165 daddu $3,$1 1166 mflo ($24,$13,$6) 1167 mfhi ($25,$13,$6) 1168 daddu $7,$24 1169 sltu $1,$7,$24 1170 dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); 1171 daddu $25,$1 1172 daddu $2,$25 1173 sltu $1,$2,$25 1174 daddu $3,$1 1175 sd $7,8*8($4) # r[8]=c3; 1176 1177 mflo ($24,$14,$6) 1178 mfhi ($25,$14,$6) 1179 daddu $2,$24 1180 sltu $1,$2,$24 1181 dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); 1182 daddu $25,$1 1183 daddu $3,$25 1184 sltu $7,$3,$25 1185 mflo ($24,$15,$21) 1186 mfhi ($25,$15,$21) 1187 daddu $2,$24 1188 sltu $1,$2,$24 1189 dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); 1190 daddu $25,$1 1191 daddu $3,$25 1192 sltu $1,$3,$25 1193 daddu $7,$1 1194 mflo ($24,$16,$19) 1195 mfhi ($25,$16,$19) 1196 
daddu $2,$24 1197 sltu $1,$2,$24 1198 dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); 1199 daddu $25,$1 1200 daddu $3,$25 1201 sltu $1,$3,$25 1202 daddu $7,$1 1203 mflo ($24,$18,$17) 1204 mfhi ($25,$18,$17) 1205 daddu $2,$24 1206 sltu $1,$2,$24 1207 dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); 1208 daddu $25,$1 1209 daddu $3,$25 1210 sltu $1,$3,$25 1211 daddu $7,$1 1212 mflo ($24,$20,$11) 1213 mfhi ($25,$20,$11) 1214 daddu $2,$24 1215 sltu $1,$2,$24 1216 dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); 1217 daddu $25,$1 1218 daddu $3,$25 1219 sltu $1,$3,$25 1220 daddu $7,$1 1221 mflo ($24,$5,$10) 1222 mfhi ($25,$5,$10) 1223 daddu $2,$24 1224 sltu $1,$2,$24 1225 dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); 1226 daddu $25,$1 1227 daddu $3,$25 1228 sltu $1,$3,$25 1229 daddu $7,$1 1230 sd $2,9*8($4) # r[9]=c1; 1231 1232 mflo ($24,$5,$11) 1233 mfhi ($25,$5,$11) 1234 daddu $3,$24 1235 sltu $1,$3,$24 1236 dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); 1237 daddu $25,$1 1238 daddu $7,$25 1239 sltu $2,$7,$25 1240 mflo ($24,$20,$17) 1241 mfhi ($25,$20,$17) 1242 daddu $3,$24 1243 sltu $1,$3,$24 1244 dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); 1245 daddu $25,$1 1246 daddu $7,$25 1247 sltu $1,$7,$25 1248 daddu $2,$1 1249 mflo ($24,$18,$19) 1250 mfhi ($25,$18,$19) 1251 daddu $3,$24 1252 sltu $1,$3,$24 1253 dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); 1254 daddu $25,$1 1255 daddu $7,$25 1256 sltu $1,$7,$25 1257 daddu $2,$1 1258 mflo ($24,$16,$21) 1259 mfhi ($25,$16,$21) 1260 daddu $3,$24 1261 sltu $1,$3,$24 1262 dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); 1263 daddu $25,$1 1264 daddu $7,$25 1265 sltu $1,$7,$25 1266 daddu $2,$1 1267 mflo ($24,$15,$6) 1268 mfhi ($25,$15,$6) 1269 daddu $3,$24 1270 sltu $1,$3,$24 1271 dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); 1272 daddu $25,$1 1273 daddu $7,$25 1274 sltu $1,$7,$25 1275 daddu $2,$1 1276 sd $3,10*8($4) # r[10]=c2; 1277 1278 mflo ($24,$16,$6) 1279 mfhi ($25,$16,$6) 1280 daddu $7,$24 1281 sltu 
$1,$7,$24 1282 dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); 1283 daddu $25,$1 1284 daddu $2,$25 1285 sltu $3,$2,$25 1286 mflo ($24,$18,$21) 1287 mfhi ($25,$18,$21) 1288 daddu $7,$24 1289 sltu $1,$7,$24 1290 dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); 1291 daddu $25,$1 1292 daddu $2,$25 1293 sltu $1,$2,$25 1294 daddu $3,$1 1295 mflo ($24,$20,$19) 1296 mfhi ($25,$20,$19) 1297 daddu $7,$24 1298 sltu $1,$7,$24 1299 dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); 1300 daddu $25,$1 1301 daddu $2,$25 1302 sltu $1,$2,$25 1303 daddu $3,$1 1304 mflo ($24,$5,$17) 1305 mfhi ($25,$5,$17) 1306 daddu $7,$24 1307 sltu $1,$7,$24 1308 dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); 1309 daddu $25,$1 1310 daddu $2,$25 1311 sltu $1,$2,$25 1312 daddu $3,$1 1313 sd $7,11*8($4) # r[11]=c3; 1314 1315 mflo ($24,$5,$19) 1316 mfhi ($25,$5,$19) 1317 daddu $2,$24 1318 sltu $1,$2,$24 1319 dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); 1320 daddu $25,$1 1321 daddu $3,$25 1322 sltu $7,$3,$25 1323 mflo ($24,$20,$21) 1324 mfhi ($25,$20,$21) 1325 daddu $2,$24 1326 sltu $1,$2,$24 1327 dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); 1328 daddu $25,$1 1329 daddu $3,$25 1330 sltu $1,$3,$25 1331 daddu $7,$1 1332 mflo ($24,$18,$6) 1333 mfhi ($25,$18,$6) 1334 daddu $2,$24 1335 sltu $1,$2,$24 1336 dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); 1337 daddu $25,$1 1338 daddu $3,$25 1339 sltu $1,$3,$25 1340 daddu $7,$1 1341 sd $2,12*8($4) # r[12]=c1; 1342 1343 mflo ($24,$20,$6) 1344 mfhi ($25,$20,$6) 1345 daddu $3,$24 1346 sltu $1,$3,$24 1347 dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); 1348 daddu $25,$1 1349 daddu $7,$25 1350 sltu $2,$7,$25 1351 mflo ($24,$5,$21) 1352 mfhi ($25,$5,$21) 1353 daddu $3,$24 1354 sltu $1,$3,$24 1355 dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); 1356 daddu $25,$1 1357 daddu $7,$25 1358 sltu $1,$7,$25 1359 daddu $2,$1 1360 sd $3,13*8($4) # r[13]=c2; 1361 1362 mflo ($24,$5,$6) 1363 mfhi ($25,$5,$6) 1364 daddu $7,$24 1365 sltu $1,$7,$24 1366 daddu $25,$1 1367 
daddu $2,$25 1368 sd $7,14*8($4) # r[14]=c3; 1369 sd $2,15*8($4) # r[15]=c1; 1370 1371 .set noreorder 1372 ld $21,5*8($29) 1373 ld $20,4*8($29) 1374 ld $19,3*8($29) 1375 ld $18,2*8($29) 1376 ld $17,1*8($29) 1377 ld $16,0*8($29) 1378 jr $31 1379 daddu $29,6*8 1380.end bn_mul_comba8 1381 1382.align 5 1383.globl bn_mul_comba4 1384.ent bn_mul_comba4 1385bn_mul_comba4: 1386 .set reorder 1387 ld $12,0($5) 1388 ld $8,0($6) 1389 ld $13,8($5) 1390 ld $14,2*8($5) 1391 dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 1392 ld $15,3*8($5) 1393 ld $9,8($6) 1394 ld $10,2*8($6) 1395 ld $11,3*8($6) 1396 mflo ($2,$12,$8) 1397 mfhi ($3,$12,$8) 1398 sd $2,0($4) 1399 1400 dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 1401 mflo ($24,$12,$9) 1402 mfhi ($25,$12,$9) 1403 daddu $3,$24 1404 sltu $1,$3,$24 1405 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 1406 daddu $7,$25,$1 1407 mflo ($24,$13,$8) 1408 mfhi ($25,$13,$8) 1409 daddu $3,$24 1410 sltu $1,$3,$24 1411 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 1412 daddu $25,$1 1413 daddu $7,$25 1414 sltu $2,$7,$25 1415 sd $3,8($4) 1416 1417 mflo ($24,$14,$8) 1418 mfhi ($25,$14,$8) 1419 daddu $7,$24 1420 sltu $1,$7,$24 1421 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 1422 daddu $25,$1 1423 daddu $2,$25 1424 mflo ($24,$13,$9) 1425 mfhi ($25,$13,$9) 1426 daddu $7,$24 1427 sltu $1,$7,$24 1428 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 1429 daddu $25,$1 1430 daddu $2,$25 1431 sltu $3,$2,$25 1432 mflo ($24,$12,$10) 1433 mfhi ($25,$12,$10) 1434 daddu $7,$24 1435 sltu $1,$7,$24 1436 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 1437 daddu $25,$1 1438 daddu $2,$25 1439 sltu $1,$2,$25 1440 daddu $3,$1 1441 sd $7,2*8($4) 1442 1443 mflo ($24,$12,$11) 1444 mfhi ($25,$12,$11) 1445 daddu $2,$24 1446 sltu $1,$2,$24 1447 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 1448 daddu $25,$1 1449 daddu $3,$25 1450 sltu $7,$3,$25 1451 mflo ($24,$13,$10) 1452 mfhi ($25,$13,$10) 1453 daddu $2,$24 1454 sltu $1,$2,$24 1455 dmultu ($14,$9) # 
mul_add_c(a[2],b[1],c1,c2,c3);
	# NOTE(review): the text above looks like the wrapped end of a comment
	# that begins on the previous physical line; it is kept verbatim.
	# Everything from here down to ".end bn_mul_comba4" is the closing part
	# of a routine whose start is outside this view, reproduced unchanged.
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,3*8($4)

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,5*8($4)

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)
	sd	$3,7*8($4)

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

#-----------------------------------------------------------------------
# bn_sqr_comba8
#
# Squares the eight 64-bit words read from 0..7*8($5) and writes the
# sixteen result words to 0..15*8($4), lowest word first.
#
#   In:      $4 = result pointer, $5 = input pointer
#   Inputs:  $12..$15 = a[0..3], $8..$11 = a[4..7]
#   Acc:     $2, $3, $7 rotate as low / mid / high accumulator words
#   Product: $24 = low word, $25 = high word of the latest product
#   Scratch: $1 (carry; used explicitly), $6
#
# Cross products a[i]*a[j] (i != j) are added twice, squares a[i]*a[i]
# once.  Each dmultu is issued in the middle of the step that still
# consumes the previous product; its result is fetched by the mflo/mfhi
# pair closing that step.  dmultu/mflo/mfhi with parenthesised operands
# are preprocessor macros, presumably supplied by mips_arch.h.
#-----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	ld	$12,0*8($5)
	ld	$13,1*8($5)
	ld	$14,2*8($5)
	ld	$15,3*8($5)

	dmultu	($12,$12)		# a[0]*a[0]
	ld	$8,4*8($5)
	ld	$9,5*8($5)
	ld	$10,6*8($5)
	ld	$11,7*8($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sd	$2,0*8($4)		# r[0]

	# a[0]*a[1] is doubled by shifting the 128-bit product left one bit
	dmultu	($12,$13)		# a[0]*a[1]
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# $2 = top bit of the high word
	dsll	$25,$25,1
	dmultu	($14,$12)		# next: a[2]*a[0]
	slt	$6,$24,$0		# $6 = top bit of the low word
	daddu	$25,$25,$6		# carried into bit 0 of the high word
	dsll	$24,$24,1
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	daddu	$7,$25,$1
	sd	$3,1*8($4)		# r[1]
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)

	# add 2*a[2]*a[0]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$13)		# next: a[1]*a[1]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)

	# add a[1]*a[1] once: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$15)		# next: a[0]*a[3]
	daddu	$25,$25,$1
	daddu	$2,$2,$25
	sltu	$1,$2,$25
	daddu	$3,$3,$1
	sd	$7,2*8($4)		# r[2]
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)

	# add 2*a[0]*a[3]: $2 low, $3 mid, carries start $7
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# next: a[1]*a[2]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$7,$3,$1
	daddu	$3,$3,$25
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)

	# add 2*a[1]*a[2]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$12)		# next: a[4]*a[0]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$8,$12)
	mfhi	($25,$8,$12)
	sd	$2,3*8($4)		# r[3]

	# add 2*a[4]*a[0]: $3 low, $7 mid, carries start $2
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$13)		# next: a[3]*a[1]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$2,$7,$1
	daddu	$7,$7,$25
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)

	# add 2*a[3]*a[1]: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# next: a[2]*a[2]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$1,$7,$1
	daddu	$7,$7,$25
	daddu	$2,$2,$1
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)

	# add a[2]*a[2] once: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($12,$9)		# next: a[0]*a[5]
	daddu	$25,$25,$1
	daddu	$7,$7,$25
	sltu	$1,$7,$25
	daddu	$2,$2,$1
	sd	$3,4*8($4)		# r[4]
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)

	# add 2*a[0]*a[5]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$8)		# next: a[1]*a[4]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)

	# add 2*a[1]*a[4]: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$15)		# next: a[2]*a[3]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$1,$2,$1
	daddu	$2,$2,$25
	daddu	$3,$3,$1
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)

	# add 2*a[2]*a[3]: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($10,$12)		# next: a[6]*a[0]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$1,$2,$1
	daddu	$2,$2,$25
	daddu	$3,$3,$1
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$10,$12)
	mfhi	($25,$10,$12)
	sd	$7,5*8($4)		# r[5]

	# add 2*a[6]*a[0]: $2 low, $3 mid, carries start $7
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($9,$13)		# next: a[5]*a[1]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$7,$3,$1
	daddu	$3,$3,$25
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$9,$13)
	mfhi	($25,$9,$13)

	# add 2*a[5]*a[1]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$14)		# next: a[4]*a[2]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$8,$14)
	mfhi	($25,$8,$14)

	# add 2*a[4]*a[2]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$15)		# next: a[3]*a[3]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)

	# add a[3]*a[3] once: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($12,$11)		# next: a[0]*a[7]
	daddu	$25,$25,$1
	daddu	$3,$3,$25
	sltu	$1,$3,$25
	daddu	$7,$7,$1
	sd	$2,6*8($4)		# r[6]
	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)

	# add 2*a[0]*a[7]: $3 low, $7 mid, carries start $2
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$10)		# next: a[1]*a[6]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$2,$7,$1
	daddu	$7,$7,$25
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)

	# add 2*a[1]*a[6]: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$9)		# next: a[2]*a[5]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$1,$7,$1
	daddu	$7,$7,$25
	daddu	$2,$2,$1
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)

	# add 2*a[2]*a[5]: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$8)		# next: a[3]*a[4]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$1,$7,$1
	daddu	$7,$7,$25
	daddu	$2,$2,$1
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)

	# add 2*a[3]*a[4]: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($11,$13)		# next: a[7]*a[1]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$1,$7,$1
	daddu	$7,$7,$25
	daddu	$2,$2,$1
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$11,$13)
	mfhi	($25,$11,$13)
	sd	$3,7*8($4)		# r[7]

	# add 2*a[7]*a[1]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($10,$14)		# next: a[6]*a[2]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$10,$14)
	mfhi	($25,$10,$14)

	# add 2*a[6]*a[2]: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($9,$15)		# next: a[5]*a[3]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$1,$2,$1
	daddu	$2,$2,$25
	daddu	$3,$3,$1
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$9,$15)
	mfhi	($25,$9,$15)

	# add 2*a[5]*a[3]: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($8,$8)			# next: a[4]*a[4]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$1,$2,$1
	daddu	$2,$2,$25
	daddu	$3,$3,$1
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$8,$8)
	mfhi	($25,$8,$8)

	# add a[4]*a[4] once: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$11)		# next: a[2]*a[7]
	daddu	$25,$25,$1
	daddu	$2,$2,$25
	sltu	$1,$2,$25
	daddu	$3,$3,$1
	sd	$7,8*8($4)		# r[8]
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)

	# add 2*a[2]*a[7]: $2 low, $3 mid, carries start $7
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$10)		# next: a[3]*a[6]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$7,$3,$1
	daddu	$3,$3,$25
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)

	# add 2*a[3]*a[6]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$9)			# next: a[4]*a[5]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$8,$9)
	mfhi	($25,$8,$9)

	# add 2*a[4]*a[5]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($11,$15)		# next: a[7]*a[3]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$11,$15)
	mfhi	($25,$11,$15)
	sd	$2,9*8($4)		# r[9]

	# add 2*a[7]*a[3]: $3 low, $7 mid, carries start $2
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($10,$8)		# next: a[6]*a[4]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$2,$7,$1
	daddu	$7,$7,$25
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$10,$8)
	mfhi	($25,$10,$8)

	# add 2*a[6]*a[4]: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($9,$9)			# next: a[5]*a[5]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$1,$7,$1
	daddu	$7,$7,$25
	daddu	$2,$2,$1
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$9,$9)
	mfhi	($25,$9,$9)

	# add a[5]*a[5] once: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($8,$11)		# next: a[4]*a[7]
	daddu	$25,$25,$1
	daddu	$7,$7,$25
	sltu	$1,$7,$25
	daddu	$2,$2,$1
	sd	$3,10*8($4)		# r[10]
	mflo	($24,$8,$11)
	mfhi	($25,$8,$11)

	# add 2*a[4]*a[7]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($9,$10)		# next: a[5]*a[6]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$9,$10)
	mfhi	($25,$9,$10)

	# add 2*a[5]*a[6]: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($11,$9)		# next: a[7]*a[5]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$1,$2,$1
	daddu	$2,$2,$25
	daddu	$3,$3,$1
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$11,$9)
	mfhi	($25,$11,$9)
	sd	$7,11*8($4)		# r[11]

	# add 2*a[7]*a[5]: $2 low, $3 mid, carries start $7
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($10,$10)		# next: a[6]*a[6]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$7,$3,$1
	daddu	$3,$3,$25
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$10,$10)
	mfhi	($25,$10,$10)

	# add a[6]*a[6] once: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($10,$11)		# next: a[6]*a[7]
	daddu	$25,$25,$1
	daddu	$3,$3,$25
	sltu	$1,$3,$25
	daddu	$7,$7,$1
	sd	$2,12*8($4)		# r[12]
	mflo	($24,$10,$11)
	mfhi	($25,$10,$11)

	# add 2*a[6]*a[7]: $3 low, $7 mid, carries start $2
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($11,$11)		# next: a[7]*a[7]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$2,$7,$1
	daddu	$7,$7,$25
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$11,$11)
	mfhi	($25,$11,$11)
	sd	$3,13*8($4)		# r[13]

	# add a[7]*a[7] once and flush the two remaining words
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	daddu	$25,$25,$1
	daddu	$2,$2,$25
	sd	$7,14*8($4)		# r[14]
	sd	$2,15*8($4)		# r[15]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba8

#-----------------------------------------------------------------------
# bn_sqr_comba4
#
# Squares the four 64-bit words read from 0..3*8($5) and writes the
# eight result words to 0..7*8($4), lowest word first.
#
#   In:      $4 = result pointer, $5 = input pointer
#   Inputs:  $12..$15 = a[0..3]
#   Acc:     $2, $3, $7 rotate as low / mid / high accumulator words
#   Product: $24 = low word, $25 = high word of the latest product
#   Scratch: $1 (carry; used explicitly), $6
#
# Same scheme as the eight-word routine: cross products are added
# twice, squares once, and each dmultu is issued one step ahead of the
# mflo/mfhi pair that collects it.
#-----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	ld	$12,0*8($5)
	ld	$13,1*8($5)
	dmultu	($12,$12)		# a[0]*a[0]
	ld	$14,2*8($5)
	ld	$15,3*8($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sd	$2,0*8($4)		# r[0]

	# a[0]*a[1] is doubled by shifting the 128-bit product left one bit
	dmultu	($12,$13)		# a[0]*a[1]
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# $2 = top bit of the high word
	dsll	$25,$25,1
	dmultu	($14,$12)		# next: a[2]*a[0]
	slt	$6,$24,$0		# $6 = top bit of the low word
	daddu	$25,$25,$6		# carried into bit 0 of the high word
	dsll	$24,$24,1
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	daddu	$7,$25,$1
	sd	$3,1*8($4)		# r[1]
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)

	# add 2*a[2]*a[0]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$13)		# next: a[1]*a[1]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)

	# add a[1]*a[1] once: $7 low, $2 mid, $3 high
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$15)		# next: a[0]*a[3]
	daddu	$25,$25,$1
	daddu	$2,$2,$25
	sltu	$1,$2,$25
	daddu	$3,$3,$1
	sd	$7,2*8($4)		# r[2]
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)

	# add 2*a[0]*a[3]: $2 low, $3 mid, carries start $7
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# next: a[1]*a[2]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$7,$3,$1
	daddu	$3,$3,$25
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)

	# add 2*a[1]*a[2]: $2 low, $3 mid, $7 high
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$13)		# next: a[3]*a[1]
	daddu	$2,$2,$24
	daddu	$1,$1,$25
	sltu	$24,$2,$24
	daddu	$3,$3,$1
	daddu	$25,$25,$24
	sltu	$1,$3,$1
	daddu	$3,$3,$25
	daddu	$7,$7,$1
	sltu	$25,$3,$25
	daddu	$7,$7,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)
	sd	$2,3*8($4)		# r[3]

	# add 2*a[3]*a[1]: $3 low, $7 mid, carries start $2
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# next: a[2]*a[2]
	daddu	$3,$3,$24
	daddu	$1,$1,$25
	sltu	$24,$3,$24
	daddu	$7,$7,$1
	daddu	$25,$25,$24
	sltu	$2,$7,$1
	daddu	$7,$7,$25
	sltu	$25,$7,$25
	daddu	$2,$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)

	# add a[2]*a[2] once: $3 low, $7 mid, $2 high
	daddu	$3,$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$15)		# next: a[2]*a[3]
	daddu	$25,$25,$1
	daddu	$7,$7,$25
	sltu	$1,$7,$25
	daddu	$2,$2,$1
	sd	$3,4*8($4)		# r[4]
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)

	# add 2*a[2]*a[3]: $7 low, $2 mid, carries start $3
	daddu	$7,$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$15)		# next: a[3]*a[3]
	daddu	$7,$7,$24
	daddu	$1,$1,$25
	sltu	$24,$7,$24
	daddu	$2,$2,$1
	daddu	$25,$25,$24
	sltu	$3,$2,$1
	daddu	$2,$2,$25
	sltu	$25,$2,$25
	daddu	$3,$3,$25
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)
	sd	$7,5*8($4)		# r[5]

	# add a[3]*a[3] once and flush the two remaining words
	daddu	$2,$2,$24
	sltu	$1,$2,$24
	daddu	$25,$25,$1
	daddu	$3,$3,$25
	sd	$2,6*8($4)		# r[6]
	sd	$3,7*8($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba4