# Word-level big-number primitives for 32-bit MIPS (4-byte lw/sw words).
# Registers are written by number.  $1 is used explicitly as a scratch
# register, which is why ".set noat" is in effect for the whole file.
# Every routine leaves its result in $2 and also copies it into $4 in the
# delay slot of the final "jr $31".
.set	mips2
.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
.set	noat

# bn_mul_add_words
#   $4 = word array that is read, updated and written back
#   $5 = word array that is only read
#   $6 = word count (signed; <= 0 means "do nothing")
#   $7 = multiplier word
# For every word: ($4)[i] = ($4)[i] + ($5)[i] * $7 + carry.
# Returns the final carry word in $2 (0 when $6 <= 0).
# Presumably matches the C prototype
#   bn_mul_add_words(rp, ap, num, w) -- TODO confirm against the caller.
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2			# delay slot
.end	bn_mul_add_words

.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4			# mask: count with the low two bits cleared
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail	# fewer than 4 words: tail only

.L_bn_mul_add_words_loop:		# four words per iteration
	lw	$12,0($5)
	multu	$12,$7
	lw	$13,0($4)
	lw	$14,4($5)
	lw	$15,4($4)
	lw	$8,2*4($5)
	lw	$9,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	multu	$14,$7			# start the next product early
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1

	lw	$10,3*4($5)
	lw	$11,3*4($4)
	addu	$15,$2
	sltu	$2,$15,$2
	mflo	$1
	mfhi	$14
	addu	$15,$1
	addu	$2,$14
	multu	$8,$7
	sltu	$1,$15,$1
	sw	$15,4($4)
	addu	$2,$1

	subu	$6,4
	addu	$4,4*4			# pointers advance here, so the
	addu	$5,4*4			# remaining stores use negative offsets
	addu	$9,$2
	sltu	$2,$9,$2
	mflo	$1
	mfhi	$8
	addu	$9,$1
	addu	$2,$8
	multu	$10,$7
	sltu	$1,$9,$1
	sw	$9,-2*4($4)
	addu	$2,$1


	and	$8,$6,$3		# at least 4 words left?
	addu	$11,$2
	sltu	$2,$11,$2
	mflo	$1
	mfhi	$10
	addu	$11,$1
	addu	$2,$10
	sltu	$1,$11,$1
	sw	$11,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	addu	$2,$1			# delay slot: finish the carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:		# 1 to 3 remaining words
	.set	reorder
	lw	$12,0($5)
	multu	$12,$7
	lw	$13,0($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,4($5)
	multu	$12,$7
	lw	$13,4($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,4($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,2*4($5)
	multu	$12,$7
	lw	$13,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	$1
	mfhi	$12
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,2*4($4)
	addu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_mul_add_words_internal

# bn_mul_words
#   $4 = destination word array, $5 = source word array,
#   $6 = word count (signed), $7 = multiplier word
# For every word: ($4)[i] = low word of (($5)[i] * $7 + carry).
# Returns the final carry word in $2 (0 when $6 <= 0).
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_mul_words

.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:			# four words per iteration
	lw	$12,0($5)
	multu	$12,$7
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	multu	$14,$7
	sw	$2,0($4)
	addu	$2,$13,$12		# next carry = overflow + high word

	subu	$6,4
	addu	$4,4*4
	addu	$5,4*4
	mflo	$1
	mfhi	$14
	addu	$2,$1
	sltu	$15,$2,$1
	multu	$8,$7
	sw	$2,-3*4($4)
	addu	$2,$15,$14

	mflo	$1
	mfhi	$8
	addu	$2,$1
	sltu	$9,$2,$1
	multu	$10,$7
	sw	$2,-2*4($4)
	addu	$2,$9,$8

	and	$8,$6,$3
	mflo	$1
	mfhi	$10
	addu	$2,$1
	sltu	$11,$2,$1
	sw	$2,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	addu	$2,$11,$10		# delay slot

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:			# 1 to 3 remaining words
	.set	reorder
	lw	$12,0($5)
	multu	$12,$7
	subu	$6,1
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,0($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,4($5)
	multu	$12,$7
	subu	$6,1
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,4($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,2*4($5)
	multu	$12,$7
	mflo	$1
	mfhi	$12
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,2*4($4)
	addu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_mul_words_internal

# bn_sqr_words
#   $4 = destination (two words written per source word),
#   $5 = source word array, $6 = word count (signed)
# For every word: ($4)[2i] = low word, ($4)[2i+1] = high word of ($5)[i]^2.
# $2 is cleared on entry and never modified, so 0 is returned.
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0
	jr	$31
	move	$4,$2
.end	bn_sqr_words

.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:			# four source words per iteration
	lw	$12,0($5)
	multu	$12,$12
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	$13
	mfhi	$12
	sw	$13,0($4)
	sw	$12,4($4)

	multu	$14,$14
	subu	$6,4
	addu	$4,8*4			# destination moves twice as fast
	addu	$5,4*4
	mflo	$15
	mfhi	$14
	sw	$15,-6*4($4)
	sw	$14,-5*4($4)

	multu	$8,$8
	mflo	$9
	mfhi	$8
	sw	$9,-4*4($4)
	sw	$8,-3*4($4)


	multu	$10,$10
	and	$8,$6,$3
	mflo	$11
	mfhi	$10
	sw	$11,-2*4($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sw	$10,-4($4)		# delay slot

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:			# 1 to 3 remaining words
	.set	reorder
	lw	$12,0($5)
	multu	$12,$12
	subu	$6,1
	mflo	$13
	mfhi	$12
	sw	$13,0($4)
	sw	$12,4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,4($5)
	multu	$12,$12
	subu	$6,1
	mflo	$13
	mfhi	$12
	sw	$13,2*4($4)
	sw	$12,3*4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,2*4($5)
	multu	$12,$12
	mflo	$13
	mfhi	$12
	sw	$13,4*4($4)
	sw	$12,5*4($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_sqr_words_internal

# bn_add_words
#   $4 = destination word array, $5 and $6 = source word arrays,
#   $7 = word count (signed)
# For every word: ($4)[i] = ($5)[i] + ($6)[i] + carry.
# Returns the final carry in $2 (0 when $7 <= 0).
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_add_words

.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:			# four words per iteration
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# loop condition, tested at the bottom
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	addu	$8,$12
	sltu	$24,$8,$12		# carry out of a + b
	addu	$12,$8,$2
	sltu	$2,$12,$8		# carry out of adding the carry-in
	sw	$12,-4*4($4)
	addu	$2,$24

	addu	$9,$13
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25

	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	addu	$11,$15
	sltu	$25,$11,$15
	addu	$15,$11,$2
	sltu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	addu	$2,$25			# delay slot

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:			# 1 to 3 remaining words
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	addu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_add_words_return

	lw	$13,4($5)
	lw	$9,4($6)
	addu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_add_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_add_words_internal

# bn_sub_words
#   $4 = destination word array, $5 = minuend array, $6 = subtrahend array,
#   $7 = word count (signed)
# For every word: ($4)[i] = ($5)[i] - ($6)[i] - borrow.
# Returns the final borrow in $2 (0 when $7 <= 0; that early-exit path
# clears $4 with "move $4,$0" rather than copying $2).
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0			# delay slot: borrow = 0 on both paths
	jr	$31
	move	$4,$0
.end	bn_sub_words

.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:			# four words per iteration
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# loop condition, tested at the bottom
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	sltu	$24,$12,$8		# borrow out of a - b
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8		# borrow out of subtracting the borrow-in
	sw	$12,-4*4($4)
	addu	$2,$24

	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25


	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	sltu	$25,$15,$11
	subu	$11,$15,$11
	subu	$15,$11,$2
	sgtu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	addu	$2,$25			# delay slot

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:			# 1 to 3 remaining words
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	lw	$13,4($5)
	subu	$7,1
	lw	$9,4($6)
	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_sub_words_internal

# bn_div_3_words
#   $4 = pointer into a word array; words at offsets 0, -4 and -8 are read
#   $5 = word kept in $10 and multiplied by the quotient estimate
#   $6 = divisor word handed on to bn_div_words_internal
# Returns -1 (all ones) in $2 when ($4)[0] == $6.  Otherwise the two words
# ($4)[0] and ($4)[-1] are passed to bn_div_words_internal and the returned
# quotient is decremented in the loop below until the test there passes.
# Presumably the C prototype is bn_div_3_words(m, d1, d0) -- TODO confirm.
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	lw	$4,($7)
	move	$10,$5
	bne	$4,$6,bn_div_3_words_internal
	lw	$5,-4($7)		# delay slot: executed on both paths
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31			# keep the return address in a register
	bal	bn_div_words_internal	# quotient in $2, remainder in $5
	move	$31,$11
	multu	$10,$2
	lw	$14,-2*4($7)
	move	$8,$0
	mfhi	$13
	mflo	$12			# $13:$12 = $10 * quotient
	sltu	$24,$13,$5
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25
	sltu	$15,$12,$10
	addu	$5,$6
	subu	$13,$15
	subu	$12,$10
	sltu	$24,$13,$5
	sltu	$8,$5,$6		# $5 + $6 wrapped around
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	subu	$2,1			# delay slot: speculative decrement
	addu	$2,1			# loop exit: undo that decrement
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal

# bn_div_words
#   $4, $5 = two words of the dividend ($4 is shifted/compared as the upper
#            word), $6 = divisor word
# Returns -1 in $2 when $6 == 0.  Otherwise the quotient is returned in $2
# (copied to $4) and the remainder in $3 (copied to $5); $6 is restored
# before returning.  "break 6" is executed when normalising would shift
# set bits out of the top of $4.
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words

.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	# Still in "noreorder" mode here (set by bn_div_words above).
	move	$3,$0
	bltz	$6,.L_bn_div_words_body	# top bit already set: no shift needed
	move	$25,$3			# delay slot: shift count = 0
	sll	$6,1
	bgtz	$6,.-4			# back to the sll until the top bit is set
	addu	$25,1			# delay slot: count the shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	sll	$14,$13
	and	$14,$4			# bits of $4 that the shift would drop
	srl	$1,$5,$13
	.set	noreorder
	beqz	$14,.+12		# skip the nop and the break
	nop
	break	6		# signal overflow
	.set	reorder
	sll	$4,$25
	sll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	srl	$3,$6,4*4	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12			# skip the nop and the subtraction
	nop
	subu	$4,$6
	.set	reorder

	# Upper 16 bits of the quotient.
	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div1
	divu	$0,$4,$3
	mflo	$8
.L_bn_div_words_skip_div1:
	multu	$6,$8
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	$12
	mfhi	$13
.L_bn_div_words_inner_loop1:		# decrement the estimate while too large
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	subu	$13,$2			# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop1
	subu	$8,1			# delay slot
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	sll	$5,4*4	# bits
	subu	$4,$15,$12
	sll	$2,$8,4*4	# bits

	# Lower 16 bits of the quotient.
	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div2
	divu	$0,$4,$3
	mflo	$8
.L_bn_div_words_skip_div2:
	multu	$6,$8
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	$12
	mfhi	$13
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	subu	$13,$3			# delay slot
	subu	$12,$6
	b	.L_bn_div_words_inner_loop2
	subu	$8,1			# delay slot
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	subu	$4,$15,$12
	or	$2,$8
	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
	srl	$6,$25	# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal

# bn_mul_comba8
#   $4 = r (16 result words), $5 = a (8 words), $6 = b (8 words)
# Computes the full 8x8-word product one result column at a time.
# Register use after the loads:
#   a[0..7] = $12,$13,$14,$15,$16,$18,$20,$5
#   b[0..7] = $8,$9,$10,$11,$17,$19,$21,$6
#   column accumulator (three words, roles rotate) = $2,$3,$7
#   $24/$25 = low/high word of the product just finished, $1 = carry scratch
# $16-$21 are saved in a 6-word stack frame and restored before returning.
# The comment on each multu names the product being started; it is added
# into the accumulator by the mflo/mfhi group that follows.
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*4,$31
	.mask	0x003f0000,-4
	subu	$29,6*4
	sw	$21,5*4($29)
	sw	$20,4*4($29)
	sw	$19,3*4($29)
	sw	$18,2*4($29)
	sw	$17,1*4($29)
	sw	$16,0*4($29)

	.set	reorder
	lw	$12,0($5)	# If compiled with -mips3 option on
				# R5000 box assembler barks on this
				# line with "should not have mult/div
				# as last instruction in bb (R10K
				# bug)" warning. If anybody out there
				# has a clue about how to circumvent
				# this do send me a note.
				#		<appro@fy.chalmers.se>

	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	$2
	mfhi	$3

	lw	$16,4*4($5)
	lw	$18,5*4($5)
	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
	lw	$20,6*4($5)
	lw	$5,7*4($5)	# a[7] replaces the pointer to a
	lw	$17,4*4($6)
	lw	$19,5*4($6)
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	lw	$21,6*4($6)
	lw	$6,7*4($6)	# b[7] replaces the pointer to b
	sw	$2,0($4)	# r[0]=c1;
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)	# r[8]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)	# r[9]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)	# r[10]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)	# r[11]=c3;

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)	# r[12]=c1;

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)	# r[13]=c2;

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)	# r[14]=c3;
	sw	$2,15*4($4)	# r[15]=c1;

	.set	noreorder
	lw	$21,5*4($29)
	lw	$20,4*4($29)
	lw	$19,3*4($29)
	lw	$18,2*4($29)
	lw	$17,1*4($29)
	lw	$16,0*4($29)
	jr	$31
	addu	$29,6*4			# delay slot: release the stack frame
.end	bn_mul_comba8

# bn_mul_comba4
#   $4 = r (8 result words), $5 = a (4 words), $6 = b (4 words)
# Same column-by-column scheme as bn_mul_comba8 for a 4x4-word product:
#   a[0..3] = $12..$15, b[0..3] = $8..$11, accumulator = $2,$3,$7.
# No stack frame is set up and no result register is written explicitly.
.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	$2
	mfhi	$3
	sw	$2,0($4)

	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)
	sw	$3,7*4($4)

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,2*4($5)
	lw	$15,3*4($5)

	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$8,4*4($5)
	lw	$9,5*4($5)
	lw	$10,6*4($5)
	lw	$11,7*4($5)
	mflo	$2
	mfhi	$3
	sw	$2,0($4)

	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0
1543 sll $25,1 1544 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); 1545 slt $6,$24,$0 1546 addu $25,$6 1547 sll $24,1 1548 addu $3,$24 1549 sltu $1,$3,$24 1550 addu $7,$25,$1 1551 sw $3,4($4) 1552 mflo $24 1553 mfhi $25 1554 addu $7,$24 1555 sltu $1,$7,$24 1556 multu $13,$13 # forward multiplication 1557 addu $7,$24 1558 addu $1,$25 1559 sltu $24,$7,$24 1560 addu $2,$1 1561 addu $25,$24 1562 sltu $3,$2,$1 1563 addu $2,$25 1564 sltu $25,$2,$25 1565 addu $3,$25 1566 mflo $24 1567 mfhi $25 1568 addu $7,$24 1569 sltu $1,$7,$24 1570 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); 1571 addu $25,$1 1572 addu $2,$25 1573 sltu $1,$2,$25 1574 addu $3,$1 1575 sw $7,2*4($4) 1576 mflo $24 1577 mfhi $25 1578 addu $2,$24 1579 sltu $1,$2,$24 1580 multu $13,$14 # forward multiplication 1581 addu $2,$24 1582 addu $1,$25 1583 sltu $24,$2,$24 1584 addu $3,$1 1585 addu $25,$24 1586 sltu $7,$3,$1 1587 addu $3,$25 1588 sltu $25,$3,$25 1589 addu $7,$25 1590 mflo $24 1591 mfhi $25 1592 addu $2,$24 1593 sltu $1,$2,$24 1594 multu $8,$12 # forward multiplication 1595 addu $2,$24 1596 addu $1,$25 1597 sltu $24,$2,$24 1598 addu $3,$1 1599 addu $25,$24 1600 sltu $1,$3,$1 1601 addu $3,$25 1602 addu $7,$1 1603 sltu $25,$3,$25 1604 addu $7,$25 1605 sw $2,3*4($4) 1606 mflo $24 1607 mfhi $25 1608 addu $3,$24 1609 sltu $1,$3,$24 1610 multu $15,$13 # forward multiplication 1611 addu $3,$24 1612 addu $1,$25 1613 sltu $24,$3,$24 1614 addu $7,$1 1615 addu $25,$24 1616 sltu $2,$7,$1 1617 addu $7,$25 1618 sltu $25,$7,$25 1619 addu $2,$25 1620 mflo $24 1621 mfhi $25 1622 addu $3,$24 1623 sltu $1,$3,$24 1624 multu $14,$14 # forward multiplication 1625 addu $3,$24 1626 addu $1,$25 1627 sltu $24,$3,$24 1628 addu $7,$1 1629 addu $25,$24 1630 sltu $1,$7,$1 1631 addu $7,$25 1632 addu $2,$1 1633 sltu $25,$7,$25 1634 addu $2,$25 1635 mflo $24 1636 mfhi $25 1637 addu $3,$24 1638 sltu $1,$3,$24 1639 multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); 1640 addu $25,$1 1641 addu $7,$25 1642 sltu $1,$7,$25 1643 addu $2,$1 
1644 sw $3,4*4($4) 1645 mflo $24 1646 mfhi $25 1647 addu $7,$24 1648 sltu $1,$7,$24 1649 multu $13,$8 # forward multiplication 1650 addu $7,$24 1651 addu $1,$25 1652 sltu $24,$7,$24 1653 addu $2,$1 1654 addu $25,$24 1655 sltu $3,$2,$1 1656 addu $2,$25 1657 sltu $25,$2,$25 1658 addu $3,$25 1659 mflo $24 1660 mfhi $25 1661 addu $7,$24 1662 sltu $1,$7,$24 1663 multu $14,$15 # forward multiplication 1664 addu $7,$24 1665 addu $1,$25 1666 sltu $24,$7,$24 1667 addu $2,$1 1668 addu $25,$24 1669 sltu $1,$2,$1 1670 addu $2,$25 1671 addu $3,$1 1672 sltu $25,$2,$25 1673 addu $3,$25 1674 mflo $24 1675 mfhi $25 1676 addu $7,$24 1677 sltu $1,$7,$24 1678 multu $10,$12 # forward multiplication 1679 addu $7,$24 1680 addu $1,$25 1681 sltu $24,$7,$24 1682 addu $2,$1 1683 addu $25,$24 1684 sltu $1,$2,$1 1685 addu $2,$25 1686 addu $3,$1 1687 sltu $25,$2,$25 1688 addu $3,$25 1689 sw $7,5*4($4) 1690 mflo $24 1691 mfhi $25 1692 addu $2,$24 1693 sltu $1,$2,$24 1694 multu $9,$13 # forward multiplication 1695 addu $2,$24 1696 addu $1,$25 1697 sltu $24,$2,$24 1698 addu $3,$1 1699 addu $25,$24 1700 sltu $7,$3,$1 1701 addu $3,$25 1702 sltu $25,$3,$25 1703 addu $7,$25 1704 mflo $24 1705 mfhi $25 1706 addu $2,$24 1707 sltu $1,$2,$24 1708 multu $8,$14 # forward multiplication 1709 addu $2,$24 1710 addu $1,$25 1711 sltu $24,$2,$24 1712 addu $3,$1 1713 addu $25,$24 1714 sltu $1,$3,$1 1715 addu $3,$25 1716 addu $7,$1 1717 sltu $25,$3,$25 1718 addu $7,$25 1719 mflo $24 1720 mfhi $25 1721 addu $2,$24 1722 sltu $1,$2,$24 1723 multu $15,$15 # forward multiplication 1724 addu $2,$24 1725 addu $1,$25 1726 sltu $24,$2,$24 1727 addu $3,$1 1728 addu $25,$24 1729 sltu $1,$3,$1 1730 addu $3,$25 1731 addu $7,$1 1732 sltu $25,$3,$25 1733 addu $7,$25 1734 mflo $24 1735 mfhi $25 1736 addu $2,$24 1737 sltu $1,$2,$24 1738 multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); 1739 addu $25,$1 1740 addu $3,$25 1741 sltu $1,$3,$25 1742 addu $7,$1 1743 sw $2,6*4($4) 1744 mflo $24 1745 mfhi $25 1746 addu $3,$24 1747 sltu 
$1,$3,$24 1748 multu $13,$10 # forward multiplication 1749 addu $3,$24 1750 addu $1,$25 1751 sltu $24,$3,$24 1752 addu $7,$1 1753 addu $25,$24 1754 sltu $2,$7,$1 1755 addu $7,$25 1756 sltu $25,$7,$25 1757 addu $2,$25 1758 mflo $24 1759 mfhi $25 1760 addu $3,$24 1761 sltu $1,$3,$24 1762 multu $14,$9 # forward multiplication 1763 addu $3,$24 1764 addu $1,$25 1765 sltu $24,$3,$24 1766 addu $7,$1 1767 addu $25,$24 1768 sltu $1,$7,$1 1769 addu $7,$25 1770 addu $2,$1 1771 sltu $25,$7,$25 1772 addu $2,$25 1773 mflo $24 1774 mfhi $25 1775 addu $3,$24 1776 sltu $1,$3,$24 1777 multu $15,$8 # forward multiplication 1778 addu $3,$24 1779 addu $1,$25 1780 sltu $24,$3,$24 1781 addu $7,$1 1782 addu $25,$24 1783 sltu $1,$7,$1 1784 addu $7,$25 1785 addu $2,$1 1786 sltu $25,$7,$25 1787 addu $2,$25 1788 mflo $24 1789 mfhi $25 1790 addu $3,$24 1791 sltu $1,$3,$24 1792 multu $11,$13 # forward multiplication 1793 addu $3,$24 1794 addu $1,$25 1795 sltu $24,$3,$24 1796 addu $7,$1 1797 addu $25,$24 1798 sltu $1,$7,$1 1799 addu $7,$25 1800 addu $2,$1 1801 sltu $25,$7,$25 1802 addu $2,$25 1803 sw $3,7*4($4) 1804 mflo $24 1805 mfhi $25 1806 addu $7,$24 1807 sltu $1,$7,$24 1808 multu $10,$14 # forward multiplication 1809 addu $7,$24 1810 addu $1,$25 1811 sltu $24,$7,$24 1812 addu $2,$1 1813 addu $25,$24 1814 sltu $3,$2,$1 1815 addu $2,$25 1816 sltu $25,$2,$25 1817 addu $3,$25 1818 mflo $24 1819 mfhi $25 1820 addu $7,$24 1821 sltu $1,$7,$24 1822 multu $9,$15 # forward multiplication 1823 addu $7,$24 1824 addu $1,$25 1825 sltu $24,$7,$24 1826 addu $2,$1 1827 addu $25,$24 1828 sltu $1,$2,$1 1829 addu $2,$25 1830 addu $3,$1 1831 sltu $25,$2,$25 1832 addu $3,$25 1833 mflo $24 1834 mfhi $25 1835 addu $7,$24 1836 sltu $1,$7,$24 1837 multu $8,$8 # forward multiplication 1838 addu $7,$24 1839 addu $1,$25 1840 sltu $24,$7,$24 1841 addu $2,$1 1842 addu $25,$24 1843 sltu $1,$2,$1 1844 addu $2,$25 1845 addu $3,$1 1846 sltu $25,$2,$25 1847 addu $3,$25 1848 mflo $24 1849 mfhi $25 1850 addu $7,$24 1851 sltu 
$1,$7,$24 1852 multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); 1853 addu $25,$1 1854 addu $2,$25 1855 sltu $1,$2,$25 1856 addu $3,$1 1857 sw $7,8*4($4) 1858 mflo $24 1859 mfhi $25 1860 addu $2,$24 1861 sltu $1,$2,$24 1862 multu $15,$10 # forward multiplication 1863 addu $2,$24 1864 addu $1,$25 1865 sltu $24,$2,$24 1866 addu $3,$1 1867 addu $25,$24 1868 sltu $7,$3,$1 1869 addu $3,$25 1870 sltu $25,$3,$25 1871 addu $7,$25 1872 mflo $24 1873 mfhi $25 1874 addu $2,$24 1875 sltu $1,$2,$24 1876 multu $8,$9 # forward multiplication 1877 addu $2,$24 1878 addu $1,$25 1879 sltu $24,$2,$24 1880 addu $3,$1 1881 addu $25,$24 1882 sltu $1,$3,$1 1883 addu $3,$25 1884 addu $7,$1 1885 sltu $25,$3,$25 1886 addu $7,$25 1887 mflo $24 1888 mfhi $25 1889 addu $2,$24 1890 sltu $1,$2,$24 1891 multu $11,$15 # forward multiplication 1892 addu $2,$24 1893 addu $1,$25 1894 sltu $24,$2,$24 1895 addu $3,$1 1896 addu $25,$24 1897 sltu $1,$3,$1 1898 addu $3,$25 1899 addu $7,$1 1900 sltu $25,$3,$25 1901 addu $7,$25 1902 sw $2,9*4($4) 1903 mflo $24 1904 mfhi $25 1905 addu $3,$24 1906 sltu $1,$3,$24 1907 multu $10,$8 # forward multiplication 1908 addu $3,$24 1909 addu $1,$25 1910 sltu $24,$3,$24 1911 addu $7,$1 1912 addu $25,$24 1913 sltu $2,$7,$1 1914 addu $7,$25 1915 sltu $25,$7,$25 1916 addu $2,$25 1917 mflo $24 1918 mfhi $25 1919 addu $3,$24 1920 sltu $1,$3,$24 1921 multu $9,$9 # forward multiplication 1922 addu $3,$24 1923 addu $1,$25 1924 sltu $24,$3,$24 1925 addu $7,$1 1926 addu $25,$24 1927 sltu $1,$7,$1 1928 addu $7,$25 1929 addu $2,$1 1930 sltu $25,$7,$25 1931 addu $2,$25 1932 mflo $24 1933 mfhi $25 1934 addu $3,$24 1935 sltu $1,$3,$24 1936 multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); 1937 addu $25,$1 1938 addu $7,$25 1939 sltu $1,$7,$25 1940 addu $2,$1 1941 sw $3,10*4($4) 1942 mflo $24 1943 mfhi $25 1944 addu $7,$24 1945 sltu $1,$7,$24 1946 multu $9,$10 # forward multiplication 1947 addu $7,$24 1948 addu $1,$25 1949 sltu $24,$7,$24 1950 addu $2,$1 1951 addu $25,$24 1952 sltu $3,$2,$1 
# Tail of bn_sqr_comba8 (the routine begins earlier in the file).
# The visible part finishes the last result columns and stores words
# 11..15 through the destination pointer in $4.
	addu $2,$25
	sltu $25,$2,$25
	addu $3,$25
	mflo $24
	mfhi $25
	addu $7,$24
	sltu $1,$7,$24
	multu $11,$9 # forward multiplication
	addu $7,$24
	addu $1,$25
	sltu $24,$7,$24
	addu $2,$1
	addu $25,$24
	sltu $1,$2,$1
	addu $2,$25
	addu $3,$1
	sltu $25,$2,$25
	addu $3,$25
	sw $7,11*4($4)
	mflo $24
	mfhi $25
	addu $2,$24
	sltu $1,$2,$24
	multu $10,$10 # forward multiplication
	addu $2,$24
	addu $1,$25
	sltu $24,$2,$24
	addu $3,$1
	addu $25,$24
	sltu $7,$3,$1
	addu $3,$25
	sltu $25,$3,$25
	addu $7,$25
	mflo $24
	mfhi $25
	addu $2,$24
	sltu $1,$2,$24
	multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1);
	addu $25,$1
	addu $3,$25
	sltu $1,$3,$25
	addu $7,$1
	sw $2,12*4($4)
	mflo $24
	mfhi $25
	addu $3,$24
	sltu $1,$3,$24
	multu $11,$11 # forward multiplication
	addu $3,$24
	addu $1,$25
	sltu $24,$3,$24
	addu $7,$1
	addu $25,$24
	sltu $2,$7,$1
	addu $7,$25
	sltu $25,$7,$25
	addu $2,$25
	sw $3,13*4($4)

	mflo $24
	mfhi $25
	addu $7,$24
	sltu $1,$7,$24
	addu $25,$1
	addu $2,$25
	sw $7,14*4($4)
	sw $2,15*4($4)

	.set noreorder
	jr $31
	nop
.end bn_sqr_comba8

# bn_sqr_comba4: square a 4-word number into an 8-word result using
# column-wise ("comba") accumulation.
#
#   $5  in   pointer to the four 32-bit source words a[0..3]
#   $4  in   pointer to the eight 32-bit result words r[0..7]
#
# Register use inside the routine:
#   $12..$15  a[0], a[1], a[2], a[3]
#   $2,$3,$7  three-word column accumulator; the roles c1/c2/c3 named in
#             the per-multiply comments rotate from column to column
#   $24,$25   LO / HI halves of the product currently being added
#   $1        carry scratch (the file sets .set noat in its header)
#   $6        scratch, used once while doubling the first cross product
#
# Cross products a[i]*a[j] with i != j are added twice; squares a[i]*a[i]
# are added once.  A multu tagged "forward multiplication" starts the
# product consumed by the *following* mflo/mfhi pair, so the multiply
# overlaps the carry arithmetic of the current step.  The instruction
# order is part of the algorithm; do not reshuffle it by hand.
.align 5
.globl bn_sqr_comba4
.ent bn_sqr_comba4
bn_sqr_comba4:
	.set reorder
	lw $12,0($5)			# a[0]
	lw $13,4($5)			# a[1]
	multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3);
	lw $14,2*4($5)			# a[2]
	lw $15,3*4($5)			# a[3]
	mflo $2				# column 0: low word of a[0]^2
	mfhi $3				# high word seeds the next column
	sw $2,0($4)			# r[0]

	# Column 1: 2*a[0]*a[1].  The product is doubled with shifts; the
	# bit shifted out of each half is captured first with slt against
	# zero (1 when the top bit is set).
	multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo $24
	mfhi $25
	slt $2,$25,$0			# bit lost from HI -> start of the column-3 word
	sll $25,1
	multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2);
	slt $6,$24,$0			# bit lost from LO ...
	addu $25,$6			# ... moves into the doubled HI
	sll $24,1
	addu $3,$24			# accumulate doubled LO
	sltu $1,$3,$24			# carry out of that add
	addu $7,$25,$1			# doubled HI + carry starts column 2
	sw $3,4($4)			# r[1]

	# Column 2: 2*a[2]*a[0] + a[1]^2.
	# Double-add of ($25:$24) into ($3:$2:$7); $3 is written, not
	# accumulated, because it carries nothing into this column yet.
	mflo $24
	mfhi $25
	addu $7,$24
	sltu $1,$7,$24			# carry of first LO add
	multu $13,$13 # forward multiplication
	addu $7,$24
	addu $1,$25			# HI + first carry
	sltu $24,$7,$24			# carry of second LO add
	addu $2,$1
	addu $25,$24			# HI + second carry
	sltu $3,$2,$1			# initialise top word with the carry
	addu $2,$25
	sltu $25,$2,$25
	addu $3,$25
	# Single add of a[1]^2.
	mflo $24
	mfhi $25
	addu $7,$24
	sltu $1,$7,$24
	multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3);
	addu $25,$1
	addu $2,$25
	sltu $1,$2,$25
	addu $3,$1
	sw $7,2*4($4)			# r[2]

	# Column 3: 2*a[0]*a[3] + 2*a[1]*a[2].
	# First double-add writes the top word $7 ...
	mflo $24
	mfhi $25
	addu $2,$24
	sltu $1,$2,$24
	multu $13,$14 # forward multiplication
	addu $2,$24
	addu $1,$25
	sltu $24,$2,$24
	addu $3,$1
	addu $25,$24
	sltu $7,$3,$1
	addu $3,$25
	sltu $25,$3,$25
	addu $7,$25
	# ... the second double-add accumulates into it.
	mflo $24
	mfhi $25
	addu $2,$24
	sltu $1,$2,$24
	multu $15,$13 # forward multiplication
	addu $2,$24
	addu $1,$25
	sltu $24,$2,$24
	addu $3,$1
	addu $25,$24
	sltu $1,$3,$1
	addu $3,$25
	addu $7,$1
	sltu $25,$3,$25
	addu $7,$25
	sw $2,3*4($4)			# r[3]

	# Column 4: 2*a[3]*a[1] + a[2]^2.
	mflo $24
	mfhi $25
	addu $3,$24
	sltu $1,$3,$24
	multu $14,$14 # forward multiplication
	addu $3,$24
	addu $1,$25
	sltu $24,$3,$24
	addu $7,$1
	addu $25,$24
	sltu $2,$7,$1			# top word $2 written here
	addu $7,$25
	sltu $25,$7,$25
	addu $2,$25
	# Single add of a[2]^2.
	mflo $24
	mfhi $25
	addu $3,$24
	sltu $1,$3,$24
	multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2);
	addu $25,$1
	addu $7,$25
	sltu $1,$7,$25
	addu $2,$1
	sw $3,4*4($4)			# r[4]

	# Column 5: 2*a[2]*a[3].
	mflo $24
	mfhi $25
	addu $7,$24
	sltu $1,$7,$24
	multu $15,$15 # forward multiplication
	addu $7,$24
	addu $1,$25
	sltu $24,$7,$24
	addu $2,$1
	addu $25,$24
	sltu $3,$2,$1			# top word $3 written here
	addu $2,$25
	sltu $25,$2,$25
	addu $3,$25
	sw $7,5*4($4)			# r[5]

	# Columns 6 and 7: a[3]^2 added once; its HI half plus the carry
	# folds into the last word.
	mflo $24
	mfhi $25
	addu $2,$24
	sltu $1,$2,$24
	addu $25,$1
	addu $3,$25
	sw $2,6*4($4)			# r[6]
	sw $3,7*4($4)			# r[7]

	# Return with an explicit nop in the branch delay slot.
	.set noreorder
	jr $31
	nop
.end bn_sqr_comba4
