1#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6)) 2.set mips2 3#endif 4#include "mips_arch.h" 5 6#if defined(_MIPS_ARCH_MIPS64R6) 7# define ddivu(rs,rt) 8# define mfqt(rd,rs,rt) ddivu rd,rs,rt 9# define mfrm(rd,rs,rt) dmodu rd,rs,rt 10#elif defined(_MIPS_ARCH_MIPS32R6) 11# define divu(rs,rt) 12# define mfqt(rd,rs,rt) divu rd,rs,rt 13# define mfrm(rd,rs,rt) modu rd,rs,rt 14#else 15# define divu(rs,rt) divu $0,rs,rt 16# define mfqt(rd,rs,rt) mflo rd 17# define mfrm(rd,rs,rt) mfhi rd 18#endif 19 20.rdata 21.asciiz "mips3.s, Version 1.2" 22.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" 23 24.text 25.set noat 26 27.align 5 28.globl bn_mul_add_words 29.ent bn_mul_add_words 30bn_mul_add_words: 31 .set noreorder 32 bgtz $6,bn_mul_add_words_internal 33 move $2,$0 34 jr $31 35 move $4,$2 36.end bn_mul_add_words 37 38.align 5 39.ent bn_mul_add_words_internal 40bn_mul_add_words_internal: 41 .set reorder 42 li $3,-4 43 and $8,$6,$3 44 beqz $8,.L_bn_mul_add_words_tail 45 46.L_bn_mul_add_words_loop: 47 lw $12,0($5) 48 multu ($12,$7) 49 lw $13,0($4) 50 lw $14,4($5) 51 lw $15,4($4) 52 lw $8,2*4($5) 53 lw $9,2*4($4) 54 addu $13,$2 55 sltu $2,$13,$2 # All manuals say it "compares 32-bit 56 # values", but it seems to work fine 57 # even on 64-bit registers. 58 mflo ($1,$12,$7) 59 mfhi ($12,$12,$7) 60 addu $13,$1 61 addu $2,$12 62 multu ($14,$7) 63 sltu $1,$13,$1 64 sw $13,0($4) 65 addu $2,$1 66 67 lw $10,3*4($5) 68 lw $11,3*4($4) 69 addu $15,$2 70 sltu $2,$15,$2 71 mflo ($1,$14,$7) 72 mfhi ($14,$14,$7) 73 addu $15,$1 74 addu $2,$14 75 multu ($8,$7) 76 sltu $1,$15,$1 77 sw $15,4($4) 78 addu $2,$1 79 80 subu $6,4 81 addu $4,4*4 82 addu $5,4*4 83 addu $9,$2 84 sltu $2,$9,$2 85 mflo ($1,$8,$7) 86 mfhi ($8,$8,$7) 87 addu $9,$1 88 addu $2,$8 89 multu ($10,$7) 90 sltu $1,$9,$1 91 sw $9,-2*4($4) 92 addu $2,$1 93 94 95 and $8,$6,$3 96 addu $11,$2 97 sltu $2,$11,$2 98 mflo ($1,$10,$7) 99 mfhi ($10,$10,$7) 100 addu $11,$1 101 addu $2,$10 102 sltu $1,$11,$1 103 sw $11,-4($4) 104 .set noreorder 105 bgtz $8,.L_bn_mul_add_words_loop 106 addu $2,$1 107 108 beqz $6,.L_bn_mul_add_words_return 109 nop 110 111.L_bn_mul_add_words_tail: 112 .set reorder 113 lw $12,0($5) 114 multu ($12,$7) 115 lw $13,0($4) 116 subu $6,1 117 addu $13,$2 118 sltu $2,$13,$2 119 mflo ($1,$12,$7) 120 mfhi ($12,$12,$7) 121 addu $13,$1 122 addu $2,$12 123 sltu $1,$13,$1 124 sw $13,0($4) 125 addu $2,$1 126 beqz $6,.L_bn_mul_add_words_return 127 128 lw $12,4($5) 129 multu ($12,$7) 130 lw $13,4($4) 131 subu $6,1 132 addu $13,$2 133 sltu $2,$13,$2 134 mflo ($1,$12,$7) 135 mfhi ($12,$12,$7) 136 addu $13,$1 137 addu $2,$12 138 sltu $1,$13,$1 139 sw $13,4($4) 140 addu $2,$1 141 beqz $6,.L_bn_mul_add_words_return 142 143 lw $12,2*4($5) 144 multu ($12,$7) 145 lw $13,2*4($4) 146 addu $13,$2 147 sltu $2,$13,$2 148 mflo ($1,$12,$7) 149 mfhi ($12,$12,$7) 150 addu $13,$1 151 addu $2,$12 152 sltu $1,$13,$1 153 sw $13,2*4($4) 154 addu $2,$1 155 156.L_bn_mul_add_words_return: 157 .set noreorder 158 jr $31 159 move $4,$2 160.end bn_mul_add_words_internal 161 162.align 5 163.globl bn_mul_words 164.ent bn_mul_words 165bn_mul_words: 166 .set noreorder 167 bgtz $6,bn_mul_words_internal 168 move $2,$0 169 jr $31 170 move $4,$2 171.end bn_mul_words 172 173.align 5 174.ent bn_mul_words_internal 175bn_mul_words_internal: 176 .set reorder 177 li $3,-4 178 and $8,$6,$3 179 beqz $8,.L_bn_mul_words_tail 180 181.L_bn_mul_words_loop: 182 lw $12,0($5) 183 multu ($12,$7) 184 lw $14,4($5) 185 lw $8,2*4($5) 186 lw $10,3*4($5) 187 mflo ($1,$12,$7) 188 mfhi ($12,$12,$7) 189 addu $2,$1 190 sltu $13,$2,$1 191 multu ($14,$7) 192 sw $2,0($4) 193 addu $2,$13,$12 194 195 subu $6,4 196 addu $4,4*4 197 addu $5,4*4 198 mflo ($1,$14,$7) 199 mfhi ($14,$14,$7) 200 addu $2,$1 201 sltu $15,$2,$1 202 multu ($8,$7) 203 sw $2,-3*4($4) 204 addu $2,$15,$14 205 206 mflo ($1,$8,$7) 207 mfhi ($8,$8,$7) 208 addu $2,$1 209 sltu $9,$2,$1 210 multu ($10,$7) 211 sw $2,-2*4($4) 212 addu $2,$9,$8 213 214 and $8,$6,$3 215 mflo ($1,$10,$7) 216 mfhi ($10,$10,$7) 217 addu $2,$1 218 sltu $11,$2,$1 219 sw $2,-4($4) 220 .set noreorder 221 bgtz $8,.L_bn_mul_words_loop 222 addu $2,$11,$10 223 224 beqz $6,.L_bn_mul_words_return 225 nop 226 227.L_bn_mul_words_tail: 228 .set reorder 229 lw $12,0($5) 230 multu ($12,$7) 231 subu $6,1 232 mflo ($1,$12,$7) 233 mfhi ($12,$12,$7) 234 addu $2,$1 235 sltu $13,$2,$1 236 sw $2,0($4) 237 addu $2,$13,$12 238 beqz $6,.L_bn_mul_words_return 239 240 lw $12,4($5) 241 multu ($12,$7) 242 subu $6,1 243 mflo ($1,$12,$7) 244 mfhi ($12,$12,$7) 245 addu $2,$1 246 sltu $13,$2,$1 247 sw $2,4($4) 248 addu $2,$13,$12 249 beqz $6,.L_bn_mul_words_return 250 251 lw $12,2*4($5) 252 multu ($12,$7) 253 mflo ($1,$12,$7) 254 mfhi ($12,$12,$7) 255 addu $2,$1 256 sltu $13,$2,$1 257 sw $2,2*4($4) 258 addu $2,$13,$12 259 260.L_bn_mul_words_return: 261 .set noreorder 262 jr $31 263 move $4,$2 264.end bn_mul_words_internal 265 266.align 5 267.globl bn_sqr_words 268.ent bn_sqr_words 269bn_sqr_words: 270 .set noreorder 271 bgtz $6,bn_sqr_words_internal 272 move $2,$0 273 jr $31 274 move $4,$2 275.end bn_sqr_words 276 277.align 5 278.ent bn_sqr_words_internal 279bn_sqr_words_internal: 280 .set reorder 281 li $3,-4 282 and $8,$6,$3 283 beqz $8,.L_bn_sqr_words_tail 284 285.L_bn_sqr_words_loop: 286 lw $12,0($5) 287 multu ($12,$12) 288 lw $14,4($5) 289 lw $8,2*4($5) 290 lw $10,3*4($5) 291 mflo ($13,$12,$12) 292 mfhi ($12,$12,$12) 293 sw $13,0($4) 294 sw $12,4($4) 295 296 multu ($14,$14) 297 subu $6,4 298 addu $4,8*4 299 addu $5,4*4 300 mflo ($15,$14,$14) 301 mfhi ($14,$14,$14) 302 sw $15,-6*4($4) 303 sw $14,-5*4($4) 304 305 multu ($8,$8) 306 mflo ($9,$8,$8) 307 mfhi ($8,$8,$8) 308 sw $9,-4*4($4) 309 sw $8,-3*4($4) 310 311 312 multu ($10,$10) 313 and $8,$6,$3 314 mflo ($11,$10,$10) 315 mfhi ($10,$10,$10) 316 sw $11,-2*4($4) 317 318 .set noreorder 319 sw $10,-4($4) 320 bgtz $8,.L_bn_sqr_words_loop 321 nop 322 323 beqz $6,.L_bn_sqr_words_return 324 nop 325 326.L_bn_sqr_words_tail: 327 .set reorder 328 lw $12,0($5) 329 multu ($12,$12) 330 subu $6,1 331 mflo ($13,$12,$12) 332 mfhi ($12,$12,$12) 333 sw $13,0($4) 334 sw $12,4($4) 335 beqz $6,.L_bn_sqr_words_return 336 337 lw $12,4($5) 338 multu ($12,$12) 339 subu $6,1 340 mflo ($13,$12,$12) 341 mfhi ($12,$12,$12) 342 sw $13,2*4($4) 343 sw $12,3*4($4) 344 beqz $6,.L_bn_sqr_words_return 345 346 lw $12,2*4($5) 347 multu ($12,$12) 348 mflo ($13,$12,$12) 349 mfhi ($12,$12,$12) 350 sw $13,4*4($4) 351 sw $12,5*4($4) 352 353.L_bn_sqr_words_return: 354 .set noreorder 355 jr $31 356 move $4,$2 357 358.end bn_sqr_words_internal 359 360.align 5 361.globl bn_add_words 362.ent bn_add_words 363bn_add_words: 364 .set noreorder 365 bgtz $7,bn_add_words_internal 366 move $2,$0 367 jr $31 368 move $4,$2 369.end bn_add_words 370 371.align 5 372.ent bn_add_words_internal 373bn_add_words_internal: 374 .set reorder 375 li $3,-4 376 and $1,$7,$3 377 beqz $1,.L_bn_add_words_tail 378 379.L_bn_add_words_loop: 380 lw $12,0($5) 381 lw $8,0($6) 382 subu $7,4 383 lw $13,4($5) 384 and $1,$7,$3 385 lw $14,2*4($5) 386 addu $6,4*4 387 lw $15,3*4($5) 388 addu $4,4*4 389 lw $9,-3*4($6) 390 addu $5,4*4 391 lw $10,-2*4($6) 392 lw $11,-4($6) 393 addu $8,$12 394 sltu $24,$8,$12 395 addu $12,$8,$2 396 sltu $2,$12,$8 397 sw $12,-4*4($4) 398 addu $2,$24 399 400 addu $9,$13 401 sltu $25,$9,$13 402 addu $13,$9,$2 403 sltu $2,$13,$9 404 sw $13,-3*4($4) 405 addu $2,$25 406 407 addu $10,$14 408 sltu $24,$10,$14 409 addu $14,$10,$2 410 sltu $2,$14,$10 411 sw $14,-2*4($4) 412 addu $2,$24 413 414 addu $11,$15 415 sltu $25,$11,$15 416 addu $15,$11,$2 417 sltu $2,$15,$11 418 sw $15,-4($4) 419 420 .set noreorder 421 bgtz $1,.L_bn_add_words_loop 422 addu $2,$25 423 424 beqz $7,.L_bn_add_words_return 425 nop 426 427.L_bn_add_words_tail: 428 .set reorder 429 lw $12,0($5) 430 lw $8,0($6) 431 addu $8,$12 432 subu $7,1 433 sltu $24,$8,$12 434 addu $12,$8,$2 435 sltu $2,$12,$8 436 sw $12,0($4) 437 addu $2,$24 438 beqz $7,.L_bn_add_words_return 439 440 lw $13,4($5) 441 lw $9,4($6) 442 addu $9,$13 443 subu $7,1 444 sltu $25,$9,$13 445 addu $13,$9,$2 446 sltu $2,$13,$9 447 sw $13,4($4) 448 addu $2,$25 449 beqz $7,.L_bn_add_words_return 450 451 lw $14,2*4($5) 452 lw $10,2*4($6) 453 addu $10,$14 454 sltu $24,$10,$14 455 addu $14,$10,$2 456 sltu $2,$14,$10 457 sw $14,2*4($4) 458 addu $2,$24 459 460.L_bn_add_words_return: 461 .set noreorder 462 jr $31 463 move $4,$2 464 465.end bn_add_words_internal 466 467.align 5 468.globl bn_sub_words 469.ent bn_sub_words 470bn_sub_words: 471 .set noreorder 472 bgtz $7,bn_sub_words_internal 473 move $2,$0 474 jr $31 475 move $4,$0 476.end bn_sub_words 477 478.align 5 479.ent bn_sub_words_internal 480bn_sub_words_internal: 481 .set reorder 482 li $3,-4 483 and $1,$7,$3 484 beqz $1,.L_bn_sub_words_tail 485 486.L_bn_sub_words_loop: 487 lw $12,0($5) 488 lw $8,0($6) 489 subu $7,4 490 lw $13,4($5) 491 and $1,$7,$3 492 lw $14,2*4($5) 493 addu $6,4*4 494 lw $15,3*4($5) 495 addu $4,4*4 496 lw $9,-3*4($6) 497 addu $5,4*4 498 lw $10,-2*4($6) 499 lw $11,-4($6) 500 sltu $24,$12,$8 501 subu $8,$12,$8 502 subu $12,$8,$2 503 sgtu $2,$12,$8 504 sw $12,-4*4($4) 505 addu $2,$24 506 507 sltu $25,$13,$9 508 subu $9,$13,$9 509 subu $13,$9,$2 510 sgtu $2,$13,$9 511 sw $13,-3*4($4) 512 addu $2,$25 513 514 515 sltu $24,$14,$10 516 subu $10,$14,$10 517 subu $14,$10,$2 518 sgtu $2,$14,$10 519 sw $14,-2*4($4) 520 addu $2,$24 521 522 sltu $25,$15,$11 523 subu $11,$15,$11 524 subu $15,$11,$2 525 sgtu $2,$15,$11 526 sw $15,-4($4) 527 528 .set noreorder 529 bgtz $1,.L_bn_sub_words_loop 530 addu $2,$25 531 532 beqz $7,.L_bn_sub_words_return 533 nop 534 535.L_bn_sub_words_tail: 536 .set reorder 537 lw $12,0($5) 538 lw $8,0($6) 539 subu $7,1 540 sltu $24,$12,$8 541 subu $8,$12,$8 542 subu $12,$8,$2 543 sgtu $2,$12,$8 544 sw $12,0($4) 545 addu $2,$24 546 beqz $7,.L_bn_sub_words_return 547 548 lw $13,4($5) 549 subu $7,1 550 lw $9,4($6) 551 sltu $25,$13,$9 552 subu $9,$13,$9 553 subu $13,$9,$2 554 sgtu $2,$13,$9 555 sw $13,4($4) 556 addu $2,$25 557 beqz $7,.L_bn_sub_words_return 558 559 lw $14,2*4($5) 560 lw $10,2*4($6) 561 sltu $24,$14,$10 562 subu $10,$14,$10 563 subu $14,$10,$2 564 sgtu $2,$14,$10 565 sw $14,2*4($4) 566 addu $2,$24 567 568.L_bn_sub_words_return: 569 .set noreorder 570 jr $31 571 move $4,$2 572.end bn_sub_words_internal 573 574#if 0 575/* 576 * The bn_div_3_words entry point is re-used for constant-time interface. 577 * Implementation is retained as historical reference. 578 */ 579.align 5 580.globl bn_div_3_words 581.ent bn_div_3_words 582bn_div_3_words: 583 .set noreorder 584 move $7,$4 # we know that bn_div_words does not 585 # touch $7, $10, $11 and preserves $6 586 # so that we can save two arguments 587 # and return address in registers 588 # instead of stack:-) 589 590 lw $4,($7) 591 move $10,$5 592 lw $5,-4($7) 593 bne $4,$6,bn_div_3_words_internal 594 nop 595 li $2,-1 596 jr $31 597 move $4,$2 598.end bn_div_3_words 599 600.align 5 601.ent bn_div_3_words_internal 602bn_div_3_words_internal: 603 .set reorder 604 move $11,$31 605 bal bn_div_words_internal 606 move $31,$11 607 multu ($10,$2) 608 lw $14,-2*4($7) 609 move $8,$0 610 mfhi ($13,$10,$2) 611 mflo ($12,$10,$2) 612 sltu $24,$13,$5 613.L_bn_div_3_words_inner_loop: 614 bnez $24,.L_bn_div_3_words_inner_loop_done 615 sgeu $1,$14,$12 616 seq $25,$13,$5 617 and $1,$25 618 sltu $15,$12,$10 619 addu $5,$6 620 subu $13,$15 621 subu $12,$10 622 sltu $24,$13,$5 623 sltu $8,$5,$6 624 or $24,$8 625 .set noreorder 626 beqz $1,.L_bn_div_3_words_inner_loop 627 subu $2,1 628 addu $2,1 629 .set reorder 630.L_bn_div_3_words_inner_loop_done: 631 .set noreorder 632 jr $31 633 move $4,$2 634.end bn_div_3_words_internal 635#endif 636 637.align 5 638.globl bn_div_words 639.ent bn_div_words 640bn_div_words: 641 .set noreorder 642 bnez $6,bn_div_words_internal 643 li $2,-1 # I would rather signal div-by-zero 644 # which can be done with 'break 7' 645 jr $31 646 move $4,$2 647.end bn_div_words 648 649.align 5 650.ent bn_div_words_internal 651bn_div_words_internal: 652 move $3,$0 653 bltz $6,.L_bn_div_words_body 654 move $25,$3 655 sll $6,1 656 bgtz $6,.-4 657 addu $25,1 658 659 .set reorder 660 negu $13,$25 661 li $14,-1 662 sll $14,$13 663 and $14,$4 664 srl $1,$5,$13 665 .set noreorder 666 beqz $14,.+12 667 nop 668 break 6 # signal overflow 669 .set reorder 670 sll $4,$25 671 sll $5,$25 672 or $4,$1 673.L_bn_div_words_body: 674 srl $3,$6,4*4 # bits 675 sgeu $1,$4,$6 676 .set noreorder 677 beqz $1,.+12 678 nop 679 subu $4,$6 680 .set reorder 681 682 li $8,-1 683 srl $9,$4,4*4 # bits 684 srl $8,4*4 # q=0xffffffff 685 beq $3,$9,.L_bn_div_words_skip_div1 686 divu ($4,$3) 687 mfqt ($8,$4,$3) 688.L_bn_div_words_skip_div1: 689 multu ($6,$8) 690 sll $15,$4,4*4 # bits 691 srl $1,$5,4*4 # bits 692 or $15,$1 693 mflo ($12,$6,$8) 694 mfhi ($13,$6,$8) 695.L_bn_div_words_inner_loop1: 696 sltu $14,$15,$12 697 seq $24,$9,$13 698 sltu $1,$9,$13 699 and $14,$24 700 sltu $2,$12,$6 701 or $1,$14 702 .set noreorder 703 beqz $1,.L_bn_div_words_inner_loop1_done 704 subu $13,$2 705 subu $12,$6 706 b .L_bn_div_words_inner_loop1 707 subu $8,1 708 .set reorder 709.L_bn_div_words_inner_loop1_done: 710 711 sll $5,4*4 # bits 712 subu $4,$15,$12 713 sll $2,$8,4*4 # bits 714 715 li $8,-1 716 srl $9,$4,4*4 # bits 717 srl $8,4*4 # q=0xffffffff 718 beq $3,$9,.L_bn_div_words_skip_div2 719 divu ($4,$3) 720 mfqt ($8,$4,$3) 721.L_bn_div_words_skip_div2: 722 multu ($6,$8) 723 sll $15,$4,4*4 # bits 724 srl $1,$5,4*4 # bits 725 or $15,$1 726 mflo ($12,$6,$8) 727 mfhi ($13,$6,$8) 728.L_bn_div_words_inner_loop2: 729 sltu $14,$15,$12 730 seq $24,$9,$13 731 sltu $1,$9,$13 732 and $14,$24 733 sltu $3,$12,$6 734 or $1,$14 735 .set noreorder 736 beqz $1,.L_bn_div_words_inner_loop2_done 737 subu $13,$3 738 subu $12,$6 739 b .L_bn_div_words_inner_loop2 740 subu $8,1 741 .set reorder 742.L_bn_div_words_inner_loop2_done: 743 744 subu $4,$15,$12 745 or $2,$8 746 srl $3,$4,$25 # $3 contains remainder if anybody wants it 747 srl $6,$25 # restore $6 748 749 .set noreorder 750 move $5,$3 751 jr $31 752 move $4,$2 753.end bn_div_words_internal 754 755.align 5 756.globl bn_mul_comba8 757.ent bn_mul_comba8 758bn_mul_comba8: 759 .set noreorder 760 .frame $29,6*4,$31 761 .mask 0x003f0000,-4 762 subu $29,6*4 763 sw $21,5*4($29) 764 sw $20,4*4($29) 765 sw $19,3*4($29) 766 sw $18,2*4($29) 767 sw $17,1*4($29) 768 sw $16,0*4($29) 769 770 .set reorder 771 lw $12,0($5) # If compiled with -mips3 option on 772 # R5000 box assembler barks on this 773 # 1ine with "should not have mult/div 774 # as last instruction in bb (R10K 775 # bug)" warning. If anybody out there 776 # has a clue about how to circumvent 777 # this do send me a note. 778 # <appro@fy.chalmers.se> 779 780 lw $8,0($6) 781 lw $13,4($5) 782 lw $14,2*4($5) 783 multu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 784 lw $15,3*4($5) 785 lw $9,4($6) 786 lw $10,2*4($6) 787 lw $11,3*4($6) 788 mflo ($2,$12,$8) 789 mfhi ($3,$12,$8) 790 791 lw $16,4*4($5) 792 lw $18,5*4($5) 793 multu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 794 lw $20,6*4($5) 795 lw $5,7*4($5) 796 lw $17,4*4($6) 797 lw $19,5*4($6) 798 mflo ($24,$12,$9) 799 mfhi ($25,$12,$9) 800 addu $3,$24 801 sltu $1,$3,$24 802 multu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 803 addu $7,$25,$1 804 lw $21,6*4($6) 805 lw $6,7*4($6) 806 sw $2,0($4) # r[0]=c1; 807 mflo ($24,$13,$8) 808 mfhi ($25,$13,$8) 809 addu $3,$24 810 sltu $1,$3,$24 811 multu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 812 addu $25,$1 813 addu $7,$25 814 sltu $2,$7,$25 815 sw $3,4($4) # r[1]=c2; 816 817 mflo ($24,$14,$8) 818 mfhi ($25,$14,$8) 819 addu $7,$24 820 sltu $1,$7,$24 821 multu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 822 addu $25,$1 823 addu $2,$25 824 mflo ($24,$13,$9) 825 mfhi ($25,$13,$9) 826 addu $7,$24 827 sltu $1,$7,$24 828 multu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 829 addu $25,$1 830 addu $2,$25 831 sltu $3,$2,$25 832 mflo ($24,$12,$10) 833 mfhi ($25,$12,$10) 834 addu $7,$24 835 sltu $1,$7,$24 836 multu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 837 addu $25,$1 838 addu $2,$25 839 sltu $1,$2,$25 840 addu $3,$1 841 sw $7,2*4($4) # r[2]=c3; 842 843 mflo ($24,$12,$11) 844 mfhi ($25,$12,$11) 845 addu $2,$24 846 sltu $1,$2,$24 847 multu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 848 addu $25,$1 849 addu $3,$25 850 sltu $7,$3,$25 851 mflo ($24,$13,$10) 852 mfhi ($25,$13,$10) 853 addu $2,$24 854 sltu $1,$2,$24 855 multu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); 856 addu $25,$1 857 addu $3,$25 858 sltu $1,$3,$25 859 addu $7,$1 860 mflo ($24,$14,$9) 861 mfhi ($25,$14,$9) 862 addu $2,$24 863 sltu $1,$2,$24 864 multu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 865 addu $25,$1 866 addu $3,$25 867 sltu $1,$3,$25 868 addu $7,$1 869 mflo ($24,$15,$8) 870 mfhi ($25,$15,$8) 871 addu $2,$24 872 sltu $1,$2,$24 873 multu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); 874 addu $25,$1 875 addu $3,$25 876 sltu $1,$3,$25 877 addu $7,$1 878 sw $2,3*4($4) # r[3]=c1; 879 880 mflo ($24,$16,$8) 881 mfhi ($25,$16,$8) 882 addu $3,$24 883 sltu $1,$3,$24 884 multu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 885 addu $25,$1 886 addu $7,$25 887 sltu $2,$7,$25 888 mflo ($24,$15,$9) 889 mfhi ($25,$15,$9) 890 addu $3,$24 891 sltu $1,$3,$24 892 multu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 893 addu $25,$1 894 addu $7,$25 895 sltu $1,$7,$25 896 addu $2,$1 897 mflo ($24,$14,$10) 898 mfhi ($25,$14,$10) 899 addu $3,$24 900 sltu $1,$3,$24 901 multu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 902 addu $25,$1 903 addu $7,$25 904 sltu $1,$7,$25 905 addu $2,$1 906 mflo ($24,$13,$11) 907 mfhi ($25,$13,$11) 908 addu $3,$24 909 sltu $1,$3,$24 910 multu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); 911 addu $25,$1 912 addu $7,$25 913 sltu $1,$7,$25 914 addu $2,$1 915 mflo ($24,$12,$17) 916 mfhi ($25,$12,$17) 917 addu $3,$24 918 sltu $1,$3,$24 919 multu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); 920 addu $25,$1 921 addu $7,$25 922 sltu $1,$7,$25 923 addu $2,$1 924 sw $3,4*4($4) # r[4]=c2; 925 926 mflo ($24,$12,$19) 927 mfhi ($25,$12,$19) 928 addu $7,$24 929 sltu $1,$7,$24 930 multu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); 931 addu $25,$1 932 addu $2,$25 933 sltu $3,$2,$25 934 mflo ($24,$13,$17) 935 mfhi ($25,$13,$17) 936 addu $7,$24 937 sltu $1,$7,$24 938 multu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 939 addu $25,$1 940 addu $2,$25 941 sltu $1,$2,$25 942 addu $3,$1 943 mflo ($24,$14,$11) 944 mfhi ($25,$14,$11) 945 addu $7,$24 946 sltu $1,$7,$24 947 multu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 948 addu $25,$1 949 addu $2,$25 950 sltu $1,$2,$25 951 addu $3,$1 952 mflo ($24,$15,$10) 953 mfhi ($25,$15,$10) 954 addu $7,$24 955 sltu $1,$7,$24 956 multu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); 957 addu $25,$1 958 addu $2,$25 959 sltu $1,$2,$25 960 addu $3,$1 961 mflo ($24,$16,$9) 962 mfhi ($25,$16,$9) 963 addu $7,$24 964 sltu $1,$7,$24 965 multu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); 966 addu $25,$1 967 addu $2,$25 968 sltu $1,$2,$25 969 addu $3,$1 970 mflo ($24,$18,$8) 971 mfhi ($25,$18,$8) 972 addu $7,$24 973 sltu $1,$7,$24 974 multu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); 975 addu $25,$1 976 addu $2,$25 977 sltu $1,$2,$25 978 addu $3,$1 979 sw $7,5*4($4) # r[5]=c3; 980 981 mflo ($24,$20,$8) 982 mfhi ($25,$20,$8) 983 addu $2,$24 984 sltu $1,$2,$24 985 multu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); 986 addu $25,$1 987 addu $3,$25 988 sltu $7,$3,$25 989 mflo ($24,$18,$9) 990 mfhi ($25,$18,$9) 991 addu $2,$24 992 sltu $1,$2,$24 993 multu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); 994 addu $25,$1 995 addu $3,$25 996 sltu $1,$3,$25 997 addu $7,$1 998 mflo ($24,$16,$10) 999 mfhi ($25,$16,$10) 1000 addu $2,$24 1001 sltu $1,$2,$24 1002 multu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 1003 addu $25,$1 1004 addu $3,$25 1005 sltu $1,$3,$25 1006 addu $7,$1 1007 mflo ($24,$15,$11) 1008 mfhi ($25,$15,$11) 1009 addu $2,$24 1010 sltu $1,$2,$24 1011 multu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); 1012 addu $25,$1 1013 addu $3,$25 1014 sltu $1,$3,$25 1015 addu $7,$1 1016 mflo ($24,$14,$17) 1017 mfhi ($25,$14,$17) 1018 addu $2,$24 1019 sltu $1,$2,$24 1020 multu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); 1021 addu $25,$1 1022 addu $3,$25 1023 sltu $1,$3,$25 1024 addu $7,$1 1025 mflo ($24,$13,$19) 1026 mfhi ($25,$13,$19) 1027 addu $2,$24 1028 sltu $1,$2,$24 1029 multu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3); 1030 addu $25,$1 1031 addu $3,$25 1032 sltu $1,$3,$25 1033 addu $7,$1 1034 mflo ($24,$12,$21) 1035 mfhi ($25,$12,$21) 1036 addu $2,$24 1037 sltu $1,$2,$24 1038 multu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); 1039 addu $25,$1 1040 addu $3,$25 1041 sltu $1,$3,$25 1042 addu $7,$1 1043 sw $2,6*4($4) # r[6]=c1; 1044 1045 mflo ($24,$12,$6) 1046 mfhi ($25,$12,$6) 1047 addu $3,$24 1048 sltu $1,$3,$24 1049 multu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); 1050 addu $25,$1 1051 addu $7,$25 1052 sltu $2,$7,$25 1053 mflo ($24,$13,$21) 1054 mfhi ($25,$13,$21) 1055 addu $3,$24 1056 sltu $1,$3,$24 1057 multu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); 1058 addu $25,$1 1059 addu $7,$25 1060 sltu $1,$7,$25 1061 addu $2,$1 1062 mflo ($24,$14,$19) 1063 mfhi ($25,$14,$19) 1064 addu $3,$24 1065 sltu $1,$3,$24 1066 multu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); 1067 addu $25,$1 1068 addu $7,$25 1069 sltu $1,$7,$25 1070 addu $2,$1 1071 mflo ($24,$15,$17) 1072 mfhi ($25,$15,$17) 1073 addu $3,$24 1074 sltu $1,$3,$24 1075 multu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1); 1076 addu $25,$1 1077 addu $7,$25 1078 sltu $1,$7,$25 1079 addu $2,$1 1080 mflo ($24,$16,$11) 1081 mfhi ($25,$16,$11) 1082 addu $3,$24 1083 sltu $1,$3,$24 1084 multu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); 1085 addu $25,$1 1086 addu $7,$25 1087 sltu $1,$7,$25 1088 addu $2,$1 1089 mflo ($24,$18,$10) 1090 mfhi ($25,$18,$10) 1091 addu $3,$24 1092 sltu $1,$3,$24 1093 multu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); 1094 addu $25,$1 1095 addu $7,$25 1096 sltu $1,$7,$25 1097 addu $2,$1 1098 mflo ($24,$20,$9) 1099 mfhi ($25,$20,$9) 1100 addu $3,$24 1101 sltu $1,$3,$24 1102 multu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1); 1103 addu $25,$1 1104 addu $7,$25 1105 sltu $1,$7,$25 1106 addu $2,$1 1107 mflo ($24,$5,$8) 1108 mfhi ($25,$5,$8) 1109 addu $3,$24 1110 sltu $1,$3,$24 1111 multu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); 1112 addu $25,$1 1113 addu $7,$25 1114 sltu $1,$7,$25 1115 addu $2,$1 1116 sw $3,7*4($4) # r[7]=c2; 1117 1118 mflo ($24,$5,$9) 1119 mfhi ($25,$5,$9) 1120 addu $7,$24 1121 sltu $1,$7,$24 1122 multu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); 1123 addu $25,$1 1124 addu $2,$25 1125 sltu $3,$2,$25 1126 mflo ($24,$20,$10) 1127 mfhi ($25,$20,$10) 1128 addu $7,$24 1129 sltu $1,$7,$24 1130 multu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); 1131 addu $25,$1 1132 addu $2,$25 1133 sltu $1,$2,$25 1134 addu $3,$1 1135 mflo ($24,$18,$11) 1136 mfhi ($25,$18,$11) 1137 addu $7,$24 1138 sltu $1,$7,$24 1139 multu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); 1140 addu $25,$1 1141 addu $2,$25 1142 sltu $1,$2,$25 1143 addu $3,$1 1144 mflo ($24,$16,$17) 1145 mfhi ($25,$16,$17) 1146 addu $7,$24 1147 sltu $1,$7,$24 1148 multu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); 1149 addu $25,$1 1150 addu $2,$25 1151 sltu $1,$2,$25 1152 addu $3,$1 1153 mflo ($24,$15,$19) 1154 mfhi ($25,$15,$19) 1155 addu $7,$24 1156 sltu $1,$7,$24 1157 multu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); 1158 addu $25,$1 1159 addu $2,$25 1160 sltu $1,$2,$25 1161 addu $3,$1 1162 mflo ($24,$14,$21) 1163 mfhi ($25,$14,$21) 1164 addu $7,$24 1165 sltu $1,$7,$24 1166 multu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); 1167 addu $25,$1 1168 addu $2,$25 1169 sltu $1,$2,$25 1170 addu $3,$1 1171 mflo ($24,$13,$6) 1172 mfhi ($25,$13,$6) 1173 addu $7,$24 1174 sltu $1,$7,$24 1175 multu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); 1176 addu $25,$1 1177 addu $2,$25 1178 sltu $1,$2,$25 1179 addu $3,$1 1180 sw $7,8*4($4) # r[8]=c3; 1181 1182 mflo ($24,$14,$6) 1183 mfhi ($25,$14,$6) 1184 addu $2,$24 1185 sltu $1,$2,$24 1186 multu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); 1187 addu $25,$1 1188 addu $3,$25 1189 sltu $7,$3,$25 1190 mflo ($24,$15,$21) 1191 mfhi ($25,$15,$21) 1192 addu $2,$24 1193 sltu $1,$2,$24 1194 multu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); 1195 addu $25,$1 1196 addu $3,$25 1197 sltu $1,$3,$25 1198 addu $7,$1 1199 mflo ($24,$16,$19) 1200 mfhi ($25,$16,$19) 1201 addu $2,$24 1202 sltu $1,$2,$24 1203 multu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); 1204 addu $25,$1 1205 addu $3,$25 1206 sltu $1,$3,$25 1207 addu $7,$1 1208 mflo ($24,$18,$17) 1209 mfhi ($25,$18,$17) 1210 addu $2,$24 1211 sltu $1,$2,$24 1212 multu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); 1213 addu $25,$1 1214 addu $3,$25 1215 sltu $1,$3,$25 1216 addu $7,$1 1217 mflo ($24,$20,$11) 1218 mfhi ($25,$20,$11) 1219 addu $2,$24 1220 sltu $1,$2,$24 1221 multu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); 1222 addu $25,$1 1223 addu $3,$25 1224 sltu $1,$3,$25 1225 addu $7,$1 1226 mflo ($24,$5,$10) 1227 mfhi ($25,$5,$10) 1228 addu $2,$24 1229 sltu $1,$2,$24 1230 multu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); 1231 addu $25,$1 1232 addu $3,$25 1233 sltu $1,$3,$25 1234 addu $7,$1 1235 sw $2,9*4($4) # r[9]=c1; 1236 1237 mflo ($24,$5,$11) 1238 mfhi ($25,$5,$11) 1239 addu $3,$24 1240 sltu $1,$3,$24 1241 multu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); 1242 addu $25,$1 1243 addu $7,$25 1244 sltu $2,$7,$25 1245 mflo ($24,$20,$17) 1246 mfhi ($25,$20,$17) 1247 addu $3,$24 1248 sltu $1,$3,$24 1249 multu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); 1250 addu $25,$1 1251 addu $7,$25 1252 sltu $1,$7,$25 1253 addu $2,$1 1254 mflo ($24,$18,$19) 1255 mfhi ($25,$18,$19) 1256 addu $3,$24 1257 sltu $1,$3,$24 1258 multu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); 1259 addu $25,$1 1260 addu $7,$25 1261 sltu $1,$7,$25 1262 addu $2,$1 1263 mflo ($24,$16,$21) 1264 mfhi ($25,$16,$21) 1265 addu $3,$24 1266 sltu $1,$3,$24 1267 multu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); 1268 addu $25,$1 1269 addu $7,$25 1270 sltu $1,$7,$25 1271 addu $2,$1 1272 mflo ($24,$15,$6) 1273 mfhi ($25,$15,$6) 1274 addu $3,$24 1275 sltu $1,$3,$24 1276 multu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); 1277 addu $25,$1 1278 addu $7,$25 1279 sltu $1,$7,$25 1280 addu $2,$1 1281 sw $3,10*4($4) # r[10]=c2; 1282 1283 mflo ($24,$16,$6) 1284 mfhi ($25,$16,$6) 1285 addu $7,$24 1286 sltu $1,$7,$24 1287 multu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); 1288 addu $25,$1 1289 addu $2,$25 1290 sltu $3,$2,$25 1291 mflo ($24,$18,$21) 1292 mfhi ($25,$18,$21) 1293 addu $7,$24 1294 sltu $1,$7,$24 1295 multu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); 1296 addu $25,$1 1297 addu $2,$25 1298 sltu $1,$2,$25 1299 addu $3,$1 1300 mflo ($24,$20,$19) 1301 mfhi ($25,$20,$19) 1302 addu $7,$24 1303 sltu $1,$7,$24 1304 multu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); 1305 addu $25,$1 1306 addu $2,$25 1307 sltu $1,$2,$25 1308 addu $3,$1 1309 mflo ($24,$5,$17) 1310 mfhi ($25,$5,$17) 1311 addu $7,$24 1312 sltu $1,$7,$24 1313 multu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); 1314 addu $25,$1 1315 addu $2,$25 1316 sltu $1,$2,$25 1317 addu $3,$1 1318 sw $7,11*4($4) # r[11]=c3; 1319 1320 mflo ($24,$5,$19) 1321 mfhi ($25,$5,$19) 1322 addu $2,$24 1323 sltu $1,$2,$24 1324 multu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); 1325 addu $25,$1 1326 addu $3,$25 1327 sltu $7,$3,$25 1328 mflo ($24,$20,$21) 1329 mfhi ($25,$20,$21) 1330 addu $2,$24 1331 sltu $1,$2,$24 1332 multu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); 1333 addu $25,$1 1334 addu $3,$25 1335 sltu $1,$3,$25 1336 addu $7,$1 1337 mflo ($24,$18,$6) 1338 mfhi ($25,$18,$6) 1339 addu $2,$24 1340 sltu $1,$2,$24 1341 multu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); 1342 addu $25,$1 1343 addu $3,$25 1344 sltu $1,$3,$25 1345 addu $7,$1 1346 sw $2,12*4($4) # r[12]=c1; 1347 1348 mflo ($24,$20,$6) 1349 mfhi ($25,$20,$6) 1350 addu $3,$24 1351 sltu $1,$3,$24 1352 multu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); 1353 addu $25,$1 1354 addu $7,$25 1355 sltu $2,$7,$25 1356 mflo ($24,$5,$21) 1357 mfhi ($25,$5,$21) 1358 addu $3,$24 1359 sltu $1,$3,$24 1360 multu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); 1361 addu $25,$1 1362 addu $7,$25 1363 sltu $1,$7,$25 1364 addu $2,$1 1365 sw $3,13*4($4) # r[13]=c2; 1366 1367 mflo ($24,$5,$6) 1368 mfhi ($25,$5,$6) 1369 addu $7,$24 1370 sltu $1,$7,$24 1371 addu $25,$1 1372 addu $2,$25 1373 sw $7,14*4($4) # r[14]=c3; 1374 sw $2,15*4($4) # r[15]=c1; 1375 1376 .set noreorder 1377 lw $21,5*4($29) 1378 lw $20,4*4($29) 1379 lw $19,3*4($29) 1380 lw $18,2*4($29) 1381 lw $17,1*4($29) 1382 lw $16,0*4($29) 1383 jr $31 1384 addu $29,6*4 1385.end bn_mul_comba8 1386 1387.align 5 1388.globl bn_mul_comba4 1389.ent bn_mul_comba4 1390bn_mul_comba4: 1391 .set reorder 1392 lw $12,0($5) 1393 lw $8,0($6) 1394 lw $13,4($5) 1395 lw $14,2*4($5) 1396 multu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 1397 lw $15,3*4($5) 1398 lw $9,4($6) 1399 lw $10,2*4($6) 1400 lw $11,3*4($6) 1401 mflo ($2,$12,$8) 1402 mfhi ($3,$12,$8) 1403 sw $2,0($4) 1404 1405 multu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 1406 mflo ($24,$12,$9) 1407 mfhi ($25,$12,$9) 1408 addu $3,$24 1409 sltu $1,$3,$24 1410 multu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 1411 addu $7,$25,$1 1412 mflo ($24,$13,$8) 1413 mfhi ($25,$13,$8) 1414 addu $3,$24 1415 sltu $1,$3,$24 1416 multu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 1417 addu $25,$1 1418 addu $7,$25 1419 sltu $2,$7,$25 1420 sw $3,4($4) 1421 1422 mflo ($24,$14,$8) 1423 mfhi ($25,$14,$8) 1424 addu $7,$24 1425 sltu $1,$7,$24 1426 multu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 1427 addu $25,$1 1428 addu $2,$25 1429 mflo ($24,$13,$9) 1430 mfhi ($25,$13,$9) 1431 addu $7,$24 1432 sltu $1,$7,$24 1433 multu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 1434 addu $25,$1 1435 addu $2,$25 1436 sltu $3,$2,$25 1437 mflo ($24,$12,$10) 1438 mfhi ($25,$12,$10) 1439 addu $7,$24 1440 sltu $1,$7,$24 1441 multu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 1442 addu $25,$1 1443 addu $2,$25 1444 sltu $1,$2,$25 1445 addu $3,$1 1446 sw $7,2*4($4) 1447 1448 mflo ($24,$12,$11) 1449 mfhi ($25,$12,$11) 1450 addu $2,$24 1451 sltu $1,$2,$24 1452 multu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 1453 addu $25,$1 1454 addu $3,$25 1455 sltu $7,$3,$25 1456 mflo ($24,$13,$10) 1457 mfhi ($25,$13,$10) 1458 addu $2,$24 1459 sltu $1,$2,$24 1460 multu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); 1461 addu $25,$1 1462 addu $3,$25 1463 sltu $1,$3,$25 1464 addu $7,$1 1465 mflo ($24,$14,$9) 1466 mfhi ($25,$14,$9) 1467 addu $2,$24 1468 sltu $1,$2,$24 1469 multu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 1470 addu $25,$1 1471 addu $3,$25 1472 sltu $1,$3,$25 1473 addu $7,$1 1474 mflo ($24,$15,$8) 1475 mfhi ($25,$15,$8) 1476 addu $2,$24 1477 sltu $1,$2,$24 1478 multu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 1479 addu $25,$1 1480 addu $3,$25 1481 sltu $1,$3,$25 1482 addu $7,$1 1483 sw $2,3*4($4) 1484 1485 mflo ($24,$15,$9) 1486 mfhi ($25,$15,$9) 1487 addu $3,$24 1488 sltu $1,$3,$24 1489 multu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 1490 addu $25,$1 1491 addu $7,$25 1492 sltu $2,$7,$25 1493 mflo ($24,$14,$10) 1494 mfhi ($25,$14,$10) 1495 addu $3,$24 1496 sltu $1,$3,$24 1497 multu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 1498 addu $25,$1 1499 addu $7,$25 1500 sltu $1,$7,$25 1501 addu $2,$1 1502 mflo ($24,$13,$11) 1503 mfhi ($25,$13,$11) 1504 addu $3,$24 1505 sltu $1,$3,$24 1506 multu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 1507 addu $25,$1 1508 addu $7,$25 1509 sltu $1,$7,$25 1510 addu $2,$1 1511 sw $3,4*4($4) 1512 1513 mflo ($24,$14,$11) 1514 mfhi ($25,$14,$11) 1515 addu $7,$24 1516 sltu $1,$7,$24 1517 multu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 1518 addu $25,$1 1519 addu $2,$25 1520 sltu $3,$2,$25 1521 mflo ($24,$15,$10) 1522 mfhi ($25,$15,$10) 1523 addu $7,$24 1524 sltu $1,$7,$24 1525 multu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 1526 addu $25,$1 1527 addu $2,$25 1528 sltu $1,$2,$25 1529 addu $3,$1 1530 sw $7,5*4($4) 1531 1532 mflo ($24,$15,$11) 1533 mfhi ($25,$15,$11) 1534 addu $2,$24 1535 sltu $1,$2,$24 1536 addu $25,$1 1537 addu $3,$25 1538 sw $2,6*4($4) 1539 sw $3,7*4($4) 1540 1541 .set noreorder 1542 jr $31 1543 nop 1544.end bn_mul_comba4 1545 1546.align 5 1547.globl bn_sqr_comba8 1548.ent bn_sqr_comba8 1549bn_sqr_comba8: 1550 .set reorder 1551 lw $12,0($5) 1552 lw $13,4($5) 1553 lw $14,2*4($5) 1554 lw $15,3*4($5) 1555 1556 multu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 1557 lw $8,4*4($5) 1558 lw $9,5*4($5) 1559 lw $10,6*4($5) 1560 lw $11,7*4($5) 1561 mflo ($2,$12,$12) 1562 mfhi ($3,$12,$12) 1563 sw $2,0($4) 1564 1565 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 1566 mflo ($24,$12,$13) 1567 mfhi ($25,$12,$13) 1568 slt $2,$25,$0 1569 sll $25,1 1570 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 1571 slt $6,$24,$0 1572 addu $25,$6 1573 sll $24,1 1574 addu $3,$24 1575 sltu $1,$3,$24 1576 addu $7,$25,$1 1577 sw $3,4($4) 1578 mflo ($24,$14,$12) 1579 mfhi ($25,$14,$12) 1580 addu $7,$24 1581 sltu $1,$7,$24 1582 multu ($13,$13) # forward multiplication 1583 addu $7,$24 1584 addu $1,$25 1585 sltu $24,$7,$24 1586 addu $2,$1 1587 addu $25,$24 1588 sltu $3,$2,$1 1589 addu $2,$25 1590 sltu $25,$2,$25 1591 addu $3,$25 1592 mflo ($24,$13,$13) 1593 mfhi ($25,$13,$13) 1594 addu $7,$24 1595 sltu $1,$7,$24 1596 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 1597 addu $25,$1 1598 addu $2,$25 1599 sltu $1,$2,$25 1600 addu $3,$1 1601 sw $7,2*4($4) 1602 mflo ($24,$12,$15) 1603 mfhi ($25,$12,$15) 1604 addu $2,$24 1605 sltu $1,$2,$24 1606 multu ($13,$14) # forward multiplication 1607 addu $2,$24 1608 addu $1,$25 1609 sltu $24,$2,$24 1610 addu $3,$1 1611 addu $25,$24 1612 sltu $7,$3,$1 1613 addu $3,$25 1614 sltu $25,$3,$25 1615 addu $7,$25 1616 mflo ($24,$13,$14) 1617 mfhi ($25,$13,$14) 1618 addu $2,$24 1619 sltu $1,$2,$24 1620 multu ($8,$12) # forward multiplication 1621 addu $2,$24 1622 addu $1,$25 1623 sltu $24,$2,$24 1624 addu $3,$1 1625 addu $25,$24 1626 sltu $1,$3,$1 1627 addu $3,$25 1628 addu $7,$1 1629 sltu $25,$3,$25 1630 addu $7,$25 1631 mflo ($24,$8,$12) 1632 mfhi ($25,$8,$12) 1633 sw $2,3*4($4) 1634 addu $3,$24 1635 sltu $1,$3,$24 1636 multu ($15,$13) # forward multiplication 1637 addu $3,$24 1638 addu $1,$25 1639 sltu $24,$3,$24 1640 addu $7,$1 1641 addu $25,$24 1642 sltu $2,$7,$1 1643 addu $7,$25 1644 sltu $25,$7,$25 1645 addu $2,$25 1646 mflo ($24,$15,$13) 1647 mfhi ($25,$15,$13) 1648 addu $3,$24 1649 sltu $1,$3,$24 1650 multu ($14,$14) # forward multiplication 1651 addu $3,$24 1652 addu $1,$25 1653 sltu $24,$3,$24 1654 addu $7,$1 1655 addu $25,$24 1656 sltu $1,$7,$1 1657 addu $7,$25 1658 addu $2,$1 1659 sltu $25,$7,$25 1660 addu $2,$25 1661 mflo ($24,$14,$14) 1662 mfhi ($25,$14,$14) 1663 addu $3,$24 1664 sltu $1,$3,$24 1665 multu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); 1666 addu $25,$1 1667 addu $7,$25 1668 sltu $1,$7,$25 1669 addu $2,$1 1670 sw $3,4*4($4) 1671 mflo ($24,$12,$9) 1672 mfhi ($25,$12,$9) 1673 addu $7,$24 1674 sltu $1,$7,$24 1675 multu ($13,$8) # forward multiplication 1676 addu $7,$24 1677 addu $1,$25 1678 sltu $24,$7,$24 1679 addu $2,$1 1680 addu $25,$24 1681 sltu $3,$2,$1 1682 addu $2,$25 1683 sltu $25,$2,$25 1684 addu $3,$25 1685 mflo ($24,$13,$8) 1686 mfhi ($25,$13,$8) 1687 addu $7,$24 1688 sltu $1,$7,$24 1689 multu ($14,$15) # forward multiplication 1690 addu $7,$24 1691 addu $1,$25 1692 sltu $24,$7,$24 1693 addu $2,$1 1694 addu $25,$24 1695 sltu $1,$2,$1 1696 addu $2,$25 1697 addu $3,$1 1698 sltu $25,$2,$25 1699 addu $3,$25 1700 mflo ($24,$14,$15) 1701 mfhi ($25,$14,$15) 1702 addu $7,$24 1703 sltu $1,$7,$24 1704 multu ($10,$12) # forward multiplication 1705 addu $7,$24 1706 addu $1,$25 1707 sltu $24,$7,$24 1708 addu $2,$1 1709 addu $25,$24 1710 sltu $1,$2,$1 1711 addu $2,$25 1712 addu $3,$1 1713 sltu $25,$2,$25 1714 addu $3,$25 1715 mflo ($24,$10,$12) 1716 mfhi ($25,$10,$12) 1717 sw $7,5*4($4) 1718 addu $2,$24 1719 sltu $1,$2,$24 1720 multu ($9,$13) # forward multiplication 1721 addu $2,$24 1722 addu $1,$25 1723 sltu $24,$2,$24 1724 addu $3,$1 1725 addu $25,$24 1726 sltu $7,$3,$1 1727 addu $3,$25 1728 sltu $25,$3,$25 1729 addu $7,$25 1730 mflo ($24,$9,$13) 1731 mfhi ($25,$9,$13) 1732 addu $2,$24 1733 sltu $1,$2,$24 1734 multu ($8,$14) # forward multiplication 1735 addu $2,$24 1736 addu $1,$25 1737 sltu $24,$2,$24 1738 addu $3,$1 1739 addu $25,$24 1740 sltu $1,$3,$1 1741 addu $3,$25 1742 addu $7,$1 1743 sltu $25,$3,$25 1744 addu $7,$25 1745 mflo ($24,$8,$14) 1746 mfhi ($25,$8,$14) 1747 addu $2,$24 1748 sltu $1,$2,$24 1749 multu ($15,$15) # forward multiplication 1750 addu $2,$24 1751 addu $1,$25 1752 sltu $24,$2,$24 1753 addu $3,$1 1754 addu $25,$24 1755 sltu $1,$3,$1 1756 addu $3,$25 1757 addu $7,$1 1758 sltu $25,$3,$25 1759 addu $7,$25 1760 mflo ($24,$15,$15) 1761 mfhi ($25,$15,$15) 1762 addu $2,$24 1763 sltu $1,$2,$24 1764 multu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); 1765 addu $25,$1 1766 addu $3,$25 1767 sltu $1,$3,$25 1768 addu $7,$1 1769 sw $2,6*4($4) 1770 mflo ($24,$12,$11) 1771 mfhi ($25,$12,$11) 1772 addu $3,$24 1773 sltu $1,$3,$24 1774 multu ($13,$10) # forward multiplication 1775 addu $3,$24 1776 addu $1,$25 1777 sltu $24,$3,$24 1778 addu $7,$1 1779 addu $25,$24 1780 sltu $2,$7,$1 1781 addu $7,$25 1782 sltu $25,$7,$25 1783 addu $2,$25 1784 mflo ($24,$13,$10) 1785 mfhi ($25,$13,$10) 1786 addu $3,$24 1787 sltu $1,$3,$24 1788 multu ($14,$9) # forward multiplication 1789 addu $3,$24 1790 addu $1,$25 1791 sltu $24,$3,$24 1792 addu $7,$1 1793 addu $25,$24 1794 sltu $1,$7,$1 1795 addu $7,$25 1796 addu $2,$1 1797 sltu $25,$7,$25 1798 addu $2,$25 1799 mflo ($24,$14,$9) 1800 mfhi ($25,$14,$9) 1801 addu $3,$24 1802 sltu $1,$3,$24 1803 multu ($15,$8) # forward multiplication 1804 addu $3,$24 1805 addu $1,$25 1806 sltu $24,$3,$24 1807 addu $7,$1 1808 addu $25,$24 1809 sltu $1,$7,$1 1810 addu $7,$25 1811 addu $2,$1 1812 sltu $25,$7,$25 1813 addu $2,$25 1814 mflo ($24,$15,$8) 1815 mfhi ($25,$15,$8) 1816 addu $3,$24 1817 sltu $1,$3,$24 1818 multu ($11,$13) # forward multiplication 1819 addu $3,$24 1820 addu $1,$25 1821 sltu $24,$3,$24 1822 addu $7,$1 1823 addu $25,$24 1824 sltu $1,$7,$1 1825 addu $7,$25 1826 addu $2,$1 1827 sltu $25,$7,$25 1828 addu $2,$25 1829 mflo ($24,$11,$13) 1830 mfhi ($25,$11,$13) 1831 sw $3,7*4($4) 1832 addu $7,$24 1833 sltu $1,$7,$24 1834 multu ($10,$14) # forward multiplication 1835 addu $7,$24 1836 addu $1,$25 1837 sltu $24,$7,$24 1838 addu $2,$1 1839 addu $25,$24 1840 sltu $3,$2,$1 1841 addu $2,$25 1842 sltu $25,$2,$25 1843 addu $3,$25 1844 mflo ($24,$10,$14) 1845 mfhi ($25,$10,$14) 1846 addu $7,$24 1847 sltu $1,$7,$24 1848 multu ($9,$15) # forward multiplication 1849 addu $7,$24 1850 addu $1,$25 1851 sltu $24,$7,$24 1852 addu $2,$1 1853 addu $25,$24 1854 sltu $1,$2,$1 1855 addu $2,$25 1856 addu $3,$1 1857 sltu $25,$2,$25 1858 addu $3,$25 1859 mflo ($24,$9,$15) 1860 mfhi ($25,$9,$15) 1861 addu $7,$24 1862 sltu $1,$7,$24 1863 multu ($8,$8) # forward multiplication 1864 addu $7,$24 1865 addu $1,$25 1866 sltu $24,$7,$24 1867 addu $2,$1 1868 addu $25,$24 1869 sltu $1,$2,$1 1870 addu $2,$25 1871 addu $3,$1 1872 sltu $25,$2,$25 1873 addu $3,$25 1874 mflo ($24,$8,$8) 1875 mfhi ($25,$8,$8) 1876 addu $7,$24 1877 sltu $1,$7,$24 1878 multu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); 1879 addu $25,$1 1880 addu $2,$25 1881 sltu $1,$2,$25 1882 addu $3,$1 1883 sw $7,8*4($4) 1884 mflo ($24,$14,$11) 1885 mfhi ($25,$14,$11) 1886 addu $2,$24 1887 sltu $1,$2,$24 1888 multu ($15,$10) # forward multiplication 1889 addu $2,$24 1890 addu $1,$25 1891 sltu $24,$2,$24 1892 addu $3,$1 1893 addu $25,$24 1894 sltu $7,$3,$1 1895 addu $3,$25 1896 sltu $25,$3,$25 1897 addu $7,$25 1898 mflo ($24,$15,$10) 1899 mfhi ($25,$15,$10) 1900 addu $2,$24 1901 sltu $1,$2,$24 1902 multu ($8,$9) # forward multiplication 1903 addu $2,$24 1904 addu $1,$25 1905 sltu $24,$2,$24 1906 addu $3,$1 1907 addu $25,$24 1908 sltu $1,$3,$1 1909 addu $3,$25 1910 addu $7,$1 1911 sltu $25,$3,$25 1912 addu $7,$25 1913 mflo ($24,$8,$9) 1914 mfhi ($25,$8,$9) 1915 addu $2,$24 1916 sltu $1,$2,$24 1917 multu ($11,$15) # forward multiplication 1918 addu $2,$24 1919 addu $1,$25 1920 sltu $24,$2,$24 1921 addu $3,$1 1922 addu $25,$24 1923 sltu $1,$3,$1 1924 addu $3,$25 1925 addu $7,$1 1926 sltu $25,$3,$25 1927 addu $7,$25 1928 mflo ($24,$11,$15) 1929 mfhi ($25,$11,$15) 1930 sw $2,9*4($4) 1931 addu $3,$24 1932 sltu $1,$3,$24 1933 multu ($10,$8) # forward multiplication 1934 addu $3,$24 1935 addu $1,$25 1936 sltu $24,$3,$24 1937 addu $7,$1 1938 addu $25,$24 1939 sltu $2,$7,$1 1940 addu $7,$25 1941 sltu $25,$7,$25 1942 addu $2,$25 1943 mflo ($24,$10,$8) 1944 mfhi ($25,$10,$8) 1945 addu $3,$24 1946 sltu $1,$3,$24 1947 multu ($9,$9) # forward multiplication 1948 addu $3,$24 1949 addu $1,$25 1950 sltu $24,$3,$24 1951 addu $7,$1 1952 addu $25,$24 1953 sltu $1,$7,$1 1954 addu $7,$25 1955 addu $2,$1 1956 sltu $25,$7,$25 1957 addu $2,$25 1958 mflo ($24,$9,$9) 1959 mfhi ($25,$9,$9) 1960 addu $3,$24 1961 sltu $1,$3,$24 1962 multu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); 1963 addu $25,$1 1964 addu $7,$25 1965 sltu $1,$7,$25 1966 addu $2,$1 1967 sw $3,10*4($4) 1968 mflo ($24,$8,$11) 1969 mfhi ($25,$8,$11) 1970 addu $7,$24 1971 sltu $1,$7,$24 1972 multu ($9,$10) # forward multiplication 1973 addu $7,$24 1974 addu $1,$25 1975 sltu $24,$7,$24 1976 addu $2,$1 1977 addu $25,$24 1978 sltu $3,$2,$1 1979 addu $2,$25 1980 sltu $25,$2,$25 1981 addu $3,$25 1982 mflo ($24,$9,$10) 1983 mfhi ($25,$9,$10) 1984 addu $7,$24 1985 sltu $1,$7,$24 1986 multu ($11,$9) # forward multiplication 1987 addu $7,$24 1988 addu $1,$25 1989 sltu $24,$7,$24 1990 addu $2,$1 1991 addu $25,$24 1992 sltu $1,$2,$1 1993 addu $2,$25 1994 addu $3,$1 1995 sltu $25,$2,$25 1996 addu $3,$25 1997 mflo ($24,$11,$9) 1998 mfhi ($25,$11,$9) 1999 sw $7,11*4($4) 2000 addu $2,$24 2001 sltu $1,$2,$24 2002 multu ($10,$10) # forward multiplication 2003 addu $2,$24 2004 addu $1,$25 2005 sltu $24,$2,$24 2006 addu $3,$1 2007 addu $25,$24 2008 sltu $7,$3,$1 2009 addu $3,$25 2010 sltu $25,$3,$25 2011 addu $7,$25 2012 mflo ($24,$10,$10) 2013 mfhi ($25,$10,$10) 2014 addu $2,$24 2015 sltu $1,$2,$24 2016 multu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); 2017 addu $25,$1 2018 addu $3,$25 2019 sltu $1,$3,$25 2020 addu $7,$1 2021 sw $2,12*4($4) 2022 mflo ($24,$10,$11) 2023 mfhi ($25,$10,$11) 2024 addu $3,$24 2025 sltu $1,$3,$24 2026 multu ($11,$11) # forward multiplication 2027 addu $3,$24 2028 addu $1,$25 2029 sltu $24,$3,$24 2030 addu $7,$1 2031 addu $25,$24 2032 sltu $2,$7,$1 2033 addu $7,$25 2034 sltu $25,$7,$25 2035 addu $2,$25 2036 mflo ($24,$11,$11) 2037 mfhi ($25,$11,$11) 2038 sw $3,13*4($4) 2039 2040 addu $7,$24 2041 sltu $1,$7,$24 2042 addu $25,$1 2043 addu $2,$25 2044 sw $7,14*4($4) 2045 sw $2,15*4($4) 2046 2047 .set noreorder 2048 jr $31 2049 nop 2050.end bn_sqr_comba8 2051 2052.align 5 2053.globl bn_sqr_comba4 2054.ent bn_sqr_comba4 2055bn_sqr_comba4: 2056 .set reorder 2057 lw $12,0($5) 2058 lw $13,4($5) 2059 multu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); 2060 lw $14,2*4($5) 2061 lw $15,3*4($5) 2062 mflo ($2,$12,$12) 2063 mfhi ($3,$12,$12) 2064 sw $2,0($4) 2065 2066 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 2067 mflo ($24,$12,$13) 2068 mfhi ($25,$12,$13) 2069 slt $2,$25,$0 2070 sll $25,1 2071 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 2072 slt $6,$24,$0 2073 addu $25,$6 2074 sll $24,1 2075 addu $3,$24 2076 sltu $1,$3,$24 2077 addu $7,$25,$1 2078 sw $3,4($4) 2079 mflo ($24,$14,$12) 2080 mfhi ($25,$14,$12) 2081 addu $7,$24 2082 sltu $1,$7,$24 2083 multu ($13,$13) # forward multiplication 2084 addu $7,$24 2085 addu $1,$25 2086 sltu $24,$7,$24 2087 addu $2,$1 2088 addu $25,$24 2089 sltu $3,$2,$1 2090 addu $2,$25 2091 sltu $25,$2,$25 2092 addu $3,$25 2093 mflo ($24,$13,$13) 2094 mfhi ($25,$13,$13) 2095 addu $7,$24 2096 sltu $1,$7,$24 2097 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 2098 addu $25,$1 2099 addu $2,$25 2100 sltu $1,$2,$25 2101 addu $3,$1 2102 sw $7,2*4($4) 2103 mflo ($24,$12,$15) 2104 mfhi ($25,$12,$15) 2105 addu $2,$24 2106 sltu $1,$2,$24 2107 multu ($13,$14) # forward multiplication 2108 addu $2,$24 2109 addu $1,$25 2110 sltu $24,$2,$24 2111 addu $3,$1 2112 addu $25,$24 2113 sltu $7,$3,$1 2114 addu $3,$25 2115 sltu $25,$3,$25 2116 addu $7,$25 2117 mflo ($24,$13,$14) 2118 mfhi ($25,$13,$14) 2119 addu $2,$24 2120 sltu $1,$2,$24 2121 multu ($15,$13) # forward multiplication 2122 addu $2,$24 2123 addu $1,$25 2124 sltu $24,$2,$24 2125 addu $3,$1 2126 addu $25,$24 2127 sltu $1,$3,$1 2128 addu $3,$25 2129 addu $7,$1 2130 sltu $25,$3,$25 2131 addu $7,$25 2132 mflo ($24,$15,$13) 2133 mfhi ($25,$15,$13) 2134 sw $2,3*4($4) 2135 addu $3,$24 2136 sltu $1,$3,$24 2137 multu ($14,$14) # forward multiplication 2138 addu $3,$24 2139 addu $1,$25 2140 sltu $24,$3,$24 2141 addu $7,$1 2142 addu $25,$24 2143 sltu $2,$7,$1 2144 addu $7,$25 2145 sltu $25,$7,$25 2146 addu $2,$25 2147 mflo ($24,$14,$14) 2148 mfhi ($25,$14,$14) 2149 addu $3,$24 2150 sltu $1,$3,$24 2151 multu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2); 2152 addu $25,$1 2153 addu $7,$25 2154 sltu $1,$7,$25 2155 addu $2,$1 2156 sw $3,4*4($4) 2157 mflo ($24,$14,$15) 2158 mfhi ($25,$14,$15) 2159 addu $7,$24 2160 sltu $1,$7,$24 2161 multu ($15,$15) # forward multiplication 2162 addu $7,$24 2163 addu $1,$25 2164 sltu $24,$7,$24 2165 addu $2,$1 2166 addu $25,$24 2167 sltu $3,$2,$1 2168 addu $2,$25 2169 sltu $25,$2,$25 2170 addu $3,$25 2171 mflo ($24,$15,$15) 2172 mfhi ($25,$15,$15) 2173 sw $7,5*4($4) 2174 2175 addu $2,$24 2176 sltu $1,$2,$24 2177 addu $25,$1 2178 addu $3,$25 2179 sw $2,6*4($4) 2180 sw $3,7*4($4) 2181 2182 .set noreorder 2183 jr $31 2184 nop 2185.end bn_sqr_comba4 2186