# Word-level bignum primitives for 32-bit MIPS (4-byte words, lw/sw).
#
# Conventions visible in this file:
#   - arguments arrive in $4,$5,$6,$7; results are returned in $2 and
#     mirrored into $4 by the instruction in the jr delay slot;
#   - $1 (at) is used as an explicit scratch register (.set noat);
#   - multu/divu/mflo/mfhi/mfqt are written as function-like macros so the
#     same text can target pre-R6 and R6 cores; multu/mflo/mfhi are
#     presumably provided by mips_arch.h (not visible here, confirm);
#   - every public entry point is a small stub that validates the count or
#     divisor and then branches to the *_internal worker.
#
# NOTE for maintainers: this file goes through cpp.  Never start a comment
# with a lowercase preprocessor keyword right after the hash sign, and keep
# quote characters balanced on each comment text row.
.set	mips2
#include "mips_arch.h"

#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt)	divu	rd,rs,rt
# define mfrm(rd,rs,rt)	modu	rd,rs,rt
#else
# define divu(rs,rt)	divu	$0,rs,rt
# define mfqt(rd,rs,rt)	mflo	rd
# define mfrm(rd,rs,rt)	mfhi	rd
#endif

.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
.set	noat

# ----------------------------------------------------------------------
# bn_mul_add_words
#   In:  $4 = r (words are read, updated and written back)
#        $5 = a (words are read)
#        $6 = word count (signed)
#        $7 = w, the single-word multiplier
#   Out: $2 = final carry word (0 when the count is not positive)
#   Presumably the C routine of the same name taking (rp, ap, num, w);
#   confirm against the C prototype.
# ----------------------------------------------------------------------
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2			# delay slot: mirror result into $4
.end	bn_mul_add_words

# Worker: per word, r[i] = r[i] + carry + lo(a[i]*w) and
# carry = hi(a[i]*w) + the two carry-outs of those additions.
#   $2 = running carry, $3 = -4 (mask clearing the two low count bits),
#   $1 = scratch.  The main loop handles four words per pass; the tail
#   handles the remaining one to three words.
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail	# fewer than four words

.L_bn_mul_add_words_loop:
	lw	$12,0($5)
	multu	($12,$7)
	lw	$13,0($4)
	lw	$14,4($5)
	lw	$15,4($4)
	lw	$8,2*4($5)
	lw	$9,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2	# All manuals say sltu compares 32-bit
				# values, but it seems to work fine
				# even on 64-bit registers.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	multu	($14,$7)
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1

	lw	$10,3*4($5)
	lw	$11,3*4($4)
	addu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$15,$1
	addu	$2,$14
	multu	($8,$7)
	sltu	$1,$15,$1
	sw	$15,4($4)
	addu	$2,$1

	subu	$6,4
	addu	$4,4*4			# pointers advance early; the stores
	addu	$5,4*4			# below use negative offsets
	addu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$9,$1
	addu	$2,$8
	multu	($10,$7)
	sltu	$1,$9,$1
	sw	$9,-2*4($4)
	addu	$2,$1


	and	$8,$6,$3		# another full group of four left?
	addu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$11,$1
	addu	$2,$10
	sltu	$1,$11,$1
	sw	$11,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	addu	$2,$1			# delay slot: finish the carry

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	lw	$13,0($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,0($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,4($5)
	multu	($12,$7)
	lw	$13,4($4)
	subu	$6,1
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,4($4)
	addu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	lw	$13,2*4($4)
	addu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$13,$1
	addu	$2,$12
	sltu	$1,$13,$1
	sw	$13,2*4($4)
	addu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_mul_add_words_internal

# ----------------------------------------------------------------------
# bn_mul_words
#   In:  $4 = r (written only), $5 = a, $6 = word count (signed),
#        $7 = w, the single-word multiplier
#   Out: $2 = final carry word (0 when the count is not positive)
# ----------------------------------------------------------------------
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_mul_words

# Worker: per word, r[i] = lo(a[i]*w) + carry and
# carry = hi(a[i]*w) + carry-out of that addition.
# Same four-way unrolling and tail handling as bn_mul_add_words_internal.
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	lw	$12,0($5)
	multu	($12,$7)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	multu	($14,$7)
	sw	$2,0($4)
	addu	$2,$13,$12

	subu	$6,4
	addu	$4,4*4
	addu	$5,4*4
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	addu	$2,$1
	sltu	$15,$2,$1
	multu	($8,$7)
	sw	$2,-3*4($4)
	addu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	addu	$2,$1
	sltu	$9,$2,$1
	multu	($10,$7)
	sw	$2,-2*4($4)
	addu	$2,$9,$8

	and	$8,$6,$3
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	addu	$2,$1
	sltu	$11,$2,$1
	sw	$2,-4($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	addu	$2,$11,$10		# delay slot: carry for next word

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,0($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,4($5)
	multu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,4($4)
	addu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	lw	$12,2*4($5)
	multu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	addu	$2,$1
	sltu	$13,$2,$1
	sw	$2,2*4($4)
	addu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_mul_words_internal

# ----------------------------------------------------------------------
# bn_sqr_words
#   In:  $4 = r (two words written per input word), $5 = a,
#        $6 = word count (signed)
#   Out: $2 = 0 (set by the entry stub and never modified afterwards)
# ----------------------------------------------------------------------
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0
	jr	$31
	move	$4,$2
.end	bn_sqr_words

# Worker: r[2*i] = lo(a[i]*a[i]), r[2*i+1] = hi(a[i]*a[i]).
# Squares are independent, so no carry is propagated between words.
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	lw	$12,0($5)
	multu	($12,$12)
	lw	$14,4($5)
	lw	$8,2*4($5)
	lw	$10,3*4($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)
	sw	$12,4($4)

	multu	($14,$14)
	subu	$6,4
	addu	$4,8*4			# output advances twice as fast
	addu	$5,4*4
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sw	$15,-6*4($4)
	sw	$14,-5*4($4)

	multu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sw	$9,-4*4($4)
	sw	$8,-3*4($4)


	multu	($10,$10)
	and	$8,$6,$3
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sw	$11,-2*4($4)

	.set	noreorder
	sw	$10,-4($4)
	bgtz	$8,.L_bn_sqr_words_loop
	nop

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	lw	$12,0($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,0($4)
	sw	$12,4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,4($5)
	multu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,2*4($4)
	sw	$12,3*4($4)
	beqz	$6,.L_bn_sqr_words_return

	lw	$12,2*4($5)
	multu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sw	$13,4*4($4)
	sw	$12,5*4($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_sqr_words_internal

# ----------------------------------------------------------------------
# bn_add_words
#   In:  $4 = r (written), $5 = a, $6 = b, $7 = word count (signed)
#   Out: $2 = carry out of the most significant word, 0 or 1
#        (0 when the count is not positive)
# ----------------------------------------------------------------------
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0			# delay slot: carry-in = 0
	jr	$31
	move	$4,$2
.end	bn_add_words

# Worker: per word, r[i] = a[i] + b[i] + carry.  The carry-out is the sum
# of two flags: overflow of a+b ($24 or $25) and overflow of adding the
# incoming carry ($2); at most one of them can be set.
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# another full group of four left?
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	addu	$8,$12
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	addu	$9,$13
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25

	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	addu	$11,$15
	sltu	$25,$11,$15
	addu	$15,$11,$2
	sltu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	addu	$2,$25			# delay slot: finish the carry

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	addu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	addu	$12,$8,$2
	sltu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_add_words_return

	lw	$13,4($5)
	lw	$9,4($6)
	addu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	addu	$13,$9,$2
	sltu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_add_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	addu	$10,$14
	sltu	$24,$10,$14
	addu	$14,$10,$2
	sltu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_add_words_internal

# ----------------------------------------------------------------------
# bn_sub_words
#   In:  $4 = r (written), $5 = a, $6 = b, $7 = word count (signed)
#   Out: $2 = borrow out of the most significant word, 0 or 1
#        (0 when the count is not positive)
# ----------------------------------------------------------------------
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0			# delay slot: borrow-in = 0
	jr	$31
	move	$4,$0
.end	bn_sub_words

# Worker: per word, r[i] = a[i] - b[i] - borrow.  The borrow-out is the sum
# of two flags: a[i] < b[i] ($24 or $25) and underflow when the incoming
# borrow is subtracted ($2).
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,4
	lw	$13,4($5)
	and	$1,$7,$3		# another full group of four left?
	lw	$14,2*4($5)
	addu	$6,4*4
	lw	$15,3*4($5)
	addu	$4,4*4
	lw	$9,-3*4($6)
	addu	$5,4*4
	lw	$10,-2*4($6)
	lw	$11,-4($6)
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,-4*4($4)
	addu	$2,$24

	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,-3*4($4)
	addu	$2,$25


	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,-2*4($4)
	addu	$2,$24

	sltu	$25,$15,$11
	subu	$11,$15,$11
	subu	$15,$11,$2
	sgtu	$2,$15,$11
	sw	$15,-4($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	addu	$2,$25			# delay slot: finish the borrow

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	subu	$8,$12,$8
	subu	$12,$8,$2
	sgtu	$2,$12,$8
	sw	$12,0($4)
	addu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	lw	$13,4($5)
	subu	$7,1
	lw	$9,4($6)
	sltu	$25,$13,$9
	subu	$9,$13,$9
	subu	$13,$9,$2
	sgtu	$2,$13,$9
	sw	$13,4($4)
	addu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	lw	$14,2*4($5)
	lw	$10,2*4($6)
	sltu	$24,$14,$10
	subu	$10,$14,$10
	subu	$14,$10,$2
	sgtu	$2,$14,$10
	sw	$14,2*4($4)
	addu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_sub_words_internal

# ----------------------------------------------------------------------
# bn_div_3_words
#   In:  $4 = m, pointer to the top dividend word; m[0], m[-1] and m[-2]
#             are read
#        $5 = d1, lower divisor word
#        $6 = d0, upper divisor word
#   Out: $2 = quotient word; all ones when m[0] equals d0
#   Keeps m in $7, d1 in $10 and the return address in $11 across the
#   call into bn_div_words_internal, which leaves those registers alone.
# ----------------------------------------------------------------------
.align	5
.globl	bn_div_3_words
.ent	bn_div_3_words
bn_div_3_words:
	.set	noreorder
	move	$7,$4		# we know that bn_div_words does not
				# touch $7, $10, $11 and preserves $6
				# so that we can save two arguments
				# and return address in registers
				# instead of stack:-)

	lw	$4,($7)
	move	$10,$5
	lw	$5,-4($7)
	bne	$4,$6,bn_div_3_words_internal
	nop
	li	$2,-1
	jr	$31
	move	$4,$2
.end	bn_div_3_words

# Worker: q = (m[0]:m[-1]) / d0 via bn_div_words_internal, which also
# leaves the remainder in $5.  The loop then lowers q while the product
# q*d1 (hi in $13, lo in $12) is too large for remainder:m[-2], adding d0
# to the remainder each time, and stops once the remainder overflows.
.align	5
.ent	bn_div_3_words_internal
bn_div_3_words_internal:
	.set	reorder
	move	$11,$31
	bal	bn_div_words_internal
	move	$31,$11
	multu	($10,$2)
	lw	$14,-2*4($7)
	move	$8,$0
	mfhi	($13,$10,$2)
	mflo	($12,$10,$2)
	sltu	$24,$13,$5		# hi(q*d1) < remainder: q is final
.L_bn_div_3_words_inner_loop:
	bnez	$24,.L_bn_div_3_words_inner_loop_done
	sgeu	$1,$14,$12
	seq	$25,$13,$5
	and	$1,$25			# $1 = product fits: stop after this pass
	sltu	$15,$12,$10
	addu	$5,$6			# remainder += d0
	subu	$13,$15
	subu	$12,$10			# product -= d1
	sltu	$24,$13,$5
	sltu	$8,$5,$6		# remainder wrapped around
	or	$24,$8
	.set	noreorder
	beqz	$1,.L_bn_div_3_words_inner_loop
	subu	$2,1			# delay slot: q-- on every pass
	addu	$2,1			# undo the decrement of the last pass
	.set	reorder
.L_bn_div_3_words_inner_loop_done:
	.set	noreorder
	jr	$31
	move	$4,$2
.end	bn_div_3_words_internal

# ----------------------------------------------------------------------
# bn_div_words
#   In:  $4 = h, upper dividend word
#        $5 = l, lower dividend word
#        $6 = d, divisor
#   Out: $2 = quotient word; all ones when d is zero
# ----------------------------------------------------------------------
.align	5
.globl	bn_div_words
.ent	bn_div_words
bn_div_words:
	.set	noreorder
	bnez	$6,bn_div_words_internal
	li	$2,-1		# I would rather signal div-by-zero
				# which can be done with 'break 7'
	jr	$31
	move	$4,$2
.end	bn_div_words

# Worker: schoolbook division with two 16-bit quotient digits.
#   1. Normalise: shift d left until its top bit is set ($25 = shift count)
#      and shift h:l by the same amount; break 6 when bits of h would be
#      shifted out.
#   2. When h >= d, subtract d once.
#   3. For each half: estimate the digit as h / (d >> 16), or 0xffff when
#      the top halves are equal, then lower the estimate while digit*d
#      exceeds the current partial dividend.
#   Out: $2 = quotient, $3 and $5 = remainder shifted back, $6 = d restored.
#   Runs with whatever reorder mode is in effect on entry to this point in
#   the file (noreorder, as set by the preceding stub).
.align	5
.ent	bn_div_words_internal
bn_div_words_internal:
	move	$3,$0
	bltz	$6,.L_bn_div_words_body	# top bit already set
	move	$25,$3			# delay slot: shift count = 0
	sll	$6,1
	bgtz	$6,.-4			# loop back to the sll
	addu	$25,1			# delay slot: count each shift

	.set	reorder
	negu	$13,$25
	li	$14,-1
	sll	$14,$13
	and	$14,$4			# bits of h that would be lost
	srl	$1,$5,$13		# bits of l that move into h
	.set	noreorder
	beqz	$14,.+12
	nop
	break	6		# signal overflow
	.set	reorder
	sll	$4,$25
	sll	$5,$25
	or	$4,$1
.L_bn_div_words_body:
	srl	$3,$6,4*4	# bits
	sgeu	$1,$4,$6
	.set	noreorder
	beqz	$1,.+12
	nop
	subu	$4,$6
	.set	reorder

	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div1
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div1:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop1:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$2,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop1_done
	subu	$13,$2
	subu	$12,$6
	b	.L_bn_div_words_inner_loop1
	subu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop1_done:

	sll	$5,4*4	# bits
	subu	$4,$15,$12
	sll	$2,$8,4*4	# bits

	li	$8,-1
	srl	$9,$4,4*4	# bits
	srl	$8,4*4	# q=0xffffffff
	beq	$3,$9,.L_bn_div_words_skip_div2
	divu	($4,$3)
	mfqt	($8,$4,$3)
.L_bn_div_words_skip_div2:
	multu	($6,$8)
	sll	$15,$4,4*4	# bits
	srl	$1,$5,4*4	# bits
	or	$15,$1
	mflo	($12,$6,$8)
	mfhi	($13,$6,$8)
.L_bn_div_words_inner_loop2:
	sltu	$14,$15,$12
	seq	$24,$9,$13
	sltu	$1,$9,$13
	and	$14,$24
	sltu	$3,$12,$6
	or	$1,$14
	.set	noreorder
	beqz	$1,.L_bn_div_words_inner_loop2_done
	subu	$13,$3
	subu	$12,$6
	b	.L_bn_div_words_inner_loop2
	subu	$8,1
	.set	reorder
.L_bn_div_words_inner_loop2_done:

	subu	$4,$15,$12
	or	$2,$8
	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
	srl	$6,$25		# restore $6

	.set	noreorder
	move	$5,$3
	jr	$31
	move	$4,$2
.end	bn_div_words_internal

# ----------------------------------------------------------------------
# bn_mul_comba8
#   In:  $4 = r (16 words written), $5 = a (8 words), $6 = b (8 words)
#   Computes r = a * b one result column at a time.
#   Register map after the loads:
#     a[0..7] = $12,$13,$14,$15,$16,$18,$20,$5
#     b[0..7] = $8,$9,$10,$11,$17,$19,$21,$6
#     $2,$3,$7 rotate through the roles c1,c2,c3 of the three-word column
#     accumulator named in the mul_add_c comments; $24/$25 hold lo/hi of
#     the current product; $1 is the carry scratch.
#   Each multu is issued early: its comment names the step whose mflo/mfhi
#   follow a few instructions later.
#   Saves and restores $16-$21 in a 6-word stack frame.  All input words
#   are loaded before the first store to r.
# ----------------------------------------------------------------------
.align	5
.globl	bn_mul_comba8
.ent	bn_mul_comba8
bn_mul_comba8:
	.set	noreorder
	.frame	$29,6*4,$31
	.mask	0x003f0000,-4
	subu	$29,6*4
	sw	$21,5*4($29)
	sw	$20,4*4($29)
	sw	$19,3*4($29)
	sw	$18,2*4($29)
	sw	$17,1*4($29)
	sw	$16,0*4($29)

	.set	reorder
	lw	$12,0($5)	# When compiled with -mips3 option on an
				# R5000 box, the assembler barks on this line
				# with the warning: should not have mult/div
				# as last instruction in bb (R10K bug).
				# Anybody out there with a clue about how
				# to circumvent this, do send me a note.
				#		<appro@fy.chalmers.se>

	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)

	lw	$16,4*4($5)
	lw	$18,5*4($5)
	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	lw	$20,6*4($5)
	lw	$5,7*4($5)
	lw	$17,4*4($6)
	lw	$19,5*4($6)
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	lw	$21,6*4($6)
	lw	$6,7*4($6)
	sw	$2,0($4)	# r[0]=c1;
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)	# r[1]=c2;

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)	# r[2]=c3;

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)	# r[3]=c1;

	mflo	($24,$16,$8)
	mfhi	($25,$16,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$12,$17)
	mfhi	($25,$12,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)	# r[4]=c2;

	mflo	($24,$12,$19)
	mfhi	($25,$12,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$13,$17)
	mfhi	($25,$13,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$9)
	mfhi	($25,$16,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$8)
	mfhi	($25,$18,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)	# r[5]=c3;

	mflo	($24,$20,$8)
	mfhi	($25,$20,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$18,$9)
	mfhi	($25,$18,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$10)
	mfhi	($25,$16,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$17)
	mfhi	($25,$14,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$13,$19)
	mfhi	($25,$13,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$12,$21)
	mfhi	($25,$12,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,6*4($4)	# r[6]=c1;

	mflo	($24,$12,$6)
	mfhi	($25,$12,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$13,$21)
	mfhi	($25,$13,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$14,$19)
	mfhi	($25,$14,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$17)
	mfhi	($25,$15,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$11)
	mfhi	($25,$16,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$10)
	mfhi	($25,$18,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$20,$9)
	mfhi	($25,$20,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$5,$8)
	mfhi	($25,$5,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,7*4($4)	# r[7]=c2;

	mflo	($24,$5,$9)
	mfhi	($25,$5,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$20,$10)
	mfhi	($25,$20,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$18,$11)
	mfhi	($25,$18,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$16,$17)
	mfhi	($25,$16,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$15,$19)
	mfhi	($25,$15,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$14,$21)
	mfhi	($25,$14,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$13,$6)
	mfhi	($25,$13,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,8*4($4)	# r[8]=c3;

	mflo	($24,$14,$6)
	mfhi	($25,$14,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$15,$21)
	mfhi	($25,$15,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$16,$19)
	mfhi	($25,$16,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$17)
	mfhi	($25,$18,$17)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$20,$11)
	mfhi	($25,$20,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$5,$10)
	mfhi	($25,$5,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,9*4($4)	# r[9]=c1;

	mflo	($24,$5,$11)
	mfhi	($25,$5,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$20,$17)
	mfhi	($25,$20,$17)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$18,$19)
	mfhi	($25,$18,$19)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$16,$21)
	mfhi	($25,$16,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$15,$6)
	mfhi	($25,$15,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,10*4($4)	# r[10]=c2;

	mflo	($24,$16,$6)
	mfhi	($25,$16,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$18,$21)
	mfhi	($25,$18,$21)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$20,$19)
	mfhi	($25,$20,$19)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	($24,$5,$17)
	mfhi	($25,$5,$17)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,11*4($4)	# r[11]=c3;

	mflo	($24,$5,$19)
	mfhi	($25,$5,$19)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$20,$21)
	mfhi	($25,$20,$21)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$18,$6)
	mfhi	($25,$18,$6)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,12*4($4)	# r[12]=c1;

	mflo	($24,$20,$6)
	mfhi	($25,$20,$6)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$5,$21)
	mfhi	($25,$5,$21)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,13*4($4)	# r[13]=c2;

	mflo	($24,$5,$6)
	mfhi	($25,$5,$6)
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$25,$1
	addu	$2,$25
	sw	$7,14*4($4)	# r[14]=c3;
	sw	$2,15*4($4)	# r[15]=c1;

	.set	noreorder
	lw	$21,5*4($29)
	lw	$20,4*4($29)
	lw	$19,3*4($29)
	lw	$18,2*4($29)
	lw	$17,1*4($29)
	lw	$16,0*4($29)
	jr	$31
	addu	$29,6*4			# delay slot: release the frame
.end	bn_mul_comba8

# ----------------------------------------------------------------------
# bn_mul_comba4
#   In:  $4 = r (8 words written), $5 = a (4 words), $6 = b (4 words)
#   Computes r = a * b one result column at a time, same scheme as
#   bn_mul_comba8.
#   Register map: a[0..3] = $12,$13,$14,$15 and b[0..3] = $8,$9,$10,$11;
#   $2,$3,$7 rotate as c1,c2,c3; $24/$25 = lo/hi of the current product;
#   $1 = carry scratch.  No stack frame; all inputs are loaded before the
#   first store to r.
# ----------------------------------------------------------------------
.align	5
.globl	bn_mul_comba4
.ent	bn_mul_comba4
bn_mul_comba4:
	.set	reorder
	lw	$12,0($5)
	lw	$8,0($6)
	lw	$13,4($5)
	lw	$14,2*4($5)
	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$15,3*4($5)
	lw	$9,4($6)
	lw	$10,2*4($6)
	lw	$11,3*4($6)
	mflo	($2,$12,$8)
	mfhi	($3,$12,$8)
	sw	$2,0($4)

	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
	addu	$7,$25,$1
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	sw	$3,4($4)

	mflo	($24,$14,$8)
	mfhi	($25,$14,$8)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	mflo	($24,$13,$9)
	mfhi	($25,$13,$9)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$12,$10)
	mfhi	($25,$12,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)

	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$7,$3,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	addu	$2,$24
	sltu	$1,$2,$24
	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	addu	$25,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	addu	$3,$24
	sltu	$1,$3,$24
	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	addu	$25,$1
	addu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	addu	$7,$24
	sltu	$1,$7,$24
	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)
	sw	$3,7*4($4)

	.set	noreorder
	jr	$31
	nop
.end	bn_mul_comba4

.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	lw	$12,0($5)
	lw	$13,4($5)
	lw	$14,2*4($5)
	lw	$15,3*4($5)

	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	lw	$8,4*4($5)
	lw	$9,5*4($5)
	lw	$10,6*4($5)
	lw	$11,7*4($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sw	$2,0($4)

	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0
	sll	$25,1
	multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$7,$25,$1
	sw	$3,4($4)
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)
	addu	$7,$24
sltu $1,$7,$24 1574 multu ($13,$13) # forward multiplication 1575 addu $7,$24 1576 addu $1,$25 1577 sltu $24,$7,$24 1578 addu $2,$1 1579 addu $25,$24 1580 sltu $3,$2,$1 1581 addu $2,$25 1582 sltu $25,$2,$25 1583 addu $3,$25 1584 mflo ($24,$13,$13) 1585 mfhi ($25,$13,$13) 1586 addu $7,$24 1587 sltu $1,$7,$24 1588 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 1589 addu $25,$1 1590 addu $2,$25 1591 sltu $1,$2,$25 1592 addu $3,$1 1593 sw $7,2*4($4) 1594 mflo ($24,$12,$15) 1595 mfhi ($25,$12,$15) 1596 addu $2,$24 1597 sltu $1,$2,$24 1598 multu ($13,$14) # forward multiplication 1599 addu $2,$24 1600 addu $1,$25 1601 sltu $24,$2,$24 1602 addu $3,$1 1603 addu $25,$24 1604 sltu $7,$3,$1 1605 addu $3,$25 1606 sltu $25,$3,$25 1607 addu $7,$25 1608 mflo ($24,$13,$14) 1609 mfhi ($25,$13,$14) 1610 addu $2,$24 1611 sltu $1,$2,$24 1612 multu ($8,$12) # forward multiplication 1613 addu $2,$24 1614 addu $1,$25 1615 sltu $24,$2,$24 1616 addu $3,$1 1617 addu $25,$24 1618 sltu $1,$3,$1 1619 addu $3,$25 1620 addu $7,$1 1621 sltu $25,$3,$25 1622 addu $7,$25 1623 mflo ($24,$8,$12) 1624 mfhi ($25,$8,$12) 1625 sw $2,3*4($4) 1626 addu $3,$24 1627 sltu $1,$3,$24 1628 multu ($15,$13) # forward multiplication 1629 addu $3,$24 1630 addu $1,$25 1631 sltu $24,$3,$24 1632 addu $7,$1 1633 addu $25,$24 1634 sltu $2,$7,$1 1635 addu $7,$25 1636 sltu $25,$7,$25 1637 addu $2,$25 1638 mflo ($24,$15,$13) 1639 mfhi ($25,$15,$13) 1640 addu $3,$24 1641 sltu $1,$3,$24 1642 multu ($14,$14) # forward multiplication 1643 addu $3,$24 1644 addu $1,$25 1645 sltu $24,$3,$24 1646 addu $7,$1 1647 addu $25,$24 1648 sltu $1,$7,$1 1649 addu $7,$25 1650 addu $2,$1 1651 sltu $25,$7,$25 1652 addu $2,$25 1653 mflo ($24,$14,$14) 1654 mfhi ($25,$14,$14) 1655 addu $3,$24 1656 sltu $1,$3,$24 1657 multu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); 1658 addu $25,$1 1659 addu $7,$25 1660 sltu $1,$7,$25 1661 addu $2,$1 1662 sw $3,4*4($4) 1663 mflo ($24,$12,$9) 1664 mfhi ($25,$12,$9) 1665 addu $7,$24 1666 sltu $1,$7,$24 1667 multu 
($13,$8) # forward multiplication 1668 addu $7,$24 1669 addu $1,$25 1670 sltu $24,$7,$24 1671 addu $2,$1 1672 addu $25,$24 1673 sltu $3,$2,$1 1674 addu $2,$25 1675 sltu $25,$2,$25 1676 addu $3,$25 1677 mflo ($24,$13,$8) 1678 mfhi ($25,$13,$8) 1679 addu $7,$24 1680 sltu $1,$7,$24 1681 multu ($14,$15) # forward multiplication 1682 addu $7,$24 1683 addu $1,$25 1684 sltu $24,$7,$24 1685 addu $2,$1 1686 addu $25,$24 1687 sltu $1,$2,$1 1688 addu $2,$25 1689 addu $3,$1 1690 sltu $25,$2,$25 1691 addu $3,$25 1692 mflo ($24,$14,$15) 1693 mfhi ($25,$14,$15) 1694 addu $7,$24 1695 sltu $1,$7,$24 1696 multu ($10,$12) # forward multiplication 1697 addu $7,$24 1698 addu $1,$25 1699 sltu $24,$7,$24 1700 addu $2,$1 1701 addu $25,$24 1702 sltu $1,$2,$1 1703 addu $2,$25 1704 addu $3,$1 1705 sltu $25,$2,$25 1706 addu $3,$25 1707 mflo ($24,$10,$12) 1708 mfhi ($25,$10,$12) 1709 sw $7,5*4($4) 1710 addu $2,$24 1711 sltu $1,$2,$24 1712 multu ($9,$13) # forward multiplication 1713 addu $2,$24 1714 addu $1,$25 1715 sltu $24,$2,$24 1716 addu $3,$1 1717 addu $25,$24 1718 sltu $7,$3,$1 1719 addu $3,$25 1720 sltu $25,$3,$25 1721 addu $7,$25 1722 mflo ($24,$9,$13) 1723 mfhi ($25,$9,$13) 1724 addu $2,$24 1725 sltu $1,$2,$24 1726 multu ($8,$14) # forward multiplication 1727 addu $2,$24 1728 addu $1,$25 1729 sltu $24,$2,$24 1730 addu $3,$1 1731 addu $25,$24 1732 sltu $1,$3,$1 1733 addu $3,$25 1734 addu $7,$1 1735 sltu $25,$3,$25 1736 addu $7,$25 1737 mflo ($24,$8,$14) 1738 mfhi ($25,$8,$14) 1739 addu $2,$24 1740 sltu $1,$2,$24 1741 multu ($15,$15) # forward multiplication 1742 addu $2,$24 1743 addu $1,$25 1744 sltu $24,$2,$24 1745 addu $3,$1 1746 addu $25,$24 1747 sltu $1,$3,$1 1748 addu $3,$25 1749 addu $7,$1 1750 sltu $25,$3,$25 1751 addu $7,$25 1752 mflo ($24,$15,$15) 1753 mfhi ($25,$15,$15) 1754 addu $2,$24 1755 sltu $1,$2,$24 1756 multu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); 1757 addu $25,$1 1758 addu $3,$25 1759 sltu $1,$3,$25 1760 addu $7,$1 1761 sw $2,6*4($4) 1762 mflo ($24,$12,$11) 1763 
mfhi ($25,$12,$11) 1764 addu $3,$24 1765 sltu $1,$3,$24 1766 multu ($13,$10) # forward multiplication 1767 addu $3,$24 1768 addu $1,$25 1769 sltu $24,$3,$24 1770 addu $7,$1 1771 addu $25,$24 1772 sltu $2,$7,$1 1773 addu $7,$25 1774 sltu $25,$7,$25 1775 addu $2,$25 1776 mflo ($24,$13,$10) 1777 mfhi ($25,$13,$10) 1778 addu $3,$24 1779 sltu $1,$3,$24 1780 multu ($14,$9) # forward multiplication 1781 addu $3,$24 1782 addu $1,$25 1783 sltu $24,$3,$24 1784 addu $7,$1 1785 addu $25,$24 1786 sltu $1,$7,$1 1787 addu $7,$25 1788 addu $2,$1 1789 sltu $25,$7,$25 1790 addu $2,$25 1791 mflo ($24,$14,$9) 1792 mfhi ($25,$14,$9) 1793 addu $3,$24 1794 sltu $1,$3,$24 1795 multu ($15,$8) # forward multiplication 1796 addu $3,$24 1797 addu $1,$25 1798 sltu $24,$3,$24 1799 addu $7,$1 1800 addu $25,$24 1801 sltu $1,$7,$1 1802 addu $7,$25 1803 addu $2,$1 1804 sltu $25,$7,$25 1805 addu $2,$25 1806 mflo ($24,$15,$8) 1807 mfhi ($25,$15,$8) 1808 addu $3,$24 1809 sltu $1,$3,$24 1810 multu ($11,$13) # forward multiplication 1811 addu $3,$24 1812 addu $1,$25 1813 sltu $24,$3,$24 1814 addu $7,$1 1815 addu $25,$24 1816 sltu $1,$7,$1 1817 addu $7,$25 1818 addu $2,$1 1819 sltu $25,$7,$25 1820 addu $2,$25 1821 mflo ($24,$11,$13) 1822 mfhi ($25,$11,$13) 1823 sw $3,7*4($4) 1824 addu $7,$24 1825 sltu $1,$7,$24 1826 multu ($10,$14) # forward multiplication 1827 addu $7,$24 1828 addu $1,$25 1829 sltu $24,$7,$24 1830 addu $2,$1 1831 addu $25,$24 1832 sltu $3,$2,$1 1833 addu $2,$25 1834 sltu $25,$2,$25 1835 addu $3,$25 1836 mflo ($24,$10,$14) 1837 mfhi ($25,$10,$14) 1838 addu $7,$24 1839 sltu $1,$7,$24 1840 multu ($9,$15) # forward multiplication 1841 addu $7,$24 1842 addu $1,$25 1843 sltu $24,$7,$24 1844 addu $2,$1 1845 addu $25,$24 1846 sltu $1,$2,$1 1847 addu $2,$25 1848 addu $3,$1 1849 sltu $25,$2,$25 1850 addu $3,$25 1851 mflo ($24,$9,$15) 1852 mfhi ($25,$9,$15) 1853 addu $7,$24 1854 sltu $1,$7,$24 1855 multu ($8,$8) # forward multiplication 1856 addu $7,$24 1857 addu $1,$25 1858 sltu $24,$7,$24 1859 
addu $2,$1 1860 addu $25,$24 1861 sltu $1,$2,$1 1862 addu $2,$25 1863 addu $3,$1 1864 sltu $25,$2,$25 1865 addu $3,$25 1866 mflo ($24,$8,$8) 1867 mfhi ($25,$8,$8) 1868 addu $7,$24 1869 sltu $1,$7,$24 1870 multu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); 1871 addu $25,$1 1872 addu $2,$25 1873 sltu $1,$2,$25 1874 addu $3,$1 1875 sw $7,8*4($4) 1876 mflo ($24,$14,$11) 1877 mfhi ($25,$14,$11) 1878 addu $2,$24 1879 sltu $1,$2,$24 1880 multu ($15,$10) # forward multiplication 1881 addu $2,$24 1882 addu $1,$25 1883 sltu $24,$2,$24 1884 addu $3,$1 1885 addu $25,$24 1886 sltu $7,$3,$1 1887 addu $3,$25 1888 sltu $25,$3,$25 1889 addu $7,$25 1890 mflo ($24,$15,$10) 1891 mfhi ($25,$15,$10) 1892 addu $2,$24 1893 sltu $1,$2,$24 1894 multu ($8,$9) # forward multiplication 1895 addu $2,$24 1896 addu $1,$25 1897 sltu $24,$2,$24 1898 addu $3,$1 1899 addu $25,$24 1900 sltu $1,$3,$1 1901 addu $3,$25 1902 addu $7,$1 1903 sltu $25,$3,$25 1904 addu $7,$25 1905 mflo ($24,$8,$9) 1906 mfhi ($25,$8,$9) 1907 addu $2,$24 1908 sltu $1,$2,$24 1909 multu ($11,$15) # forward multiplication 1910 addu $2,$24 1911 addu $1,$25 1912 sltu $24,$2,$24 1913 addu $3,$1 1914 addu $25,$24 1915 sltu $1,$3,$1 1916 addu $3,$25 1917 addu $7,$1 1918 sltu $25,$3,$25 1919 addu $7,$25 1920 mflo ($24,$11,$15) 1921 mfhi ($25,$11,$15) 1922 sw $2,9*4($4) 1923 addu $3,$24 1924 sltu $1,$3,$24 1925 multu ($10,$8) # forward multiplication 1926 addu $3,$24 1927 addu $1,$25 1928 sltu $24,$3,$24 1929 addu $7,$1 1930 addu $25,$24 1931 sltu $2,$7,$1 1932 addu $7,$25 1933 sltu $25,$7,$25 1934 addu $2,$25 1935 mflo ($24,$10,$8) 1936 mfhi ($25,$10,$8) 1937 addu $3,$24 1938 sltu $1,$3,$24 1939 multu ($9,$9) # forward multiplication 1940 addu $3,$24 1941 addu $1,$25 1942 sltu $24,$3,$24 1943 addu $7,$1 1944 addu $25,$24 1945 sltu $1,$7,$1 1946 addu $7,$25 1947 addu $2,$1 1948 sltu $25,$7,$25 1949 addu $2,$25 1950 mflo ($24,$9,$9) 1951 mfhi ($25,$9,$9) 1952 addu $3,$24 1953 sltu $1,$3,$24 1954 multu ($8,$11) # 
mul_add_c2(a[4],b[7],c3,c1,c2); 1955 addu $25,$1 1956 addu $7,$25 1957 sltu $1,$7,$25 1958 addu $2,$1 1959 sw $3,10*4($4) 1960 mflo ($24,$8,$11) 1961 mfhi ($25,$8,$11) 1962 addu $7,$24 1963 sltu $1,$7,$24 1964 multu ($9,$10) # forward multiplication 1965 addu $7,$24 1966 addu $1,$25 1967 sltu $24,$7,$24 1968 addu $2,$1 1969 addu $25,$24 1970 sltu $3,$2,$1 1971 addu $2,$25 1972 sltu $25,$2,$25 1973 addu $3,$25 1974 mflo ($24,$9,$10) 1975 mfhi ($25,$9,$10) 1976 addu $7,$24 1977 sltu $1,$7,$24 1978 multu ($11,$9) # forward multiplication 1979 addu $7,$24 1980 addu $1,$25 1981 sltu $24,$7,$24 1982 addu $2,$1 1983 addu $25,$24 1984 sltu $1,$2,$1 1985 addu $2,$25 1986 addu $3,$1 1987 sltu $25,$2,$25 1988 addu $3,$25 1989 mflo ($24,$11,$9) 1990 mfhi ($25,$11,$9) 1991 sw $7,11*4($4) 1992 addu $2,$24 1993 sltu $1,$2,$24 1994 multu ($10,$10) # forward multiplication 1995 addu $2,$24 1996 addu $1,$25 1997 sltu $24,$2,$24 1998 addu $3,$1 1999 addu $25,$24 2000 sltu $7,$3,$1 2001 addu $3,$25 2002 sltu $25,$3,$25 2003 addu $7,$25 2004 mflo ($24,$10,$10) 2005 mfhi ($25,$10,$10) 2006 addu $2,$24 2007 sltu $1,$2,$24 2008 multu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); 2009 addu $25,$1 2010 addu $3,$25 2011 sltu $1,$3,$25 2012 addu $7,$1 2013 sw $2,12*4($4) 2014 mflo ($24,$10,$11) 2015 mfhi ($25,$10,$11) 2016 addu $3,$24 2017 sltu $1,$3,$24 2018 multu ($11,$11) # forward multiplication 2019 addu $3,$24 2020 addu $1,$25 2021 sltu $24,$3,$24 2022 addu $7,$1 2023 addu $25,$24 2024 sltu $2,$7,$1 2025 addu $7,$25 2026 sltu $25,$7,$25 2027 addu $2,$25 2028 mflo ($24,$11,$11) 2029 mfhi ($25,$11,$11) 2030 sw $3,13*4($4) 2031 2032 addu $7,$24 2033 sltu $1,$7,$24 2034 addu $25,$1 2035 addu $2,$25 2036 sw $7,14*4($4) 2037 sw $2,15*4($4) 2038 2039 .set noreorder 2040 jr $31 2041 nop 2042.end bn_sqr_comba8 2043 2044.align 5 2045.globl bn_sqr_comba4 2046.ent bn_sqr_comba4 2047bn_sqr_comba4: 2048 .set reorder 2049 lw $12,0($5) 2050 lw $13,4($5) 2051 multu ($12,$12) # 
mul_add_c(a[0],b[0],c1,c2,c3); 2052 lw $14,2*4($5) 2053 lw $15,3*4($5) 2054 mflo ($2,$12,$12) 2055 mfhi ($3,$12,$12) 2056 sw $2,0($4) 2057 2058 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); 2059 mflo ($24,$12,$13) 2060 mfhi ($25,$12,$13) 2061 slt $2,$25,$0 2062 sll $25,1 2063 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); 2064 slt $6,$24,$0 2065 addu $25,$6 2066 sll $24,1 2067 addu $3,$24 2068 sltu $1,$3,$24 2069 addu $7,$25,$1 2070 sw $3,4($4) 2071 mflo ($24,$14,$12) 2072 mfhi ($25,$14,$12) 2073 addu $7,$24 2074 sltu $1,$7,$24 2075 multu ($13,$13) # forward multiplication 2076 addu $7,$24 2077 addu $1,$25 2078 sltu $24,$7,$24 2079 addu $2,$1 2080 addu $25,$24 2081 sltu $3,$2,$1 2082 addu $2,$25 2083 sltu $25,$2,$25 2084 addu $3,$25 2085 mflo ($24,$13,$13) 2086 mfhi ($25,$13,$13) 2087 addu $7,$24 2088 sltu $1,$7,$24 2089 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); 2090 addu $25,$1 2091 addu $2,$25 2092 sltu $1,$2,$25 2093 addu $3,$1 2094 sw $7,2*4($4) 2095 mflo ($24,$12,$15) 2096 mfhi ($25,$12,$15) 2097 addu $2,$24 2098 sltu $1,$2,$24 2099 multu ($13,$14) # forward multiplication 2100 addu $2,$24 2101 addu $1,$25 2102 sltu $24,$2,$24 2103 addu $3,$1 2104 addu $25,$24 2105 sltu $7,$3,$1 2106 addu $3,$25 2107 sltu $25,$3,$25 2108 addu $7,$25 2109 mflo ($24,$13,$14) 2110 mfhi ($25,$13,$14) 2111 addu $2,$24 2112 sltu $1,$2,$24 2113 multu ($15,$13) # forward multiplication 2114 addu $2,$24 2115 addu $1,$25 2116 sltu $24,$2,$24 2117 addu $3,$1 2118 addu $25,$24 2119 sltu $1,$3,$1 2120 addu $3,$25 2121 addu $7,$1 2122 sltu $25,$3,$25 2123 addu $7,$25 2124 mflo ($24,$15,$13) 2125 mfhi ($25,$15,$13) 2126 sw $2,3*4($4) 2127 addu $3,$24 2128 sltu $1,$3,$24 2129 multu ($14,$14) # forward multiplication 2130 addu $3,$24 2131 addu $1,$25 2132 sltu $24,$3,$24 2133 addu $7,$1 2134 addu $25,$24 2135 sltu $2,$7,$1 2136 addu $7,$25 2137 sltu $25,$7,$25 2138 addu $2,$25 2139 mflo ($24,$14,$14) 2140 mfhi ($25,$14,$14) 2141 addu $3,$24 2142 sltu $1,$3,$24 2143 multu 
($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2); 2144 addu $25,$1 2145 addu $7,$25 2146 sltu $1,$7,$25 2147 addu $2,$1 2148 sw $3,4*4($4) 2149 mflo ($24,$14,$15) 2150 mfhi ($25,$14,$15) 2151 addu $7,$24 2152 sltu $1,$7,$24 2153 multu ($15,$15) # forward multiplication 2154 addu $7,$24 2155 addu $1,$25 2156 sltu $24,$7,$24 2157 addu $2,$1 2158 addu $25,$24 2159 sltu $3,$2,$1 2160 addu $2,$25 2161 sltu $25,$2,$25 2162 addu $3,$25 2163 mflo ($24,$15,$15) 2164 mfhi ($25,$15,$15) 2165 sw $7,5*4($4) 2166 2167 addu $2,$24 2168 sltu $1,$2,$24 2169 addu $25,$1 2170 addu $3,$25 2171 sw $2,6*4($4) 2172 sw $3,7*4($4) 2173 2174 .set noreorder 2175 jr $31 2176 nop 2177.end bn_sqr_comba4 2178