#include "mips_arch.h"

/*
 * Quotient/remainder access helpers.
 *
 * On MIPS R6 the divide instructions take a destination register, so
 * ddivu(rs,rt)/divu(rs,rt) expand to nothing and mfqt()/mfrm() expand to
 * the three-operand divide/modulo instruction itself.  On every other
 * target ddivu(rs,rt) issues the classic divide into HI/LO and
 * mfqt()/mfrm() read LO (quotient) and HI (remainder).
 *
 * NOTE(review): dmultu(), mflo() and mfhi() are used below with
 * parenthesised operand lists; they are presumably function-like macros
 * supplied by mips_arch.h, which is not visible here.
 */
#if defined(_MIPS_ARCH_MIPS64R6)
# define ddivu(rs,rt)
# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
#elif defined(_MIPS_ARCH_MIPS32R6)
# define divu(rs,rt)
# define mfqt(rd,rs,rt)	divu	rd,rs,rt
# define mfrm(rd,rs,rt)	modu	rd,rs,rt
#else
# define ddivu(rs,rt)	ddivu	$0,rs,rt
# define mfqt(rd,rs,rt)	mflo	rd
# define mfrm(rd,rs,rt)	mfhi	rd
#endif

.rdata
.asciiz	"mips3.s, Version 1.2"
.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"

.text
.set	noat

/*
 * bn_mul_add_words
 *
 * In:   $4 = pointer to the 64-bit words that are read, updated and
 *            written back
 *       $5 = pointer to the 64-bit words that are multiplied
 *       $6 = word count (signed)
 *       $7 = multiplier word
 * Out:  $2 = carry word left over after the last position; the same
 *            value is also copied into $4 on return.
 *
 * Effect, for each position i: the low 64 bits of
 * ($4)[i] + ($5)[i] * $7 + carry are stored to ($4)[i], the upper part
 * becomes the next carry.
 *
 * This entry stub only handles a count <= 0, returning 0; otherwise it
 * branches to the worker with $2 already cleared in the delay slot.
 */
.align	5
.globl	bn_mul_add_words
.ent	bn_mul_add_words
bn_mul_add_words:
	.set	noreorder
	bgtz	$6,bn_mul_add_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_mul_add_words

/*
 * Worker for bn_mul_add_words.  Expects $6 > 0 and $2 = incoming carry.
 * $3 holds the mask -4, so ($6 & $3) is non-zero while at least four
 * words remain: those are handled by the four-way unrolled loop, the
 * remaining one to three words by the straight-line tail.
 * Scratch registers written here: $1, $3, $8-$15.
 */
.align	5
.ent	bn_mul_add_words_internal
bn_mul_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_add_words_tail

.L_bn_mul_add_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	ld	$14,8($5)
	ld	$15,8($4)
	ld	$8,2*8($5)
	ld	$9,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
				# values", but it seems to work fine
				# even on 64-bit registers.
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	dmultu	($14,$7)
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1

	ld	$10,3*8($5)
	ld	$11,3*8($4)
	daddu	$15,$2
	sltu	$2,$15,$2
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$15,$1
	daddu	$2,$14
	dmultu	($8,$7)
	sltu	$1,$15,$1
	sd	$15,8($4)
	daddu	$2,$1

	subu	$6,4
	daddu	$4,4*8
	daddu	$5,4*8
	daddu	$9,$2
	sltu	$2,$9,$2
	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$9,$1
	daddu	$2,$8
	dmultu	($10,$7)
	sltu	$1,$9,$1
	sd	$9,-2*8($4)		# pointers already advanced by 4 words
	daddu	$2,$1


	and	$8,$6,$3		# another full group of four left?
	daddu	$11,$2
	sltu	$2,$11,$2
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$11,$1
	daddu	$2,$10
	sltu	$1,$11,$1
	sd	$11,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_add_words_loop
	daddu	$2,$1			# delay slot: fold in the last carry bit

	beqz	$6,.L_bn_mul_add_words_return
	nop

.L_bn_mul_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$13,0($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,0($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	ld	$13,8($4)
	subu	$6,1
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,8($4)
	daddu	$2,$1
	beqz	$6,.L_bn_mul_add_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	ld	$13,2*8($4)
	daddu	$13,$2
	sltu	$2,$13,$2
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$13,$1
	daddu	$2,$12
	sltu	$1,$13,$1
	sd	$13,2*8($4)
	daddu	$2,$1

.L_bn_mul_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2			# delay slot: mirror the result into $4
.end	bn_mul_add_words_internal

/*
 * bn_mul_words
 *
 * In:   $4 = pointer to the 64-bit words that are written
 *       $5 = pointer to the 64-bit words that are multiplied
 *       $6 = word count (signed)
 *       $7 = multiplier word
 * Out:  $2 = carry word left over after the last position (also copied
 *            into $4).
 *
 * Entry stub: returns 0 for a count <= 0, otherwise branches to
 * bn_mul_words_internal with $2 cleared in the delay slot.
 */
.align	5
.globl	bn_mul_words
.ent	bn_mul_words
bn_mul_words:
	.set	noreorder
	bgtz	$6,bn_mul_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_mul_words

/*
 * Worker for bn_mul_words.  Expects $6 > 0 and $2 = incoming carry.
 * For each position i the low 64 bits of ($5)[i] * $7 + carry are
 * stored to ($4)[i]; the upper part becomes the next carry.  Groups of
 * four words go through the unrolled loop ($3 = -4 is the group mask),
 * the remaining one to three words through the tail.
 * Scratch registers written here: $1, $3, $8-$15.
 */
.align	5
.ent	bn_mul_words_internal
bn_mul_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_mul_words_tail

.L_bn_mul_words_loop:
	ld	$12,0($5)
	dmultu	($12,$7)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	dmultu	($14,$7)
	sd	$2,0($4)
	daddu	$2,$13,$12		# next carry = high word + carry bit

	subu	$6,4
	daddu	$4,4*8
	daddu	$5,4*8
	mflo	($1,$14,$7)
	mfhi	($14,$14,$7)
	daddu	$2,$1
	sltu	$15,$2,$1
	dmultu	($8,$7)
	sd	$2,-3*8($4)		# pointers already advanced by 4 words
	daddu	$2,$15,$14

	mflo	($1,$8,$7)
	mfhi	($8,$8,$7)
	daddu	$2,$1
	sltu	$9,$2,$1
	dmultu	($10,$7)
	sd	$2,-2*8($4)
	daddu	$2,$9,$8

	and	$8,$6,$3		# another full group of four left?
	mflo	($1,$10,$7)
	mfhi	($10,$10,$7)
	daddu	$2,$1
	sltu	$11,$2,$1
	sd	$2,-8($4)
	.set	noreorder
	bgtz	$8,.L_bn_mul_words_loop
	daddu	$2,$11,$10		# delay slot

	beqz	$6,.L_bn_mul_words_return
	nop

.L_bn_mul_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,0($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,8($5)
	dmultu	($12,$7)
	subu	$6,1
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,8($4)
	daddu	$2,$13,$12
	beqz	$6,.L_bn_mul_words_return

	ld	$12,2*8($5)
	dmultu	($12,$7)
	mflo	($1,$12,$7)
	mfhi	($12,$12,$7)
	daddu	$2,$1
	sltu	$13,$2,$1
	sd	$2,2*8($4)
	daddu	$2,$13,$12

.L_bn_mul_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2			# delay slot: mirror the result into $4
.end	bn_mul_words_internal

/*
 * bn_sqr_words
 *
 * In:   $4 = pointer to the output words (two per input word)
 *       $5 = pointer to the input 64-bit words
 *       $6 = word count (signed)
 *
 * For each input word the low half of its square is stored at output
 * index 2*i and the high half at 2*i+1.  $2 is cleared by this stub and
 * never written by the worker, so $2 and $4 are both 0 on return.
 */
.align	5
.globl	bn_sqr_words
.ent	bn_sqr_words
bn_sqr_words:
	.set	noreorder
	bgtz	$6,bn_sqr_words_internal
	move	$2,$0			# delay slot
	jr	$31
	move	$4,$2
.end	bn_sqr_words

/*
 * Worker for bn_sqr_words.  Expects $6 > 0.  Four input words per loop
 * pass ($5 advances by 4 words, $4 by 8 words), then a tail for the
 * remaining one to three words.
 * Scratch registers written here: $3, $8-$15.
 */
.align	5
.ent	bn_sqr_words_internal
bn_sqr_words_internal:
	.set	reorder
	li	$3,-4
	and	$8,$6,$3
	beqz	$8,.L_bn_sqr_words_tail

.L_bn_sqr_words_loop:
	ld	$12,0($5)
	dmultu	($12,$12)
	ld	$14,8($5)
	ld	$8,2*8($5)
	ld	$10,3*8($5)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)

	dmultu	($14,$14)
	subu	$6,4
	daddu	$4,8*8
	daddu	$5,4*8
	mflo	($15,$14,$14)
	mfhi	($14,$14,$14)
	sd	$15,-6*8($4)		# output pointer already advanced
	sd	$14,-5*8($4)

	dmultu	($8,$8)
	mflo	($9,$8,$8)
	mfhi	($8,$8,$8)
	sd	$9,-4*8($4)
	sd	$8,-3*8($4)


	dmultu	($10,$10)
	and	$8,$6,$3		# another full group of four left?
	mflo	($11,$10,$10)
	mfhi	($10,$10,$10)
	sd	$11,-2*8($4)

	.set	noreorder
	bgtz	$8,.L_bn_sqr_words_loop
	sd	$10,-8($4)		# delay slot: last store of this pass
					# (runs whether or not the branch
					# is taken)

	beqz	$6,.L_bn_sqr_words_return
	nop

.L_bn_sqr_words_tail:
	.set	reorder
	ld	$12,0($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,0($4)
	sd	$12,8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,8($5)
	dmultu	($12,$12)
	subu	$6,1
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,2*8($4)
	sd	$12,3*8($4)
	beqz	$6,.L_bn_sqr_words_return

	ld	$12,2*8($5)
	dmultu	($12,$12)
	mflo	($13,$12,$12)
	mfhi	($12,$12,$12)
	sd	$13,4*8($4)
	sd	$12,5*8($4)

.L_bn_sqr_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2

.end	bn_sqr_words_internal

/*
 * bn_add_words
 *
 * In:   $4 = pointer to the result words
 *       $5 = pointer to the first operand words
 *       $6 = pointer to the second operand words
 *       $7 = word count (signed)
 * Out:  $2 = carry out of the last position (also copied into $4).
 *
 * Entry stub: returns 0 for a count <= 0, otherwise branches to the
 * worker with the carry cleared in the delay slot.
 */
.align	5
.globl	bn_add_words
.ent	bn_add_words
bn_add_words:
	.set	noreorder
	bgtz	$7,bn_add_words_internal
	move	$2,$0			# delay slot: carry = 0 on both paths
	jr	$31
	move	$4,$2
.end	bn_add_words

/*
 * Worker for bn_add_words.  Expects $7 > 0 and $2 = incoming carry.
 * Per word: sum = ($5)[i] + ($6)[i] + carry.  The two possible carries
 * (from adding the operands, then from adding the carry) are detected
 * separately with sltu and summed into $2.
 * $1 keeps the "four or more words left" test across the loop body;
 * this relies on the .set noat that is in effect for the file.
 * Scratch registers written here: $1, $3, $8-$15, $24, $25.
 */
.align	5
.ent	bn_add_words_internal
bn_add_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_add_words_tail

.L_bn_add_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3		# loop condition for the end of this pass
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	daddu	$8,$12
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	daddu	$9,$13
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25

	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	daddu	$11,$15
	sltu	$25,$11,$15
	daddu	$15,$11,$2
	sltu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_add_words_loop
	daddu	$2,$25			# delay slot

	beqz	$7,.L_bn_add_words_return
	nop

.L_bn_add_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	daddu	$8,$12
	subu	$7,1
	sltu	$24,$8,$12
	daddu	$12,$8,$2
	sltu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_add_words_return

	ld	$13,8($5)
	ld	$9,8($6)
	daddu	$9,$13
	subu	$7,1
	sltu	$25,$9,$13
	daddu	$13,$9,$2
	sltu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_add_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	daddu	$10,$14
	sltu	$24,$10,$14
	daddu	$14,$10,$2
	sltu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_add_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2			# delay slot: mirror the result into $4

.end	bn_add_words_internal

/*
 * bn_sub_words
 *
 * In:   $4 = pointer to the result words
 *       $5 = pointer to the minuend words
 *       $6 = pointer to the subtrahend words
 *       $7 = word count (signed)
 * Out:  $2 = borrow out of the last position (also copied into $4).
 *
 * Entry stub: for a count <= 0 both $2 and $4 are returned as 0.
 */
.align	5
.globl	bn_sub_words
.ent	bn_sub_words
bn_sub_words:
	.set	noreorder
	bgtz	$7,bn_sub_words_internal
	move	$2,$0			# delay slot: borrow = 0 on both paths
	jr	$31
	move	$4,$0
.end	bn_sub_words

/*
 * Worker for bn_sub_words.  Expects $7 > 0 and $2 = incoming borrow.
 * Per word: diff = ($5)[i] - ($6)[i] - borrow.  The borrow from the
 * operand subtraction (sltu) and the borrow from subtracting the
 * incoming borrow (sgtu) are detected separately and summed into $2.
 * $1 keeps the loop condition across the body (relies on .set noat).
 * Scratch registers written here: $1, $3, $8-$15, $24, $25.
 */
.align	5
.ent	bn_sub_words_internal
bn_sub_words_internal:
	.set	reorder
	li	$3,-4
	and	$1,$7,$3
	beqz	$1,.L_bn_sub_words_tail

.L_bn_sub_words_loop:
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,4
	ld	$13,8($5)
	and	$1,$7,$3		# loop condition for the end of this pass
	ld	$14,2*8($5)
	daddu	$6,4*8
	ld	$15,3*8($5)
	daddu	$4,4*8
	ld	$9,-3*8($6)
	daddu	$5,4*8
	ld	$10,-2*8($6)
	ld	$11,-8($6)
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,-4*8($4)
	daddu	$2,$24

	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,-3*8($4)
	daddu	$2,$25


	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,-2*8($4)
	daddu	$2,$24

	sltu	$25,$15,$11
	dsubu	$11,$15,$11
	dsubu	$15,$11,$2
	sgtu	$2,$15,$11
	sd	$15,-8($4)

	.set	noreorder
	bgtz	$1,.L_bn_sub_words_loop
	daddu	$2,$25			# delay slot

	beqz	$7,.L_bn_sub_words_return
	nop

.L_bn_sub_words_tail:
	.set	reorder
	ld	$12,0($5)
	ld	$8,0($6)
	subu	$7,1
	sltu	$24,$12,$8
	dsubu	$8,$12,$8
	dsubu	$12,$8,$2
	sgtu	$2,$12,$8
	sd	$12,0($4)
	daddu	$2,$24
	beqz	$7,.L_bn_sub_words_return

	ld	$13,8($5)
	subu	$7,1
	ld	$9,8($6)
	sltu	$25,$13,$9
	dsubu	$9,$13,$9
	dsubu	$13,$9,$2
	sgtu	$2,$13,$9
	sd	$13,8($4)
	daddu	$2,$25
	beqz	$7,.L_bn_sub_words_return

	ld	$14,2*8($5)
	ld	$10,2*8($6)
	sltu	$24,$14,$10
	dsubu	$10,$14,$10
	dsubu	$14,$10,$2
	sgtu	$2,$14,$10
	sd	$14,2*8($4)
	daddu	$2,$24

.L_bn_sub_words_return:
	.set	noreorder
	jr	$31
	move	$4,$2			# delay slot: mirror the result into $4
.end	bn_sub_words_internal

#if 0
/*
 * The bn_div_3_words entry point is re-used for constant-time interface.
 * Implementation is retained as historical reference.
575 */ 576.align 5 577.globl bn_div_3_words 578.ent bn_div_3_words 579bn_div_3_words: 580 .set noreorder 581 move $7,$4 # we know that bn_div_words does not 582 # touch $7, $10, $11 and preserves $6 583 # so that we can save two arguments 584 # and return address in registers 585 # instead of stack:-) 586 587 ld $4,($7) 588 move $10,$5 589 ld $5,-8($7) 590 bne $4,$6,bn_div_3_words_internal 591 nop 592 li $2,-1 593 jr $31 594 move $4,$2 595.end bn_div_3_words 596 597.align 5 598.ent bn_div_3_words_internal 599bn_div_3_words_internal: 600 .set reorder 601 move $11,$31 602 bal bn_div_words_internal 603 move $31,$11 604 dmultu ($10,$2) 605 ld $14,-2*8($7) 606 move $8,$0 607 mfhi ($13,$10,$2) 608 mflo ($12,$10,$2) 609 sltu $24,$13,$5 610.L_bn_div_3_words_inner_loop: 611 bnez $24,.L_bn_div_3_words_inner_loop_done 612 sgeu $1,$14,$12 613 seq $25,$13,$5 614 and $1,$25 615 sltu $15,$12,$10 616 daddu $5,$6 617 dsubu $13,$15 618 dsubu $12,$10 619 sltu $24,$13,$5 620 sltu $8,$5,$6 621 or $24,$8 622 .set noreorder 623 beqz $1,.L_bn_div_3_words_inner_loop 624 dsubu $2,1 625 daddu $2,1 626 .set reorder 627.L_bn_div_3_words_inner_loop_done: 628 .set noreorder 629 jr $31 630 move $4,$2 631.end bn_div_3_words_internal 632#endif 633 634.align 5 635.globl bn_div_words 636.ent bn_div_words 637bn_div_words: 638 .set noreorder 639 bnez $6,bn_div_words_internal 640 li $2,-1 # I would rather signal div-by-zero 641 # which can be done with 'break 7' 642 jr $31 643 move $4,$2 644.end bn_div_words 645 646.align 5 647.ent bn_div_words_internal 648bn_div_words_internal: 649 move $3,$0 650 bltz $6,.L_bn_div_words_body 651 move $25,$3 652 dsll $6,1 653 bgtz $6,.-4 654 addu $25,1 655 656 .set reorder 657 negu $13,$25 658 li $14,-1 659 dsll $14,$13 660 and $14,$4 661 dsrl $1,$5,$13 662 .set noreorder 663 beqz $14,.+12 664 nop 665 break 6 # signal overflow 666 .set reorder 667 dsll $4,$25 668 dsll $5,$25 669 or $4,$1 670.L_bn_div_words_body: 671 dsrl $3,$6,4*8 # bits 672 sgeu $1,$4,$6 673 .set 
noreorder 674 beqz $1,.+12 675 nop 676 dsubu $4,$6 677 .set reorder 678 679 li $8,-1 680 dsrl $9,$4,4*8 # bits 681 dsrl $8,4*8 # q=0xffffffff 682 beq $3,$9,.L_bn_div_words_skip_div1 683 ddivu ($4,$3) 684 mfqt ($8,$4,$3) 685.L_bn_div_words_skip_div1: 686 dmultu ($6,$8) 687 dsll $15,$4,4*8 # bits 688 dsrl $1,$5,4*8 # bits 689 or $15,$1 690 mflo ($12,$6,$8) 691 mfhi ($13,$6,$8) 692.L_bn_div_words_inner_loop1: 693 sltu $14,$15,$12 694 seq $24,$9,$13 695 sltu $1,$9,$13 696 and $14,$24 697 sltu $2,$12,$6 698 or $1,$14 699 .set noreorder 700 beqz $1,.L_bn_div_words_inner_loop1_done 701 dsubu $13,$2 702 dsubu $12,$6 703 b .L_bn_div_words_inner_loop1 704 dsubu $8,1 705 .set reorder 706.L_bn_div_words_inner_loop1_done: 707 708 dsll $5,4*8 # bits 709 dsubu $4,$15,$12 710 dsll $2,$8,4*8 # bits 711 712 li $8,-1 713 dsrl $9,$4,4*8 # bits 714 dsrl $8,4*8 # q=0xffffffff 715 beq $3,$9,.L_bn_div_words_skip_div2 716 ddivu ($4,$3) 717 mfqt ($8,$4,$3) 718.L_bn_div_words_skip_div2: 719 dmultu ($6,$8) 720 dsll $15,$4,4*8 # bits 721 dsrl $1,$5,4*8 # bits 722 or $15,$1 723 mflo ($12,$6,$8) 724 mfhi ($13,$6,$8) 725.L_bn_div_words_inner_loop2: 726 sltu $14,$15,$12 727 seq $24,$9,$13 728 sltu $1,$9,$13 729 and $14,$24 730 sltu $3,$12,$6 731 or $1,$14 732 .set noreorder 733 beqz $1,.L_bn_div_words_inner_loop2_done 734 dsubu $13,$3 735 dsubu $12,$6 736 b .L_bn_div_words_inner_loop2 737 dsubu $8,1 738 .set reorder 739.L_bn_div_words_inner_loop2_done: 740 741 dsubu $4,$15,$12 742 or $2,$8 743 dsrl $3,$4,$25 # $3 contains remainder if anybody wants it 744 dsrl $6,$25 # restore $6 745 746 .set noreorder 747 move $5,$3 748 jr $31 749 move $4,$2 750.end bn_div_words_internal 751 752.align 5 753.globl bn_mul_comba8 754.ent bn_mul_comba8 755bn_mul_comba8: 756 .set noreorder 757 .frame $29,6*8,$31 758 .mask 0x003f0000,-8 759 dsubu $29,6*8 760 sd $21,5*8($29) 761 sd $20,4*8($29) 762 sd $19,3*8($29) 763 sd $18,2*8($29) 764 sd $17,1*8($29) 765 sd $16,0*8($29) 766 767 .set reorder 768 ld $12,0($5) # If 
compiled with -mips3 option on 769 # R5000 box assembler barks on this 770 # 1ine with "should not have mult/div 771 # as last instruction in bb (R10K 772 # bug)" warning. If anybody out there 773 # has a clue about how to circumvent 774 # this do send me a note. 775 # <appro@fy.chalmers.se> 776 777 ld $8,0($6) 778 ld $13,8($5) 779 ld $14,2*8($5) 780 dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 781 ld $15,3*8($5) 782 ld $9,8($6) 783 ld $10,2*8($6) 784 ld $11,3*8($6) 785 mflo ($2,$12,$8) 786 mfhi ($3,$12,$8) 787 788 ld $16,4*8($5) 789 ld $18,5*8($5) 790 dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 791 ld $20,6*8($5) 792 ld $5,7*8($5) 793 ld $17,4*8($6) 794 ld $19,5*8($6) 795 mflo ($24,$12,$9) 796 mfhi ($25,$12,$9) 797 daddu $3,$24 798 sltu $1,$3,$24 799 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 800 daddu $7,$25,$1 801 ld $21,6*8($6) 802 ld $6,7*8($6) 803 sd $2,0($4) # r[0]=c1; 804 mflo ($24,$13,$8) 805 mfhi ($25,$13,$8) 806 daddu $3,$24 807 sltu $1,$3,$24 808 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 809 daddu $25,$1 810 daddu $7,$25 811 sltu $2,$7,$25 812 sd $3,8($4) # r[1]=c2; 813 814 mflo ($24,$14,$8) 815 mfhi ($25,$14,$8) 816 daddu $7,$24 817 sltu $1,$7,$24 818 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 819 daddu $25,$1 820 daddu $2,$25 821 mflo ($24,$13,$9) 822 mfhi ($25,$13,$9) 823 daddu $7,$24 824 sltu $1,$7,$24 825 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 826 daddu $25,$1 827 daddu $2,$25 828 sltu $3,$2,$25 829 mflo ($24,$12,$10) 830 mfhi ($25,$12,$10) 831 daddu $7,$24 832 sltu $1,$7,$24 833 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 834 daddu $25,$1 835 daddu $2,$25 836 sltu $1,$2,$25 837 daddu $3,$1 838 sd $7,2*8($4) # r[2]=c3; 839 840 mflo ($24,$12,$11) 841 mfhi ($25,$12,$11) 842 daddu $2,$24 843 sltu $1,$2,$24 844 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 845 daddu $25,$1 846 daddu $3,$25 847 sltu $7,$3,$25 848 mflo ($24,$13,$10) 849 mfhi ($25,$13,$10) 850 daddu $2,$24 851 sltu $1,$2,$24 852 dmultu 
($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); 853 daddu $25,$1 854 daddu $3,$25 855 sltu $1,$3,$25 856 daddu $7,$1 857 mflo ($24,$14,$9) 858 mfhi ($25,$14,$9) 859 daddu $2,$24 860 sltu $1,$2,$24 861 dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); 862 daddu $25,$1 863 daddu $3,$25 864 sltu $1,$3,$25 865 daddu $7,$1 866 mflo ($24,$15,$8) 867 mfhi ($25,$15,$8) 868 daddu $2,$24 869 sltu $1,$2,$24 870 dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); 871 daddu $25,$1 872 daddu $3,$25 873 sltu $1,$3,$25 874 daddu $7,$1 875 sd $2,3*8($4) # r[3]=c1; 876 877 mflo ($24,$16,$8) 878 mfhi ($25,$16,$8) 879 daddu $3,$24 880 sltu $1,$3,$24 881 dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); 882 daddu $25,$1 883 daddu $7,$25 884 sltu $2,$7,$25 885 mflo ($24,$15,$9) 886 mfhi ($25,$15,$9) 887 daddu $3,$24 888 sltu $1,$3,$24 889 dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); 890 daddu $25,$1 891 daddu $7,$25 892 sltu $1,$7,$25 893 daddu $2,$1 894 mflo ($24,$14,$10) 895 mfhi ($25,$14,$10) 896 daddu $3,$24 897 sltu $1,$3,$24 898 dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); 899 daddu $25,$1 900 daddu $7,$25 901 sltu $1,$7,$25 902 daddu $2,$1 903 mflo ($24,$13,$11) 904 mfhi ($25,$13,$11) 905 daddu $3,$24 906 sltu $1,$3,$24 907 dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); 908 daddu $25,$1 909 daddu $7,$25 910 sltu $1,$7,$25 911 daddu $2,$1 912 mflo ($24,$12,$17) 913 mfhi ($25,$12,$17) 914 daddu $3,$24 915 sltu $1,$3,$24 916 dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); 917 daddu $25,$1 918 daddu $7,$25 919 sltu $1,$7,$25 920 daddu $2,$1 921 sd $3,4*8($4) # r[4]=c2; 922 923 mflo ($24,$12,$19) 924 mfhi ($25,$12,$19) 925 daddu $7,$24 926 sltu $1,$7,$24 927 dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); 928 daddu $25,$1 929 daddu $2,$25 930 sltu $3,$2,$25 931 mflo ($24,$13,$17) 932 mfhi ($25,$13,$17) 933 daddu $7,$24 934 sltu $1,$7,$24 935 dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); 936 daddu $25,$1 937 daddu $2,$25 938 sltu $1,$2,$25 939 daddu $3,$1 940 mflo 
($24,$14,$11) 941 mfhi ($25,$14,$11) 942 daddu $7,$24 943 sltu $1,$7,$24 944 dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); 945 daddu $25,$1 946 daddu $2,$25 947 sltu $1,$2,$25 948 daddu $3,$1 949 mflo ($24,$15,$10) 950 mfhi ($25,$15,$10) 951 daddu $7,$24 952 sltu $1,$7,$24 953 dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); 954 daddu $25,$1 955 daddu $2,$25 956 sltu $1,$2,$25 957 daddu $3,$1 958 mflo ($24,$16,$9) 959 mfhi ($25,$16,$9) 960 daddu $7,$24 961 sltu $1,$7,$24 962 dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); 963 daddu $25,$1 964 daddu $2,$25 965 sltu $1,$2,$25 966 daddu $3,$1 967 mflo ($24,$18,$8) 968 mfhi ($25,$18,$8) 969 daddu $7,$24 970 sltu $1,$7,$24 971 dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); 972 daddu $25,$1 973 daddu $2,$25 974 sltu $1,$2,$25 975 daddu $3,$1 976 sd $7,5*8($4) # r[5]=c3; 977 978 mflo ($24,$20,$8) 979 mfhi ($25,$20,$8) 980 daddu $2,$24 981 sltu $1,$2,$24 982 dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); 983 daddu $25,$1 984 daddu $3,$25 985 sltu $7,$3,$25 986 mflo ($24,$18,$9) 987 mfhi ($25,$18,$9) 988 daddu $2,$24 989 sltu $1,$2,$24 990 dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); 991 daddu $25,$1 992 daddu $3,$25 993 sltu $1,$3,$25 994 daddu $7,$1 995 mflo ($24,$16,$10) 996 mfhi ($25,$16,$10) 997 daddu $2,$24 998 sltu $1,$2,$24 999 dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); 1000 daddu $25,$1 1001 daddu $3,$25 1002 sltu $1,$3,$25 1003 daddu $7,$1 1004 mflo ($24,$15,$11) 1005 mfhi ($25,$15,$11) 1006 daddu $2,$24 1007 sltu $1,$2,$24 1008 dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); 1009 daddu $25,$1 1010 daddu $3,$25 1011 sltu $1,$3,$25 1012 daddu $7,$1 1013 mflo ($24,$14,$17) 1014 mfhi ($25,$14,$17) 1015 daddu $2,$24 1016 sltu $1,$2,$24 1017 dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); 1018 daddu $25,$1 1019 daddu $3,$25 1020 sltu $1,$3,$25 1021 daddu $7,$1 1022 mflo ($24,$13,$19) 1023 mfhi ($25,$13,$19) 1024 daddu $2,$24 1025 sltu $1,$2,$24 1026 dmultu ($12,$21) # 
mul_add_c(a[0],b[6],c1,c2,c3); 1027 daddu $25,$1 1028 daddu $3,$25 1029 sltu $1,$3,$25 1030 daddu $7,$1 1031 mflo ($24,$12,$21) 1032 mfhi ($25,$12,$21) 1033 daddu $2,$24 1034 sltu $1,$2,$24 1035 dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); 1036 daddu $25,$1 1037 daddu $3,$25 1038 sltu $1,$3,$25 1039 daddu $7,$1 1040 sd $2,6*8($4) # r[6]=c1; 1041 1042 mflo ($24,$12,$6) 1043 mfhi ($25,$12,$6) 1044 daddu $3,$24 1045 sltu $1,$3,$24 1046 dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); 1047 daddu $25,$1 1048 daddu $7,$25 1049 sltu $2,$7,$25 1050 mflo ($24,$13,$21) 1051 mfhi ($25,$13,$21) 1052 daddu $3,$24 1053 sltu $1,$3,$24 1054 dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); 1055 daddu $25,$1 1056 daddu $7,$25 1057 sltu $1,$7,$25 1058 daddu $2,$1 1059 mflo ($24,$14,$19) 1060 mfhi ($25,$14,$19) 1061 daddu $3,$24 1062 sltu $1,$3,$24 1063 dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); 1064 daddu $25,$1 1065 daddu $7,$25 1066 sltu $1,$7,$25 1067 daddu $2,$1 1068 mflo ($24,$15,$17) 1069 mfhi ($25,$15,$17) 1070 daddu $3,$24 1071 sltu $1,$3,$24 1072 dmultu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1); 1073 daddu $25,$1 1074 daddu $7,$25 1075 sltu $1,$7,$25 1076 daddu $2,$1 1077 mflo ($24,$16,$11) 1078 mfhi ($25,$16,$11) 1079 daddu $3,$24 1080 sltu $1,$3,$24 1081 dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); 1082 daddu $25,$1 1083 daddu $7,$25 1084 sltu $1,$7,$25 1085 daddu $2,$1 1086 mflo ($24,$18,$10) 1087 mfhi ($25,$18,$10) 1088 daddu $3,$24 1089 sltu $1,$3,$24 1090 dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); 1091 daddu $25,$1 1092 daddu $7,$25 1093 sltu $1,$7,$25 1094 daddu $2,$1 1095 mflo ($24,$20,$9) 1096 mfhi ($25,$20,$9) 1097 daddu $3,$24 1098 sltu $1,$3,$24 1099 dmultu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1); 1100 daddu $25,$1 1101 daddu $7,$25 1102 sltu $1,$7,$25 1103 daddu $2,$1 1104 mflo ($24,$5,$8) 1105 mfhi ($25,$5,$8) 1106 daddu $3,$24 1107 sltu $1,$3,$24 1108 dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); 1109 daddu $25,$1 1110 daddu $7,$25 
1111 sltu $1,$7,$25 1112 daddu $2,$1 1113 sd $3,7*8($4) # r[7]=c2; 1114 1115 mflo ($24,$5,$9) 1116 mfhi ($25,$5,$9) 1117 daddu $7,$24 1118 sltu $1,$7,$24 1119 dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); 1120 daddu $25,$1 1121 daddu $2,$25 1122 sltu $3,$2,$25 1123 mflo ($24,$20,$10) 1124 mfhi ($25,$20,$10) 1125 daddu $7,$24 1126 sltu $1,$7,$24 1127 dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); 1128 daddu $25,$1 1129 daddu $2,$25 1130 sltu $1,$2,$25 1131 daddu $3,$1 1132 mflo ($24,$18,$11) 1133 mfhi ($25,$18,$11) 1134 daddu $7,$24 1135 sltu $1,$7,$24 1136 dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); 1137 daddu $25,$1 1138 daddu $2,$25 1139 sltu $1,$2,$25 1140 daddu $3,$1 1141 mflo ($24,$16,$17) 1142 mfhi ($25,$16,$17) 1143 daddu $7,$24 1144 sltu $1,$7,$24 1145 dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); 1146 daddu $25,$1 1147 daddu $2,$25 1148 sltu $1,$2,$25 1149 daddu $3,$1 1150 mflo ($24,$15,$19) 1151 mfhi ($25,$15,$19) 1152 daddu $7,$24 1153 sltu $1,$7,$24 1154 dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); 1155 daddu $25,$1 1156 daddu $2,$25 1157 sltu $1,$2,$25 1158 daddu $3,$1 1159 mflo ($24,$14,$21) 1160 mfhi ($25,$14,$21) 1161 daddu $7,$24 1162 sltu $1,$7,$24 1163 dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); 1164 daddu $25,$1 1165 daddu $2,$25 1166 sltu $1,$2,$25 1167 daddu $3,$1 1168 mflo ($24,$13,$6) 1169 mfhi ($25,$13,$6) 1170 daddu $7,$24 1171 sltu $1,$7,$24 1172 dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); 1173 daddu $25,$1 1174 daddu $2,$25 1175 sltu $1,$2,$25 1176 daddu $3,$1 1177 sd $7,8*8($4) # r[8]=c3; 1178 1179 mflo ($24,$14,$6) 1180 mfhi ($25,$14,$6) 1181 daddu $2,$24 1182 sltu $1,$2,$24 1183 dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); 1184 daddu $25,$1 1185 daddu $3,$25 1186 sltu $7,$3,$25 1187 mflo ($24,$15,$21) 1188 mfhi ($25,$15,$21) 1189 daddu $2,$24 1190 sltu $1,$2,$24 1191 dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); 1192 daddu $25,$1 1193 daddu $3,$25 1194 sltu $1,$3,$25 1195 daddu $7,$1 1196 mflo 
($24,$16,$19) 1197 mfhi ($25,$16,$19) 1198 daddu $2,$24 1199 sltu $1,$2,$24 1200 dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); 1201 daddu $25,$1 1202 daddu $3,$25 1203 sltu $1,$3,$25 1204 daddu $7,$1 1205 mflo ($24,$18,$17) 1206 mfhi ($25,$18,$17) 1207 daddu $2,$24 1208 sltu $1,$2,$24 1209 dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); 1210 daddu $25,$1 1211 daddu $3,$25 1212 sltu $1,$3,$25 1213 daddu $7,$1 1214 mflo ($24,$20,$11) 1215 mfhi ($25,$20,$11) 1216 daddu $2,$24 1217 sltu $1,$2,$24 1218 dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); 1219 daddu $25,$1 1220 daddu $3,$25 1221 sltu $1,$3,$25 1222 daddu $7,$1 1223 mflo ($24,$5,$10) 1224 mfhi ($25,$5,$10) 1225 daddu $2,$24 1226 sltu $1,$2,$24 1227 dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); 1228 daddu $25,$1 1229 daddu $3,$25 1230 sltu $1,$3,$25 1231 daddu $7,$1 1232 sd $2,9*8($4) # r[9]=c1; 1233 1234 mflo ($24,$5,$11) 1235 mfhi ($25,$5,$11) 1236 daddu $3,$24 1237 sltu $1,$3,$24 1238 dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); 1239 daddu $25,$1 1240 daddu $7,$25 1241 sltu $2,$7,$25 1242 mflo ($24,$20,$17) 1243 mfhi ($25,$20,$17) 1244 daddu $3,$24 1245 sltu $1,$3,$24 1246 dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); 1247 daddu $25,$1 1248 daddu $7,$25 1249 sltu $1,$7,$25 1250 daddu $2,$1 1251 mflo ($24,$18,$19) 1252 mfhi ($25,$18,$19) 1253 daddu $3,$24 1254 sltu $1,$3,$24 1255 dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); 1256 daddu $25,$1 1257 daddu $7,$25 1258 sltu $1,$7,$25 1259 daddu $2,$1 1260 mflo ($24,$16,$21) 1261 mfhi ($25,$16,$21) 1262 daddu $3,$24 1263 sltu $1,$3,$24 1264 dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); 1265 daddu $25,$1 1266 daddu $7,$25 1267 sltu $1,$7,$25 1268 daddu $2,$1 1269 mflo ($24,$15,$6) 1270 mfhi ($25,$15,$6) 1271 daddu $3,$24 1272 sltu $1,$3,$24 1273 dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); 1274 daddu $25,$1 1275 daddu $7,$25 1276 sltu $1,$7,$25 1277 daddu $2,$1 1278 sd $3,10*8($4) # r[10]=c2; 1279 1280 mflo ($24,$16,$6) 1281 mfhi 
($25,$16,$6) 1282 daddu $7,$24 1283 sltu $1,$7,$24 1284 dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); 1285 daddu $25,$1 1286 daddu $2,$25 1287 sltu $3,$2,$25 1288 mflo ($24,$18,$21) 1289 mfhi ($25,$18,$21) 1290 daddu $7,$24 1291 sltu $1,$7,$24 1292 dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); 1293 daddu $25,$1 1294 daddu $2,$25 1295 sltu $1,$2,$25 1296 daddu $3,$1 1297 mflo ($24,$20,$19) 1298 mfhi ($25,$20,$19) 1299 daddu $7,$24 1300 sltu $1,$7,$24 1301 dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); 1302 daddu $25,$1 1303 daddu $2,$25 1304 sltu $1,$2,$25 1305 daddu $3,$1 1306 mflo ($24,$5,$17) 1307 mfhi ($25,$5,$17) 1308 daddu $7,$24 1309 sltu $1,$7,$24 1310 dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); 1311 daddu $25,$1 1312 daddu $2,$25 1313 sltu $1,$2,$25 1314 daddu $3,$1 1315 sd $7,11*8($4) # r[11]=c3; 1316 1317 mflo ($24,$5,$19) 1318 mfhi ($25,$5,$19) 1319 daddu $2,$24 1320 sltu $1,$2,$24 1321 dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); 1322 daddu $25,$1 1323 daddu $3,$25 1324 sltu $7,$3,$25 1325 mflo ($24,$20,$21) 1326 mfhi ($25,$20,$21) 1327 daddu $2,$24 1328 sltu $1,$2,$24 1329 dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); 1330 daddu $25,$1 1331 daddu $3,$25 1332 sltu $1,$3,$25 1333 daddu $7,$1 1334 mflo ($24,$18,$6) 1335 mfhi ($25,$18,$6) 1336 daddu $2,$24 1337 sltu $1,$2,$24 1338 dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); 1339 daddu $25,$1 1340 daddu $3,$25 1341 sltu $1,$3,$25 1342 daddu $7,$1 1343 sd $2,12*8($4) # r[12]=c1; 1344 1345 mflo ($24,$20,$6) 1346 mfhi ($25,$20,$6) 1347 daddu $3,$24 1348 sltu $1,$3,$24 1349 dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); 1350 daddu $25,$1 1351 daddu $7,$25 1352 sltu $2,$7,$25 1353 mflo ($24,$5,$21) 1354 mfhi ($25,$5,$21) 1355 daddu $3,$24 1356 sltu $1,$3,$24 1357 dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); 1358 daddu $25,$1 1359 daddu $7,$25 1360 sltu $1,$7,$25 1361 daddu $2,$1 1362 sd $3,13*8($4) # r[13]=c2; 1363 1364 mflo ($24,$5,$6) 1365 mfhi ($25,$5,$6) 1366 daddu $7,$24 
1367 sltu $1,$7,$24 1368 daddu $25,$1 1369 daddu $2,$25 1370 sd $7,14*8($4) # r[14]=c3; 1371 sd $2,15*8($4) # r[15]=c1; 1372 1373 .set noreorder 1374 ld $21,5*8($29) 1375 ld $20,4*8($29) 1376 ld $19,3*8($29) 1377 ld $18,2*8($29) 1378 ld $17,1*8($29) 1379 ld $16,0*8($29) 1380 jr $31 1381 daddu $29,6*8 1382.end bn_mul_comba8 1383 1384.align 5 1385.globl bn_mul_comba4 1386.ent bn_mul_comba4 1387bn_mul_comba4: 1388 .set reorder 1389 ld $12,0($5) 1390 ld $8,0($6) 1391 ld $13,8($5) 1392 ld $14,2*8($5) 1393 dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); 1394 ld $15,3*8($5) 1395 ld $9,8($6) 1396 ld $10,2*8($6) 1397 ld $11,3*8($6) 1398 mflo ($2,$12,$8) 1399 mfhi ($3,$12,$8) 1400 sd $2,0($4) 1401 1402 dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); 1403 mflo ($24,$12,$9) 1404 mfhi ($25,$12,$9) 1405 daddu $3,$24 1406 sltu $1,$3,$24 1407 dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); 1408 daddu $7,$25,$1 1409 mflo ($24,$13,$8) 1410 mfhi ($25,$13,$8) 1411 daddu $3,$24 1412 sltu $1,$3,$24 1413 dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); 1414 daddu $25,$1 1415 daddu $7,$25 1416 sltu $2,$7,$25 1417 sd $3,8($4) 1418 1419 mflo ($24,$14,$8) 1420 mfhi ($25,$14,$8) 1421 daddu $7,$24 1422 sltu $1,$7,$24 1423 dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); 1424 daddu $25,$1 1425 daddu $2,$25 1426 mflo ($24,$13,$9) 1427 mfhi ($25,$13,$9) 1428 daddu $7,$24 1429 sltu $1,$7,$24 1430 dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); 1431 daddu $25,$1 1432 daddu $2,$25 1433 sltu $3,$2,$25 1434 mflo ($24,$12,$10) 1435 mfhi ($25,$12,$10) 1436 daddu $7,$24 1437 sltu $1,$7,$24 1438 dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); 1439 daddu $25,$1 1440 daddu $2,$25 1441 sltu $1,$2,$25 1442 daddu $3,$1 1443 sd $7,2*8($4) 1444 1445 mflo ($24,$12,$11) 1446 mfhi ($25,$12,$11) 1447 daddu $2,$24 1448 sltu $1,$2,$24 1449 dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); 1450 daddu $25,$1 1451 daddu $3,$25 1452 sltu $7,$3,$25 1453 mflo ($24,$13,$10) 1454 mfhi ($25,$13,$10) 1455 daddu $2,$24 
# Tail of bn_mul_comba4 (the routine begins earlier in the file).
# The stores below write result words 3..7 at 3*8($4) .. 7*8($4), then
# the routine returns through $31.  Register roles are set up in the part
# of the routine above this point; the mul_add_c(...) notes are original.
	sltu	$1,$2,$24
	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,3*8($4)		# result word 3

	mflo	($24,$15,$9)
	mfhi	($25,$15,$9)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$2,$7,$25
	mflo	($24,$14,$10)
	mfhi	($25,$14,$10)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	mflo	($24,$13,$11)
	mfhi	($25,$13,$11)
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# result word 4

	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$3,$2,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,5*8($4)		# result word 5

	mflo	($24,$15,$11)
	mfhi	($25,$15,$11)
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)		# result word 6
	sd	$3,7*8($4)		# result word 7

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_mul_comba4

# ----------------------------------------------------------------------
# bn_sqr_comba8
#
# Squares the eight-doubleword operand read from $5 and stores the
# sixteen-doubleword result at $4.  Presumably the OpenSSL primitive
# void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) -- TODO confirm
# against the C prototype.
#
# Register roles, as used below:
#   $4        r, destination; words stored at 0 .. 15*8($4)
#   $5        a, source; words loaded from 0 .. 7*8($5)
#   $12-$15   a[0] .. a[3]
#   $8-$11    a[4] .. a[7]
#   $2,$3,$7  three-word column accumulator (c1,c2,c3 in the original
#             notes); the roles rotate each time a result word is stored
#   $24,$25   low / high half of the most recently fetched product
#   $1        carry scratch (the file runs under .set noat)
#   $6        scratch for the bit shifted out when doubling a[0]*a[1]
#
# Cross products a[i]*a[j] with i != j are added to the accumulator
# twice, squares a[i]*a[i] once.  Each forward multiplication is issued
# before the additions of the previous product and its result is fetched
# with mflo/mfhi afterwards.  dmultu(...), mflo(...) and mfhi(...) are
# written in parenthesised macro form; their expansions are not defined
# in this part of the file (presumably mips_arch.h).
#
# No stack access; returns with jr $31.
# ----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba8
.ent	bn_sqr_comba8
bn_sqr_comba8:
	.set	reorder
	ld	$12,0($5)
	ld	$13,8($5)
	ld	$14,2*8($5)
	ld	$15,3*8($5)

	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$8,4*8($5)
	ld	$9,5*8($5)
	ld	$10,6*8($5)
	ld	$11,7*8($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sd	$2,0($4)		# r[0]

	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# c1 = top bit of hi, lost by the shift below
	dsll	$25,1			# hi *= 2
	dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0		# $6 = top bit of lo
	daddu	$25,$6			# carry it into the doubled hi
	dsll	$24,1			# lo *= 2
	daddu	$3,$24
	sltu	$1,$3,$24		# carry out of c2
	daddu	$7,$25,$1		# c3 = doubled hi + carry
	sd	$3,8($4)		# r[1]
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)

	# add 2*a[2]*a[0]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$13)		# forward multiplication: a[1]*a[1]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)

	# add a[1]*a[1]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)		# r[2]
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)

	# add 2*a[0]*a[3]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# forward multiplication: a[1]*a[2]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)

	# add 2*a[1]*a[2]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$12)		# forward multiplication: a[4]*a[0]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$8,$12)
	mfhi	($25,$8,$12)
	sd	$2,3*8($4)		# r[3]

	# add 2*a[4]*a[0]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$13)		# forward multiplication: a[3]*a[1]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)

	# add 2*a[3]*a[1]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# forward multiplication: a[2]*a[2]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$1,$7,$1
	daddu	$7,$25
	daddu	$2,$1
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)

	# add a[2]*a[2]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# r[4]
	mflo	($24,$12,$9)
	mfhi	($25,$12,$9)

	# add 2*a[0]*a[5]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$8)		# forward multiplication: a[1]*a[4]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$13,$8)
	mfhi	($25,$13,$8)

	# add 2*a[1]*a[4]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$15)		# forward multiplication: a[2]*a[3]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$1,$2,$1
	daddu	$2,$25
	daddu	$3,$1
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)

	# add 2*a[2]*a[3]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($10,$12)		# forward multiplication: a[6]*a[0]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$1,$2,$1
	daddu	$2,$25
	daddu	$3,$1
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$10,$12)
	mfhi	($25,$10,$12)
	sd	$7,5*8($4)		# r[5]

	# add 2*a[6]*a[0]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($9,$13)		# forward multiplication: a[5]*a[1]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$9,$13)
	mfhi	($25,$9,$13)

	# add 2*a[5]*a[1]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$14)		# forward multiplication: a[4]*a[2]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$8,$14)
	mfhi	($25,$8,$14)

	# add 2*a[4]*a[2]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$15)		# forward multiplication: a[3]*a[3]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)

	# add a[3]*a[3]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,6*8($4)		# r[6]
	mflo	($24,$12,$11)
	mfhi	($25,$12,$11)

	# add 2*a[0]*a[7]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($13,$10)		# forward multiplication: a[1]*a[6]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$13,$10)
	mfhi	($25,$13,$10)

	# add 2*a[1]*a[6]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$9)		# forward multiplication: a[2]*a[5]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$1,$7,$1
	daddu	$7,$25
	daddu	$2,$1
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$14,$9)
	mfhi	($25,$14,$9)

	# add 2*a[2]*a[5]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($15,$8)		# forward multiplication: a[3]*a[4]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$1,$7,$1
	daddu	$7,$25
	daddu	$2,$1
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$15,$8)
	mfhi	($25,$15,$8)

	# add 2*a[3]*a[4]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($11,$13)		# forward multiplication: a[7]*a[1]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$1,$7,$1
	daddu	$7,$25
	daddu	$2,$1
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$11,$13)
	mfhi	($25,$11,$13)
	sd	$3,7*8($4)		# r[7]

	# add 2*a[7]*a[1]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($10,$14)		# forward multiplication: a[6]*a[2]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$10,$14)
	mfhi	($25,$10,$14)

	# add 2*a[6]*a[2]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($9,$15)		# forward multiplication: a[5]*a[3]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$1,$2,$1
	daddu	$2,$25
	daddu	$3,$1
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$9,$15)
	mfhi	($25,$9,$15)

	# add 2*a[5]*a[3]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($8,$8)			# forward multiplication: a[4]*a[4]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$1,$2,$1
	daddu	$2,$25
	daddu	$3,$1
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$8,$8)
	mfhi	($25,$8,$8)

	# add a[4]*a[4]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,8*8($4)		# r[8]
	mflo	($24,$14,$11)
	mfhi	($25,$14,$11)

	# add 2*a[2]*a[7]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$10)		# forward multiplication: a[3]*a[6]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$15,$10)
	mfhi	($25,$15,$10)

	# add 2*a[3]*a[6]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($8,$9)			# forward multiplication: a[4]*a[5]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$8,$9)
	mfhi	($25,$8,$9)

	# add 2*a[4]*a[5]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($11,$15)		# forward multiplication: a[7]*a[3]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$11,$15)
	mfhi	($25,$11,$15)
	sd	$2,9*8($4)		# r[9]

	# add 2*a[7]*a[3]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($10,$8)		# forward multiplication: a[6]*a[4]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$10,$8)
	mfhi	($25,$10,$8)

	# add 2*a[6]*a[4]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($9,$9)			# forward multiplication: a[5]*a[5]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$1,$7,$1
	daddu	$7,$25
	daddu	$2,$1
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$9,$9)
	mfhi	($25,$9,$9)

	# add a[5]*a[5]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,10*8($4)		# r[10]
	mflo	($24,$8,$11)
	mfhi	($25,$8,$11)

	# add 2*a[4]*a[7]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($9,$10)		# forward multiplication: a[5]*a[6]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$9,$10)
	mfhi	($25,$9,$10)

	# add 2*a[5]*a[6]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($11,$9)		# forward multiplication: a[7]*a[5]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$1,$2,$1
	daddu	$2,$25
	daddu	$3,$1
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$11,$9)
	mfhi	($25,$11,$9)
	sd	$7,11*8($4)		# r[11]

	# add 2*a[7]*a[5]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($10,$10)		# forward multiplication: a[6]*a[6]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$10,$10)
	mfhi	($25,$10,$10)

	# add a[6]*a[6]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
	daddu	$25,$1
	daddu	$3,$25
	sltu	$1,$3,$25
	daddu	$7,$1
	sd	$2,12*8($4)		# r[12]
	mflo	($24,$10,$11)
	mfhi	($25,$10,$11)

	# add 2*a[6]*a[7]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($11,$11)		# forward multiplication: a[7]*a[7]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$11,$11)
	mfhi	($25,$11,$11)
	sd	$3,13*8($4)		# r[13]

	# add a[7]*a[7]; its high half plus carry forms the top word
	daddu	$7,$24
	sltu	$1,$7,$24
	daddu	$25,$1
	daddu	$2,$25
	sd	$7,14*8($4)		# r[14]
	sd	$2,15*8($4)		# r[15]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba8

# ----------------------------------------------------------------------
# bn_sqr_comba4
#
# Squares the four-doubleword operand read from $5 and stores the
# eight-doubleword result at $4.  Presumably the OpenSSL primitive
# void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) -- TODO confirm
# against the C prototype.
#
# Register roles, as used below:
#   $4        r, destination; words stored at 0 .. 7*8($4)
#   $5        a, source; words loaded from 0 .. 3*8($5)
#   $12-$15   a[0] .. a[3]
#   $2,$3,$7  three-word column accumulator (c1,c2,c3 in the original
#             notes); the roles rotate each time a result word is stored
#   $24,$25   low / high half of the most recently fetched product
#   $1        carry scratch (the file runs under .set noat)
#   $6        scratch for the bit shifted out when doubling a[0]*a[1]
#
# Same scheme as bn_sqr_comba8: cross products are added twice, squares
# once, and each forward multiplication is issued before the additions
# of the previous product.  No stack access; returns with jr $31.
# ----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	ld	$12,0($5)
	ld	$13,8($5)
	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
	ld	$14,2*8($5)
	ld	$15,3*8($5)
	mflo	($2,$12,$12)
	mfhi	($3,$12,$12)
	sd	$2,0($4)		# r[0]

	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	($24,$12,$13)
	mfhi	($25,$12,$13)
	slt	$2,$25,$0		# c1 = top bit of hi, lost by the shift below
	dsll	$25,1			# hi *= 2
	dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$6,$24,$0		# $6 = top bit of lo
	daddu	$25,$6			# carry it into the doubled hi
	dsll	$24,1			# lo *= 2
	daddu	$3,$24
	sltu	$1,$3,$24		# carry out of c2
	daddu	$7,$25,$1		# c3 = doubled hi + carry
	sd	$3,8($4)		# r[1]
	mflo	($24,$14,$12)
	mfhi	($25,$14,$12)

	# add 2*a[2]*a[0]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($13,$13)		# forward multiplication: a[1]*a[1]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$13,$13)
	mfhi	($25,$13,$13)

	# add a[1]*a[1]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
	daddu	$25,$1
	daddu	$2,$25
	sltu	$1,$2,$25
	daddu	$3,$1
	sd	$7,2*8($4)		# r[2]
	mflo	($24,$12,$15)
	mfhi	($25,$12,$15)

	# add 2*a[0]*a[3]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($13,$14)		# forward multiplication: a[1]*a[2]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$7,$3,$1
	daddu	$3,$25
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$13,$14)
	mfhi	($25,$13,$14)

	# add 2*a[1]*a[2]
	daddu	$2,$24
	sltu	$1,$2,$24
	dmultu	($15,$13)		# forward multiplication: a[3]*a[1]
	daddu	$2,$24
	daddu	$1,$25
	sltu	$24,$2,$24
	daddu	$3,$1
	daddu	$25,$24
	sltu	$1,$3,$1
	daddu	$3,$25
	daddu	$7,$1
	sltu	$25,$3,$25
	daddu	$7,$25
	mflo	($24,$15,$13)
	mfhi	($25,$15,$13)
	sd	$2,3*8($4)		# r[3]

	# add 2*a[3]*a[1]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$14)		# forward multiplication: a[2]*a[2]
	daddu	$3,$24
	daddu	$1,$25
	sltu	$24,$3,$24
	daddu	$7,$1
	daddu	$25,$24
	sltu	$2,$7,$1
	daddu	$7,$25
	sltu	$25,$7,$25
	daddu	$2,$25
	mflo	($24,$14,$14)
	mfhi	($25,$14,$14)

	# add a[2]*a[2]
	daddu	$3,$24
	sltu	$1,$3,$24
	dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
	daddu	$25,$1
	daddu	$7,$25
	sltu	$1,$7,$25
	daddu	$2,$1
	sd	$3,4*8($4)		# r[4]
	mflo	($24,$14,$15)
	mfhi	($25,$14,$15)

	# add 2*a[2]*a[3]
	daddu	$7,$24
	sltu	$1,$7,$24
	dmultu	($15,$15)		# forward multiplication: a[3]*a[3]
	daddu	$7,$24
	daddu	$1,$25
	sltu	$24,$7,$24
	daddu	$2,$1
	daddu	$25,$24
	sltu	$3,$2,$1
	daddu	$2,$25
	sltu	$25,$2,$25
	daddu	$3,$25
	mflo	($24,$15,$15)
	mfhi	($25,$15,$15)
	sd	$7,5*8($4)		# r[5]

	# add a[3]*a[3]; its high half plus carry forms the top word
	daddu	$2,$24
	sltu	$1,$2,$24
	daddu	$25,$1
	daddu	$3,$25
	sd	$2,6*8($4)		# r[6]
	sd	$3,7*8($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# branch delay slot
.end	bn_sqr_comba4